PyTorch Implementation of Smell Prediction
import pandas as pd
import numpy as np
from os.path import isfile, join
from os import listdir
from copy import deepcopy
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
Below we define a set of functions for preprocessing the data (hidden in a collapsed cell in the original notebook).
def answer_preprocess_sensor(df_list):
"""
This function is the answer of task 5.
Preprocess sensor data.
Parameters
----------
df_list : list of pandas.DataFrame
A list of data frames that contain sensor data from multiple stations.
Returns
-------
pandas.DataFrame
The preprocessed sensor data.
"""
# Resample all the data frames.
df_resample_list = []
for df in df_list:
# Convert the timestamp to datetime.
df.index = pd.to_datetime(df.index, unit="s", utc=True)
# Resample the timestamps by hour and average all the previous values.
        # Because we want data from the past, the label needs to be "right".
df_resample_list.append(df.resample("60Min", label="right").mean())
# Merge all data frames.
df = df_resample_list.pop(0)
index_name = df.index.name
while len(df_resample_list) != 0:
# We need to use outer merging since we want to preserve data from both data frames.
df = pd.merge_ordered(df, df_resample_list.pop(0), on=df.index.name, how="outer", fill_method=None)
# Move the datetime column to index
df = df.set_index(index_name)
# Fill in the missing data with value -1.
df = df.fillna(-1)
return df
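To make the merging step concrete, here is a minimal sketch with two hypothetical single-station frames (the names toy_a, toy_b, and the pm25 columns are made up for illustration): the outer merge keeps every timestamp that appears in either frame and leaves NaN where a station has no reading.
# A minimal sketch of the outer merge above, using two hypothetical station frames.
t = pd.date_range("2020-01-01", periods=3, freq="60Min", tz="UTC")
toy_a = pd.DataFrame({"pm25_a": [1.0, 2.0]}, index=t[:2]).rename_axis("EpochTime")
toy_b = pd.DataFrame({"pm25_b": [3.0, 4.0]}, index=t[1:]).rename_axis("EpochTime")
toy = pd.merge_ordered(toy_a.reset_index(), toy_b.reset_index(),
                       on="EpochTime", how="outer", fill_method=None)
# Timestamps from both frames are kept; cells with no reading become NaN,
# which the function above then fills with -1.
print(toy.set_index("EpochTime"))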
def answer_preprocess_smell(df):
"""
This function is the answer of task 4.
Preprocess smell data.
Parameters
----------
df : pandas.DataFrame
The raw smell reports data.
Returns
-------
pandas.DataFrame
The preprocessed smell data.
"""
# Copy the dataframe to avoid editing the original one.
df = df.copy(deep=True)
# Drop the columns that we do not need.
df = df.drop(columns=["feelings_symptoms", "smell_description", "zipcode"])
# Select only the reports within the range of 3 and 5.
df = df[(df["smell_value"]>=3)&(df["smell_value"]<=5)]
# Convert the timestamp to datetime.
df.index = pd.to_datetime(df.index, unit="s", utc=True)
# Resample the timestamps by hour and sum up all the future values.
    # Because we want data from the future, the label needs to be "left".
df = df.resample("60Min", label="left").sum()
# Fill in the missing data with value 0.
df = df.fillna(0)
return df
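The two resampling conventions above can be checked on a toy series (hypothetical values, not the real data):
# A toy series (hypothetical values) to illustrate the "label" conventions above.
toy = pd.Series([1.0, 2.0, 3.0, 4.0],
                index=pd.to_datetime([0, 1800, 3600, 5400], unit="s", utc=True))
# label="right" stamps each hourly bin with its end time,
# so each timestamp summarizes the past hour (used for the sensor data).
print(toy.resample("60Min", label="right").mean())
# label="left" stamps each hourly bin with its start time,
# so each timestamp summarizes the coming hour (used for the smell data).
print(toy.resample("60Min", label="left").sum())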
def answer_sum_current_and_future_data(df, n_hr=0):
"""
This function is the answer of task 6.
Sum up data in the current and future hours.
Parameters
----------
df : pandas.DataFrame
The preprocessed smell data.
n_hr : int
Number of hours that we want to sum up the future smell data.
Returns
-------
pandas.DataFrame
The transformed smell data.
"""
# Copy data frame to prevent editing the original one.
df = df.copy(deep=True)
# Fast return if n_hr is 0
if n_hr == 0: return df
    # Sum up all smell_values in the current and future hours.
    # The rolling function only works for summing up previous values,
    # so we need to shift the result back to align it with the future values.
    # Be careful that we need to add 1 to the rolling window size,
    # because a window size of 1 means only using the current data.
    # The "closed" parameter needs to be "right" because we want to include the current data.
df = df.rolling(n_hr+1, min_periods=1, closed="right").sum().shift(-1*n_hr)
# Delete the last n_hr rows.
# These n_hr rows have wrong data due to data shifting.
df = df.iloc[:-1*n_hr]
return df
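The rolling-and-shift trick is easy to verify on a toy data frame (hypothetical values):
# Verify the rolling-and-shift trick on a toy data frame (hypothetical values).
toy = pd.DataFrame({"smell_value": [1, 2, 3, 4, 5]},
                   index=pd.date_range("2020-01-01", periods=5, freq="60Min", tz="UTC"))
# With n_hr=2, each row becomes the sum of itself and the next two rows,
# and the last two rows are dropped, so the expected column is [6, 9, 12].
print(answer_sum_current_and_future_data(toy, n_hr=2))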
def insert_previous_data_to_cols(df, n_hr=0):
"""
Insert columns to indicate the data from the previous hours.
Parameters
----------
df : pandas.DataFrame
The preprocessed sensor data.
n_hr : int
Number of hours that we want to insert the previous sensor data.
Returns
-------
pandas.DataFrame
The transformed sensor data.
"""
# Copy data frame to prevent editing the original one.
df = df.copy(deep=True)
# Add the data from the previous hours.
df_all = []
for h in range(1, n_hr + 1):
# Shift the data frame to get previous data.
df_pre = df.shift(h)
# Edit the name to indicate it is previous data.
        # The original data frame already has data from the previous 1 hour.
# (as indicated in the preprocessing phase of sensor data)
# So we need to add 1 here.
df_pre.columns += "_pre_" + str(h+1) + "h"
# Add the data to an array for merging.
df_all.append(df_pre)
# Rename the columns in the original data frame.
    # The original data frame already has data from the previous 1 hour.
# (as indicated in the preprocessing phase of sensor data)
df.columns += "_pre_1h"
# Merge all data.
df_merge = df
for d in df_all:
# The join function merges dataframes by index.
df_merge = df_merge.join(d)
# Delete the first n_hr rows.
# These n_hr rows have no data due to data shifting.
df_merge = df_merge.iloc[n_hr:]
return df_merge
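Again, a toy data frame (hypothetical values) shows what the lagged columns look like:
# Illustrate the lagged columns on a toy data frame (hypothetical values).
toy = pd.DataFrame({"pm25": [10.0, 20.0, 30.0, 40.0]})
# With n_hr=1 the result has the columns "pm25_pre_1h" and "pm25_pre_2h",
# and the first row is dropped because it has no previous data.
print(insert_previous_data_to_cols(toy, n_hr=1))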
def convert_wind_direction(df):
"""
Convert wind directions to sine and cosine components.
Parameters
----------
df : pandas.DataFrame
The data frame that contains the wind direction data.
Returns
-------
pandas.DataFrame
The transformed data frame.
"""
# Copy data frame to prevent editing the original one.
df_cp = df.copy(deep=True)
# Convert columns with wind directions.
for c in df.columns:
if "SONICWD_DEG" in c:
df_c = df[c]
df_c_cos = np.cos(np.deg2rad(df_c))
df_c_sin = np.sin(np.deg2rad(df_c))
df_c_cos.name += "_cosine"
df_c_sin.name += "_sine"
df_cp.drop([c], axis=1, inplace=True)
df_cp[df_c_cos.name] = df_c_cos
df_cp[df_c_sin.name] = df_c_sin
return df_cp
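We use the sine/cosine encoding because raw degrees have an artificial discontinuity: 359 and 1 degrees are nearly the same heading but numerically far apart. A quick sketch of the idea:
# 359 and 1 degrees differ by 358 as raw values,
# but their cosine/sine components are almost identical.
for deg in (359, 1):
    rad = np.deg2rad(deg)
    print(deg, round(float(np.cos(rad)), 3), round(float(np.sin(rad)), 3))
# 359 -> 1.0 -0.017
# 1 -> 1.0 0.017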
def compute_feature_label(df_smell, df_sensor, b_hr_sensor=0, f_hr_smell=0):
"""
Compute features and labels from the smell and sensor data.
Parameters
----------
df_smell : pandas.DataFrame
The preprocessed smell data.
df_sensor : pandas.DataFrame
The preprocessed sensor data.
b_hr_sensor : int
Number of hours that we want to insert the previous sensor data.
f_hr_smell : int
Number of hours that we want to sum up the future smell data.
Returns
-------
df_x : pandas.DataFrame
The features that we want to use for modeling.
df_y : pandas.DataFrame
The labels that we want to use for modeling.
"""
# Copy data frames to prevent editing the original ones.
df_smell = df_smell.copy(deep=True)
df_sensor = df_sensor.copy(deep=True)
    # Replace -1 values in the sensor data with NaN.
df_sensor[df_sensor==-1] = np.nan
# Convert all wind directions.
df_sensor = convert_wind_direction(df_sensor)
# Scale sensor data and fill in missing values
df_sensor = (df_sensor - df_sensor.mean()) / df_sensor.std()
df_sensor = df_sensor.round(6)
df_sensor = df_sensor.fillna(-1)
# Insert previous sensor data as features.
    # Notice that the df_sensor is already using the previous data.
# So b_hr_sensor=0 means using data from the previous 1 hour.
# And b_hr_sensor=n means using data from the previous n+1 hours.
df_sensor = insert_previous_data_to_cols(df_sensor, b_hr_sensor)
# Sum up current and future smell values as label.
# Notice that the df_smell is already the data from the future 1 hour.
# (as indicated in the preprocessing phase of smell data)
# So f_hr_smell=0 means using data from the future 1 hour.
# And f_hr_smell=n means using data from the future n+1 hours.
df_smell = answer_sum_current_and_future_data(df_smell, f_hr_smell)
# Add suffix to the column name of the smell data to prevent confusion.
# See the description above for the reason of adding 1 to the f_hr_smell.
df_smell.columns += "_future_" + str(f_hr_smell+1) + "h"
    # Merge the two data frames based on their timestamps.
    # This synchronizes the timestamps in the sensor and smell data,
    # which also means that the sensor and smell data have the same number of data points.
df = pd.merge_ordered(df_sensor.reset_index(), df_smell.reset_index(), on=df_smell.index.name, how="inner", fill_method=None)
# Sanity check: there should be no missing data.
assert df.isna().sum().sum() == 0, "Error! There is missing data."
# Separate features (x) and labels (y).
    # Use .copy() so that the .loc assignments below do not trigger a SettingWithCopyWarning.
    df_x = df[df_sensor.columns].copy()
    df_y = df[df_smell.columns].copy()
# Add the hour of day and the day of week.
dow_radian = df["EpochTime"].dt.dayofweek.copy(deep=True) * 2 * np.pi / 6.0
tod_radian = df["EpochTime"].dt.hour.copy(deep=True) * 2 * np.pi / 23.0
df_x.loc[:,"day_of_week_sine"] = np.sin(dow_radian)
df_x.loc[:,"day_of_week_cosine"] = np.cos(dow_radian)
df_x.loc[:,"hour_of_day_sine"] = np.sin(tod_radian)
df_x.loc[:,"hour_of_day_cosine"] = np.cos(tod_radian)
return df_x, df_y
# Load and preprocess sensor data
path = "smellpgh-v1/esdr_raw"
list_of_files = [f for f in listdir(path) if isfile(join(path, f))]
sensor_raw_list = []
for f in list_of_files:
sensor_raw_list.append(pd.read_csv(join(path, f)).set_index("EpochTime"))
df_sensor = answer_preprocess_sensor(sensor_raw_list)
# Load and preprocess smell data
smell_raw = pd.read_csv("smellpgh-v1/smell_raw.csv").set_index("EpochTime")
df_smell = answer_preprocess_smell(smell_raw)
# Compute features and labels
df_x, df_y = compute_feature_label(df_smell, df_sensor, b_hr_sensor=2, f_hr_smell=7)
df_x
| 3.feed_1.SO2_PPM_pre_1h | 3.feed_1.H2S_PPM_pre_1h | 3.feed_1.SIGTHETA_DEG_pre_1h | 3.feed_1.SONICWS_MPH_pre_1h | 3.feed_23.CO_PPM_pre_1h | 3.feed_23.PM10_UG_M3_pre_1h | 3.feed_29.PM10_UG_M3_pre_1h | 3.feed_29.PM25_UG_M3_pre_1h | 3.feed_11067.CO_PPB..3.feed_43.CO_PPB_pre_1h | 3.feed_11067.NO2_PPB..3.feed_43.NO2_PPB_pre_1h | ... | 3.feed_28.SONICWD_DEG_cosine_pre_3h | 3.feed_28.SONICWD_DEG_sine_pre_3h | 3.feed_26.SONICWD_DEG_cosine_pre_3h | 3.feed_26.SONICWD_DEG_sine_pre_3h | 3.feed_3.SONICWD_DEG_cosine_pre_3h | 3.feed_3.SONICWD_DEG_sine_pre_3h | day_of_week_sine | day_of_week_cosine | hour_of_day_sine | hour_of_day_cosine
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -0.273112 | -0.403688 | -1.520058 | -0.599075 | -0.388936 | -0.777225 | -0.406466 | -0.395826 | -0.716551 | -0.585693 | ... | 0.279097 | 1.746934 | -0.383942 | 1.929446 | -0.542867 | 1.331119 | 0.000000 | 1.0 | -2.449294e-16 | 1.000000 |
1 | -0.273112 | -0.403688 | -1.433654 | -0.684709 | -0.388936 | -0.690974 | 0.007500 | -0.305936 | -0.426597 | 0.488014 | ... | 1.089779 | 1.481480 | 0.945548 | 1.350182 | 0.512949 | 1.355712 | 0.866025 | 0.5 | 0.000000e+00 | 1.000000 |
2 | -0.273112 | -0.403688 | 1.142731 | -0.941610 | 0.147335 | -0.173473 | -0.147737 | -0.216045 | -0.444787 | 0.829648 | ... | 0.799733 | 1.640186 | 0.726159 | 1.603583 | 0.537757 | 1.347897 | 0.866025 | 0.5 | 2.697968e-01 | 0.962917 |
3 | -0.273112 | -0.403688 | -0.082623 | -0.941610 | 0.147335 | -0.432224 | -0.302974 | -0.216045 | -0.796641 | 0.081306 | ... | 0.960380 | 1.562966 | 1.185067 | 0.816616 | 0.512949 | 1.355712 | 0.866025 | 0.5 | 5.195840e-01 | 0.854419 |
4 | -0.273112 | -0.403688 | 1.527618 | -0.984426 | 0.147335 | -0.259723 | -0.458211 | -0.485717 | -0.762976 | -0.504352 | ... | 1.623480 | 0.780539 | 1.225168 | 0.602477 | 0.659294 | 1.303117 | 0.866025 | 0.5 | 7.308360e-01 | 0.682553 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
16746 | -0.273112 | -0.403688 | 0.011635 | -0.128090 | -0.925207 | -0.604724 | -0.561703 | -0.575607 | 0.529598 | -0.748376 | ... | 1.707841 | 0.380565 | 1.119099 | -0.254164 | 1.210281 | -0.738344 | -0.866025 | 0.5 | -9.976688e-01 | -0.068242 |
16747 | -0.273112 | -0.403688 | 0.443651 | 0.000361 | -0.925207 | -0.690974 | -0.406466 | -0.665498 | 0.662087 | -0.292864 | ... | 1.693445 | 0.048275 | 1.098706 | -0.303805 | 1.327922 | -0.583204 | -0.866025 | 0.5 | -9.790841e-01 | 0.203456 |
16748 | -0.273112 | -0.403688 | 0.443651 | -0.256540 | -0.925207 | -0.604724 | -0.458211 | -0.575607 | 0.181817 | -0.862254 | ... | 1.489886 | -0.500401 | 0.609087 | -0.931955 | 0.798657 | -1.062282 | -0.866025 | 0.5 | -8.878852e-01 | 0.460065 |
16749 | -0.273112 | -0.403688 | 0.270844 | -0.085273 | -0.925207 | -0.518474 | -0.302974 | -0.575607 | 0.856204 | -0.439279 | ... | 1.402368 | -0.626362 | 0.237194 | -1.124325 | 0.706601 | -1.107672 | -0.866025 | 0.5 | -7.308360e-01 | 0.682553 |
16750 | -0.273112 | -0.403688 | -0.341833 | 0.085995 | -0.925207 | -0.432224 | -0.406466 | -0.395826 | 0.798647 | -0.309133 | ... | 0.581575 | -1.153330 | -0.684058 | -1.060369 | -0.161757 | -1.243870 | -0.866025 | 0.5 | -5.195840e-01 | 0.854419 |
16751 rows × 148 columns
df_y
| smell_value_future_8h
---|---|
0 | 8.0 |
1 | 5.0 |
2 | 5.0 |
3 | 5.0 |
4 | 5.0 |
... | ... |
16746 | 6.0 |
16747 | 6.0 |
16748 | 6.0 |
16749 | 3.0 |
16750 | 11.0 |
16751 rows × 1 columns
# Set random seed for reproducibility
torch.manual_seed(42)
# Convert the features to a numpy array.
feature = df_x.to_numpy()
# Binarize the labels: 1 if the smell values summed over the future 8 hours reach 40, else 0.
label = (df_y >= 40).astype(int)["smell_value_future_8h"].to_numpy()
# Create the dataset object
class SmellPittsburghDataset(Dataset):
def __init__(self, feature=None, label=None):
self.feature = feature
self.label = label
def __len__(self):
return len(self.feature)
def __getitem__(self, idx):
x = self.feature[idx]
y = self.label[idx]
x = torch.from_numpy(x).float()
y = torch.from_numpy(np.array([y])).float()
return x, y
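As a quick sanity check (with hypothetical random arrays, not the real data), indexing the dataset should return a float feature tensor and a 1-element label tensor:
# Sanity check with hypothetical random arrays (not the real data).
toy_dataset = SmellPittsburghDataset(feature=np.random.rand(10, 4),
                                     label=np.random.randint(0, 2, 10))
toy_x, toy_y = toy_dataset[0]
print(toy_x.shape, toy_y.shape) # torch.Size([4]) torch.Size([1])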
def scorer(y_predict, y):
"""
A customized scoring function to evaluate a PyTorch classifier.
Parameters
----------
y_predict : torch.Tensor
The predicted labels.
y : torch.Tensor
The true labels.
Returns
-------
dict of int or float
A dictionary of evaluation metrics.
"""
c = confusion_matrix(y, y_predict, labels=[0,1])
return {"tn": c[0,0], "fp": c[0,1], "fn": c[1,0], "tp": c[1,1]}
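For example, with hypothetical labels and predictions that contain one true negative, one false positive, one false negative, and two true positives:
# A small sketch of the scorer with hypothetical labels and predictions.
toy_y = torch.tensor([0, 0, 1, 1, 1])
toy_y_predict = torch.tensor([0, 1, 0, 1, 1])
print(scorer(toy_y_predict, toy_y)) # expected: tn=1, fp=1, fn=1, tp=2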
def train(model, criterion, optimizer, dataloader_train, dataloader_test, num_epochs=30):
"""Train the model."""
def run_one_epoch(dataloader, phase="train"):
if phase == "train": model.train() # training mode
else: model.eval() # evaluation mode
c = 0 # just a counter
accu_loss = 0 # accumulated loss
accu_score = None # accumulated scores
# Loop the data
for x, y in dataloader:
c += 1 # increase the counter
y_pred = model(x)
loss = criterion(y_pred, y)
if phase == "train":
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Store statistics for the training set
            accu_loss += loss.detach() # add up the loss (detach so the graph is not kept alive)
y_label = (y_pred > 0.5).float()
score = scorer(y_label, y)
if accu_score is None:
accu_score = score
else:
for k in score:
accu_score[k] += score[k]
# Return statistics
return accu_loss/c, accu_score
def compute_statistics(score):
tp_fp = score["tp"] + score["fp"]
if tp_fp == 0:
precision = 0
else:
precision = round(score["tp"]/tp_fp, 2)
tp_fn = score["tp"] + score["fn"]
if tp_fn == 0:
recall = 0
else:
recall = round(score["tp"]/tp_fn, 2)
tp_tp_fp_fn = tp_fp + tp_fn
if tp_tp_fp_fn == 0:
f1 = 0
else:
f1 = round(2*score["tp"]/tp_tp_fp_fn, 2)
return precision, recall, f1
    # Loop over the epochs.
for epoch in range(num_epochs):
# Run through the entire training set
loss_train, score_train = run_one_epoch(dataloader_train, phase="train")
loss_train = torch.round(loss_train, decimals=2)
p_train, r_train, f1_train = compute_statistics(score_train)
# Run through the entire testing set
with torch.no_grad():
loss_test, score_test = run_one_epoch(dataloader_test, phase="test")
loss_test = torch.round(loss_test, decimals=2)
p_test, r_test, f1_test = compute_statistics(score_test)
# Print loss and scores
if ((epoch+1)%30 == 0):
            print("-" * 10)
            print(f"Epoch [{epoch+1}/{num_epochs}]")
            print(f"Training loss: {loss_train:.4f}, precision: {p_train:.2f}, recall: {r_train:.2f}, f1: {f1_train:.2f}")
            print(f"Training evaluation: {score_train}")
            print(f"Testing loss: {loss_test:.4f}, precision: {p_test:.2f}, recall: {r_test:.2f}, f1: {f1_test:.2f}")
print(f"Testing evaluation: {score_test}")
# Return statistics
return p_test, r_test, f1_test
# Define neural network model
class DeepLogisticRegression(nn.Module):
def __init__(self, input_size, hidden_size=64, output_size=1):
super(DeepLogisticRegression, self).__init__()
self.linear1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.linear2 = nn.Linear(hidden_size, output_size)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
out = self.linear1(x)
out = self.relu(out)
out = self.linear2(out)
out = self.sigmoid(out)
return out
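A quick sanity check of the model on a hypothetical random batch: the forward pass should return probabilities in (0, 1) with shape (batch_size, 1):
# Sanity check with a hypothetical random batch of 5 samples and 148 features.
toy_model = DeepLogisticRegression(input_size=148)
toy_out = toy_model(torch.rand(5, 148))
print(toy_out.shape, bool((toy_out > 0).all()), bool((toy_out < 1).all()))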
# Create time series splits for cross-validation.
splits = []
dataset_size = df_x.shape[0]
train_size = 8000
test_size = 168
input_size = feature.shape[1]
for i in range(train_size, dataset_size, test_size):
start = i - train_size
end = i + test_size
if (end >= dataset_size): break
train_index = range(start, i)
test_index = range(i, end)
splits.append((list(train_index), list(test_index)))
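The loop above slides a fixed-size training window forward by test_size at each step; a toy walkthrough with small hypothetical sizes:
# Toy walkthrough of the sliding-window logic with small hypothetical sizes:
# train on 4 points, test on the next 2, then slide forward by 2.
toy_total, toy_train, toy_test = 10, 4, 2
for i in range(toy_train, toy_total, toy_test):
    if i + toy_test >= toy_total: break
    print(list(range(i - toy_train, i)), "->", list(range(i, i + toy_test)))
# [0, 1, 2, 3] -> [4, 5]
# [2, 3, 4, 5] -> [6, 7]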
# Cross-validate the model for every split
precision_list = []
recall_list = []
f1_list = []
for i in range(len(splits)):
print(f"Split: {i}")
dataset_train = SmellPittsburghDataset(feature=feature[splits[i][0]], label=label[splits[i][0]])
dataset_test = SmellPittsburghDataset(feature=feature[splits[i][1]], label=label[splits[i][1]])
dataloader_train = DataLoader(dataset_train, batch_size=1024, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=1024, shuffle=False)
model = DeepLogisticRegression(input_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
p_test, r_test, f1_test = train(model, criterion, optimizer, dataloader_train, dataloader_test)
precision_list.append(p_test)
recall_list.append(r_test)
f1_list.append(f1_test)
print("="*30)
Split: 0
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.97, f1: 0.97
Training evaluation: {'tn': 7363, 'fp': 11, 'fn': 21, 'tp': 605}
Testing loss: 0.1800, precision: 0.83, recall: 0.71, f1: 0.77
Testing evaluation: {'tn': 144, 'fp': 3, 'fn': 6, 'tp': 15}
==============================
Split: 1
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7385, 'fp': 2, 'fn': 7, 'tp': 606}
Testing loss: 0.2900, precision: 1.00, recall: 0.30, f1: 0.46
Testing evaluation: {'tn': 158, 'fp': 0, 'fn': 7, 'tp': 3}
==============================
Split: 2
----------
Epoch [30/30]
Training loss: 0.0100, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7400, 'fp': 7, 'fn': 15, 'tp': 578}
Testing loss: 1.0500, precision: 0.78, recall: 0.57, f1: 0.66
Testing evaluation: {'tn': 95, 'fp': 10, 'fn': 27, 'tp': 36}
==============================
Split: 3
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7391, 'fp': 1, 'fn': 8, 'tp': 600}
Testing loss: 0.4800, precision: 0.70, recall: 0.52, f1: 0.60
Testing evaluation: {'tn': 135, 'fp': 6, 'fn': 13, 'tp': 14}
==============================
Split: 4
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7382, 'fp': 3, 'fn': 12, 'tp': 603}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 3, 'fn': 0, 'tp': 0}
==============================
Split: 5
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7391, 'fp': 2, 'fn': 19, 'tp': 588}
Testing loss: 0.2700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 0, 'fn': 3, 'tp': 0}
==============================
Split: 6
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7388, 'fp': 2, 'fn': 13, 'tp': 597}
Testing loss: 0.0300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 2, 'fn': 0, 'tp': 0}
==============================
Split: 7
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7399, 'fp': 2, 'fn': 11, 'tp': 588}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 142, 'fp': 17, 'fn': 9, 'tp': 0}
==============================
Split: 8
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7402, 'fp': 0, 'fn': 13, 'tp': 585}
Testing loss: 0.4400, precision: 0.93, recall: 0.48, f1: 0.64
Testing evaluation: {'tn': 138, 'fp': 1, 'fn': 15, 'tp': 14}
==============================
Split: 9
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7373, 'fp': 0, 'fn': 8, 'tp': 619}
Testing loss: 1.5300, precision: 0.83, recall: 0.16, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 1, 'fn': 26, 'tp': 5}
==============================
Split: 10
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7338, 'fp': 4, 'fn': 14, 'tp': 644}
Testing loss: 0.5000, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 8, 'tp': 0}
==============================
Split: 11
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7333, 'fp': 1, 'fn': 4, 'tp': 662}
Testing loss: 0.1800, precision: 0.53, recall: 0.82, f1: 0.64
Testing evaluation: {'tn': 149, 'fp': 8, 'fn': 2, 'tp': 9}
==============================
Split: 12
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7335, 'fp': 7, 'fn': 22, 'tp': 636}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 168, 'fp': 0, 'fn': 0, 'tp': 0}
==============================
Split: 13
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7341, 'fp': 1, 'fn': 11, 'tp': 647}
Testing loss: 0.1400, precision: 0.83, recall: 0.62, f1: 0.71
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 3, 'tp': 5}
==============================
Split: 14
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7332, 'fp': 6, 'fn': 14, 'tp': 648}
Testing loss: 0.8200, precision: 0.64, recall: 0.32, f1: 0.42
Testing evaluation: {'tn': 142, 'fp': 4, 'fn': 15, 'tp': 7}
==============================
Split: 15
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.99
Training evaluation: {'tn': 7329, 'fp': 0, 'fn': 19, 'tp': 652}
Testing loss: 0.3200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 160, 'fp': 1, 'fn': 7, 'tp': 0}
==============================
Split: 16
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.97, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7339, 'fp': 18, 'fn': 34, 'tp': 609}
Testing loss: 0.6100, precision: 0.57, recall: 0.42, f1: 0.48
Testing evaluation: {'tn': 127, 'fp': 10, 'fn': 18, 'tp': 13}
==============================
Split: 17
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7347, 'fp': 1, 'fn': 16, 'tp': 636}
Testing loss: 0.4400, precision: 0.20, recall: 0.12, f1: 0.15
Testing evaluation: {'tn': 156, 'fp': 4, 'fn': 7, 'tp': 1}
==============================
Split: 18
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7347, 'fp': 10, 'fn': 26, 'tp': 617}
Testing loss: 0.4500, precision: 0.11, recall: 0.06, f1: 0.08
Testing evaluation: {'tn': 144, 'fp': 8, 'fn': 15, 'tp': 1}
==============================
Split: 19
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7338, 'fp': 3, 'fn': 29, 'tp': 630}
Testing loss: 0.1100, precision: 0.58, recall: 0.78, f1: 0.67
Testing evaluation: {'tn': 154, 'fp': 5, 'fn': 2, 'tp': 7}
==============================
Split: 20
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7342, 'fp': 0, 'fn': 12, 'tp': 646}
Testing loss: 0.6500, precision: 0.14, recall: 0.04, f1: 0.06
Testing evaluation: {'tn': 138, 'fp': 6, 'fn': 23, 'tp': 1}
==============================
Split: 21
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7307, 'fp': 11, 'fn': 44, 'tp': 638}
Testing loss: 1.2400, precision: 1.00, recall: 0.37, f1: 0.54
Testing evaluation: {'tn': 130, 'fp': 0, 'fn': 24, 'tp': 14}
==============================
Split: 22
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7266, 'fp': 14, 'fn': 35, 'tp': 685}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 23
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7283, 'fp': 8, 'fn': 29, 'tp': 680}
Testing loss: 0.1300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 0, 'fn': 2, 'tp': 0}
==============================
Split: 24
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7298, 'fp': 13, 'fn': 48, 'tp': 641}
Testing loss: 0.2200, precision: 0.20, recall: 0.20, f1: 0.20
Testing evaluation: {'tn': 150, 'fp': 8, 'fn': 8, 'tp': 2}
==============================
Split: 25
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7289, 'fp': 12, 'fn': 31, 'tp': 668}
Testing loss: 0.1500, precision: 0.50, recall: 0.12, f1: 0.20
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 7, 'tp': 1}
==============================
Split: 26
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 16, 'fn': 36, 'tp': 656}
Testing loss: 0.1300, precision: 0.90, recall: 0.60, f1: 0.72
Testing evaluation: {'tn': 152, 'fp': 1, 'fn': 6, 'tp': 9}
==============================
Split: 27
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7280, 'fp': 13, 'fn': 52, 'tp': 655}
Testing loss: 0.6600, precision: 0.86, recall: 0.26, f1: 0.40
Testing evaluation: {'tn': 144, 'fp': 1, 'fn': 17, 'tp': 6}
==============================
Split: 28
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7264, 'fp': 13, 'fn': 37, 'tp': 686}
Testing loss: 0.0100, precision: 1.00, recall: 1.00, f1: 1.00
Testing evaluation: {'tn': 162, 'fp': 0, 'fn': 0, 'tp': 6}
==============================
Split: 29
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7289, 'fp': 0, 'fn': 15, 'tp': 696}
Testing loss: 0.3000, precision: 0.38, recall: 0.27, f1: 0.32
Testing evaluation: {'tn': 152, 'fp': 5, 'fn': 8, 'tp': 3}
==============================
Split: 30
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7271, 'fp': 7, 'fn': 25, 'tp': 697}
Testing loss: 1.0500, precision: 0.95, recall: 0.39, f1: 0.55
Testing evaluation: {'tn': 121, 'fp': 1, 'fn': 28, 'tp': 18}
==============================
Split: 31
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7228, 'fp': 10, 'fn': 21, 'tp': 741}
Testing loss: 1.1600, precision: 0.88, recall: 0.25, f1: 0.39
Testing evaluation: {'tn': 139, 'fp': 1, 'fn': 21, 'tp': 7}
==============================
Split: 32
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7212, 'fp': 6, 'fn': 27, 'tp': 755}
Testing loss: 0.6200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 1, 'fn': 5, 'tp': 0}
==============================
Split: 33
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7227, 'fp': 8, 'fn': 18, 'tp': 747}
Testing loss: 0.2100, precision: 0.47, recall: 0.78, f1: 0.58
Testing evaluation: {'tn': 151, 'fp': 8, 'fn': 2, 'tp': 7}
==============================
Split: 34
----------
Epoch [30/30]
Training loss: 0.0400, precision: 0.97, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7202, 'fp': 24, 'fn': 53, 'tp': 721}
Testing loss: 0.8100, precision: 0.25, recall: 0.29, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 15, 'fn': 12, 'tp': 5}
==============================
Split: 35
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7211, 'fp': 10, 'fn': 25, 'tp': 754}
Testing loss: 0.2900, precision: 0.54, recall: 0.65, f1: 0.59
Testing evaluation: {'tn': 137, 'fp': 11, 'fn': 7, 'tp': 13}
==============================
Split: 36
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7234, 'fp': 1, 'fn': 6, 'tp': 759}
Testing loss: 0.5700, precision: 0.67, recall: 0.20, f1: 0.31
Testing evaluation: {'tn': 157, 'fp': 1, 'fn': 8, 'tp': 2}
==============================
Split: 37
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7225, 'fp': 10, 'fn': 54, 'tp': 711}
Testing loss: 0.3100, precision: 0.42, recall: 0.50, f1: 0.46
Testing evaluation: {'tn': 141, 'fp': 11, 'fn': 8, 'tp': 8}
==============================
Split: 38
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7224, 'fp': 10, 'fn': 18, 'tp': 748}
Testing loss: 0.0700, precision: 0.90, recall: 0.75, f1: 0.82
Testing evaluation: {'tn': 155, 'fp': 1, 'fn': 3, 'tp': 9}
==============================
Split: 39
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7217, 'fp': 16, 'fn': 28, 'tp': 739}
Testing loss: 0.3900, precision: 0.26, recall: 0.75, f1: 0.39
Testing evaluation: {'tn': 143, 'fp': 17, 'fn': 2, 'tp': 6}
==============================
Split: 40
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7235, 'fp': 12, 'fn': 24, 'tp': 729}
Testing loss: 0.8100, precision: 0.91, recall: 0.31, f1: 0.47
Testing evaluation: {'tn': 135, 'fp': 1, 'fn': 22, 'tp': 10}
==============================
Split: 41
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7238, 'fp': 9, 'fn': 13, 'tp': 740}
Testing loss: 0.9700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 132, 'fp': 32, 'fn': 4, 'tp': 0}
==============================
Split: 42
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7270, 'fp': 4, 'fn': 15, 'tp': 711}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 148, 'fp': 6, 'fn': 14, 'tp': 0}
==============================
Split: 43
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7266, 'fp': 6, 'fn': 19, 'tp': 709}
Testing loss: 1.3900, precision: 0.67, recall: 0.19, f1: 0.30
Testing evaluation: {'tn': 122, 'fp': 4, 'fn': 34, 'tp': 8}
==============================
Split: 44
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7231, 'fp': 14, 'fn': 49, 'tp': 706}
Testing loss: 0.9600, precision: 0.08, recall: 0.04, f1: 0.05
Testing evaluation: {'tn': 132, 'fp': 11, 'fn': 24, 'tp': 1}
==============================
Split: 45
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7216, 'fp': 11, 'fn': 28, 'tp': 745}
Testing loss: 0.5800, precision: 0.30, recall: 0.60, f1: 0.40
Testing evaluation: {'tn': 132, 'fp': 21, 'fn': 6, 'tp': 9}
==============================
Split: 46
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.96, recall: 0.94, f1: 0.95
Training evaluation: {'tn': 7199, 'fp': 27, 'fn': 45, 'tp': 729}
Testing loss: 0.1700, precision: 0.89, recall: 0.57, f1: 0.70
Testing evaluation: {'tn': 153, 'fp': 1, 'fn': 6, 'tp': 8}
==============================
Split: 47
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7207, 'fp': 15, 'fn': 32, 'tp': 746}
Testing loss: 0.1200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 161, 'fp': 5, 'fn': 2, 'tp': 0}
==============================
Split: 48
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7223, 'fp': 8, 'fn': 27, 'tp': 742}
Testing loss: 0.2100, precision: 0.82, recall: 0.89, f1: 0.85
Testing evaluation: {'tn': 126, 'fp': 7, 'fn': 4, 'tp': 31}
==============================
Split: 49
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7222, 'fp': 4, 'fn': 40, 'tp': 734}
Testing loss: 0.0200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 50
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7231, 'fp': 15, 'fn': 34, 'tp': 720}
Testing loss: 0.2400, precision: 0.40, recall: 0.89, f1: 0.55
Testing evaluation: {'tn': 147, 'fp': 12, 'fn': 1, 'tp': 8}
==============================
Split: 51
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 7, 'fn': 47, 'tp': 654}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 6, 'fn': 0, 'tp': 0}
==============================
# Print the overall performance
print("average precision:", round(np.mean(precision_list), 2))
print("average recall:", round(np.mean(recall_list), 2))
print("average f1-score:", round(np.mean(f1_list), 2))
average precision: 0.44
average recall: 0.32
average f1-score: 0.34