PyTorch Implementation of Smell Prediction

import pandas as pd
import numpy as np
from os.path import isfile, join
from os import listdir
from copy import deepcopy
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

Below we define the functions for preprocessing the data.

def answer_preprocess_sensor(df_list):
    """
    This function is the answer to Task 5.
    Preprocess sensor data.
    
    Parameters
    ----------
    df_list : list of pandas.DataFrame
        A list of data frames that contain sensor data from multiple stations.
         
    Returns
    -------
    pandas.DataFrame
        The preprocessed sensor data.
    """
    # Resample all the data frames.
    df_resample_list = []
    for df in df_list:
        # Convert the timestamp to datetime.
        df.index = pd.to_datetime(df.index, unit="s", utc=True)
        # Resample the timestamps by hour and average all the previous values.
        # Because we want data from the past, the label needs to be "right".
        df_resample_list.append(df.resample("60Min", label="right").mean())
    
    # Merge all data frames.
    df = df_resample_list.pop(0)
    index_name = df.index.name
    while len(df_resample_list) != 0:
        # We need to use outer merging since we want to preserve data from both data frames.
        df = pd.merge_ordered(df, df_resample_list.pop(0), on=df.index.name, how="outer", fill_method=None)
        # Move the datetime column to index
        df = df.set_index(index_name)

    # Fill in the missing data with value -1.
    df = df.fillna(-1)
    return df
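
To see why label="right" matters, here is a minimal sketch (not part of the pipeline) on a toy series: each hourly bin is stamped with its right edge, so the value at hour T averages the readings from the hour before T.

ts = pd.to_datetime(["2020-01-01 00:10", "2020-01-01 00:50", "2020-01-01 01:30"], utc=True)
toy = pd.Series([1.0, 3.0, 5.0], index=ts)
# The bin [00:00, 01:00) is labeled 01:00 and averages the readings 1.0 and 3.0.
print(toy.resample("60Min", label="right").mean())
# 01:00 -> 2.0, 02:00 -> 5.0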


def answer_preprocess_smell(df):
    """
    This function is the answer to Task 4.
    Preprocess smell data.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The raw smell reports data.
         
    Returns
    -------
    pandas.DataFrame
        The preprocessed smell data.
    """
    # Copy the dataframe to avoid editing the original one.
    df = df.copy(deep=True)
    
    # Drop the columns that we do not need.
    df = df.drop(columns=["feelings_symptoms", "smell_description", "zipcode"])
    
    # Keep only the reports with smell values between 3 and 5 (inclusive).
    df = df[(df["smell_value"]>=3)&(df["smell_value"]<=5)]
    
    # Convert the timestamp to datetime.
    df.index = pd.to_datetime(df.index, unit="s", utc=True)

    # Resample the timestamps by hour and sum up all the future values.
    # Because we want data from the future, the label needs to be "left".
    df = df.resample("60Min", label="left").sum()
    
    # Fill in the missing data with value 0.
    df = df.fillna(0)
    return df
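
Similarly, here is a minimal sketch (not part of the pipeline) of label="left": each hourly bin is stamped with its left edge, so the value at hour T sums the reports from the hour after T.

ts = pd.to_datetime(["2020-01-01 00:10", "2020-01-01 00:50", "2020-01-01 01:30"], utc=True)
toy = pd.Series([3, 4, 5], index=ts)
# The bin [00:00, 01:00) is labeled 00:00 and sums the reports 3 and 4.
print(toy.resample("60Min", label="left").sum())
# 00:00 -> 7, 01:00 -> 5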


def answer_sum_current_and_future_data(df, n_hr=0):
    """
    This function is the answer to Task 6.
    Sum up data in the current and future hours.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The preprocessed smell data.
    n_hr : int
         Number of hours that we want to sum up the future smell data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed smell data.
    """
    # Copy data frame to prevent editing the original one.
    df = df.copy(deep=True)
    
    # Fast return if n_hr is 0
    if n_hr == 0: return df
    
    # Sum up all smell_values in future hours.
    # The rolling function only sums up the current and previous values.
    # So we need to shift the result back to get the values in the future.
    # Be careful that we need to add 1 to the rolling window size,
    # because window size 1 means only using the current data.
    # Parameter "closed" needs to be "right" because we want to include the current data.
    df = df.rolling(n_hr+1, min_periods=1, closed="right").sum().shift(-1*n_hr)
    
    # Delete the last n_hr rows.
    # These n_hr rows have wrong data due to data shifting.
    df = df.iloc[:-1*n_hr]
    return df
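
The rolling-and-shift trick above is easy to verify on a toy data frame (a sketch, not part of the pipeline): with n_hr=2, each row should become the sum of itself and the next two rows, and the last two rows are dropped.

toy = pd.DataFrame({"smell_value": [1, 2, 3, 4, 5]})
out = answer_sum_current_and_future_data(toy, n_hr=2)
print(out["smell_value"].tolist())
# [6.0, 9.0, 12.0], i.e., 1+2+3, 2+3+4, and 3+4+5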


def insert_previous_data_to_cols(df, n_hr=0):
    """
    Insert columns to indicate the data from the previous hours.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The preprocessed sensor data.
    n_hr : int
        Number of hours that we want to insert the previous sensor data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed sensor data.
    """
    # Copy data frame to prevent editing the original one.
    df = df.copy(deep=True)

    # Add the data from the previous hours.
    df_all = []
    for h in range(1, n_hr + 1):
        # Shift the data frame to get previous data.
        df_pre = df.shift(h)
        # Edit the name to indicate it is previous data.
        # The original data frame already has data from the previous 1 hour.
        # (as indicated in the preprocessing phase of sensor data)
        # So we need to add 1 here.
        df_pre.columns += "_pre_" + str(h+1) + "h"
        # Add the data to an array for merging.
        df_all.append(df_pre)

    # Rename the columns in the original data frame.
    # The original data frame already has data from the previous 1 hour.
    # (as indicated in the preprocessing phase of sensor data)
    df.columns += "_pre_1h"

    # Merge all data.
    df_merge = df
    for d in df_all:
        # The join function merges dataframes by index.
        df_merge = df_merge.join(d)
        
    # Delete the first n_hr rows.
    # These n_hr rows have no data due to data shifting.
    df_merge = df_merge.iloc[n_hr:]
    return df_merge
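
A quick sanity check (not part of the pipeline): with n_hr=1, each column gains a *_pre_2h copy holding the value from one row (hour) earlier, and the first row is dropped.

toy = pd.DataFrame({"sensor_a": [10, 20, 30, 40]})
print(insert_previous_data_to_cols(toy, n_hr=1))
#    sensor_a_pre_1h  sensor_a_pre_2h
# 1               20             10.0
# 2               30             20.0
# 3               40             30.0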


def convert_wind_direction(df):
    """
    Convert wind directions to sine and cosine components.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The data frame that contains the wind direction data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed data frame.
    """
    # Copy data frame to prevent editing the original one.
    df_cp = df.copy(deep=True)
    
    # Convert columns with wind directions.
    for c in df.columns:
        if "SONICWD_DEG" in c:
            df_c = df[c]
            df_c_cos = np.cos(np.deg2rad(df_c))
            df_c_sin = np.sin(np.deg2rad(df_c))
            df_c_cos.name += "_cosine"
            df_c_sin.name += "_sine"
            df_cp.drop([c], axis=1, inplace=True)
            df_cp[df_c_cos.name] = df_c_cos
            df_cp[df_c_sin.name] = df_c_sin
    return df_cp
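
The point of the sine/cosine encoding is that directions near 0 and 360 degrees, which look far apart as raw numbers, end up close together in the transformed space. A minimal check (not part of the pipeline):

for deg in [350, 10]:
    rad = np.deg2rad(deg)
    print(deg, round(np.cos(rad), 3), round(np.sin(rad), 3))
# 350 0.985 -0.174
# 10 0.985 0.174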


def compute_feature_label(df_smell, df_sensor, b_hr_sensor=0, f_hr_smell=0):
    """
    Compute features and labels from the smell and sensor data.
    
    Parameters
    ----------
    df_smell : pandas.DataFrame
        The preprocessed smell data.
    df_sensor : pandas.DataFrame
        The preprocessed sensor data.
    b_hr_sensor : int
        Number of hours that we want to insert the previous sensor data.
    f_hr_smell : int
        Number of hours that we want to sum up the future smell data.
    
    Returns
    -------
    df_x : pandas.DataFrame
        The features that we want to use for modeling.
    df_y : pandas.DataFrame
        The labels that we want to use for modeling.
    """
    # Copy data frames to prevent editing the original ones.
    df_smell = df_smell.copy(deep=True)
    df_sensor = df_sensor.copy(deep=True)
    
    # Replace -1 values in the sensor data with NaN.
    df_sensor[df_sensor==-1] = np.nan
    
    # Convert all wind directions.
    df_sensor = convert_wind_direction(df_sensor)
    
    # Scale sensor data and fill in missing values
    df_sensor = (df_sensor - df_sensor.mean()) / df_sensor.std()
    df_sensor = df_sensor.round(6)
    df_sensor = df_sensor.fillna(-1)
    
    # Insert previous sensor data as features.
    # Notice that df_sensor is already using the previous data.
    # So b_hr_sensor=0 means using data from the previous 1 hour.
    # And b_hr_sensor=n means using data from the previous n+1 hours.
    df_sensor = insert_previous_data_to_cols(df_sensor, b_hr_sensor)
    
    # Sum up current and future smell values as label.
    # Notice that the df_smell is already the data from the future 1 hour.
    # (as indicated in the preprocessing phase of smell data)
    # So f_hr_smell=0 means using data from the future 1 hour.
    # And f_hr_smell=n means using data from the future n+1 hours.
    df_smell = answer_sum_current_and_future_data(df_smell, f_hr_smell)
    
    # Add a suffix to the column names of the smell data to prevent confusion.
    # See the description above for the reason for adding 1 to f_hr_smell.
    df_smell.columns += "_future_" + str(f_hr_smell+1) + "h"
    
    # First, merge the two data frames on the timestamps that appear in both.
    # In this way, we synchronize the timestamps in the sensor and smell data.
    # This also means that the sensor and smell data have the same number of rows.
    df = pd.merge_ordered(df_sensor.reset_index(), df_smell.reset_index(), on=df_smell.index.name, how="inner", fill_method=None)
    
    # Sanity check: there should be no missing data.
    assert df.isna().sum().sum() == 0, "Error! There is missing data."
    
    # Separate features (x) and labels (y).
    # Copy them so that df_x and df_y are independent data frames rather than
    # views of df, which would trigger a SettingWithCopyWarning below.
    df_x = df[df_sensor.columns].copy()
    df_y = df[df_smell.columns].copy()
    
    # Add the hour of day and the day of week.
    dow_radian = df["EpochTime"].dt.dayofweek.copy(deep=True) * 2 * np.pi / 6.0
    tod_radian = df["EpochTime"].dt.hour.copy(deep=True) * 2 * np.pi / 23.0
    df_x.loc[:,"day_of_week_sine"] = np.sin(dow_radian)
    df_x.loc[:,"day_of_week_cosine"] = np.cos(dow_radian)
    df_x.loc[:,"hour_of_day_sine"] = np.sin(tod_radian)
    df_x.loc[:,"hour_of_day_cosine"] = np.cos(tod_radian)
    return df_x, df_y

# Load and preprocess sensor data
path = "smellpgh-v1/esdr_raw"
list_of_files = [f for f in listdir(path) if isfile(join(path, f))]
sensor_raw_list = []
for f in list_of_files:
    sensor_raw_list.append(pd.read_csv(join(path, f)).set_index("EpochTime"))
df_sensor = answer_preprocess_sensor(sensor_raw_list)

# Load and preprocess smell data
smell_raw = pd.read_csv("smellpgh-v1/smell_raw.csv").set_index("EpochTime")
df_smell = answer_preprocess_smell(smell_raw)

# Compute features and labels
df_x, df_y = compute_feature_label(df_smell, df_sensor, b_hr_sensor=2, f_hr_smell=7)
df_x
(Output: a data frame with 16751 rows × 148 columns. The features are the standardized sensor readings with lag suffixes, such as 3.feed_1.SO2_PPM_pre_1h and 3.feed_28.SONICWD_DEG_cosine_pre_3h, plus the day_of_week and hour_of_day sine/cosine columns.)

df_y
smell_value_future_8h
0 8.0
1 5.0
2 5.0
3 5.0
4 5.0
... ...
16746 6.0
16747 6.0
16748 6.0
16749 3.0
16750 11.0

16751 rows × 1 columns

# Set random seed for reproducibility
torch.manual_seed(42)

# Load data
feature = df_x.to_numpy()
# Label a time slot as positive when the total smell value
# in the future 8 hours is at least 40.
label = (df_y >= 40).astype(int)["smell_value_future_8h"].to_numpy()

# Create the dataset object
class SmellPittsburghDataset(Dataset):
    def __init__(self, feature=None, label=None):
        self.feature = feature
        self.label = label

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, idx):
        x = self.feature[idx]
        y = self.label[idx]
        x = torch.from_numpy(x).float()
        y = torch.from_numpy(np.array([y])).float()
        return x, y
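
As a quick check (not part of the pipeline), the dataset can be exercised with random arrays shaped like our features and labels; each batch yields a feature matrix and a column of labels.

toy_x = np.random.rand(10, 4)
toy_y = np.random.randint(0, 2, size=10)
toy_loader = DataLoader(SmellPittsburghDataset(feature=toy_x, label=toy_y), batch_size=4)
for bx, by in toy_loader:
    print(bx.shape, by.shape)
# torch.Size([4, 4]) torch.Size([4, 1]) for the full batches, then a final batch of 2
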
def scorer(y_predict, y):
    """
    A customized scoring function to evaluate a PyTorch classifier.
    
    Parameters
    ----------
    y_predict : torch.Tensor
        The predicted labels.
    y : torch.Tensor
        The true labels.
    
    Returns
    -------
    dict of int or float
        A dictionary of evaluation metrics.
    """
    c = confusion_matrix(y, y_predict, labels=[0,1])
    return {"tn": c[0,0], "fp": c[0,1], "fn": c[1,0], "tp": c[1,1]}
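
For example (a quick check, not part of the pipeline), one true positive, one false negative, and one false positive produce the following counts:

print(scorer(torch.tensor([1, 0, 1]), torch.tensor([1, 1, 0])))
# {'tn': 0, 'fp': 1, 'fn': 1, 'tp': 1}
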
def train(model, criterion, optimizer, dataloader_train, dataloader_test, num_epochs=30):
    """Train the model."""
    
    def run_one_epoch(dataloader, phase="train"):
        if phase == "train": model.train() # training mode
        else: model.eval() # evaluation mode
        c = 0 # just a counter
        accu_loss = 0 # accumulated loss
        accu_score = None # accumulated scores
        # Loop the data
        for x, y in dataloader:
            c += 1 # increase the counter
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if phase == "train":
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # Accumulate statistics for this epoch.
            # Use .item() so the loss is a Python float and the autograd graph
            # is not retained across batches.
            accu_loss += loss.item() # add up the loss
            y_label = (y_pred > 0.5).float()
            score = scorer(y_label, y)
            if accu_score is None:
                accu_score = score
            else:
                for k in score:
                    accu_score[k] += score[k]
        # Return statistics
        return accu_loss/c, accu_score
    
    def compute_statistics(score):
        tp_fp = score["tp"] + score["fp"]
        if tp_fp == 0:
            precision = 0
        else:
            precision = round(score["tp"]/tp_fp, 2)
        tp_fn = score["tp"] + score["fn"]
        if tp_fn == 0:
            recall = 0
        else:
            recall = round(score["tp"]/tp_fn, 2)
        tp_tp_fp_fn = tp_fp + tp_fn
        if tp_tp_fp_fn == 0:
            f1 = 0
        else:
            f1 = round(2*score["tp"]/tp_tp_fp_fn, 2)
        return precision, recall, f1
    
    # Run the training and testing phases for multiple epochs.
    for epoch in range(num_epochs):
        # Run through the entire training set
        loss_train, score_train = run_one_epoch(dataloader_train, phase="train")
        loss_train = round(loss_train, 2)
        p_train, r_train, f1_train = compute_statistics(score_train)
        # Run through the entire testing set
        with torch.no_grad():
            loss_test, score_test = run_one_epoch(dataloader_test, phase="test")
        loss_test = round(loss_test, 2)
        p_test, r_test, f1_test = compute_statistics(score_test)
        # Print loss and scores
        if (epoch+1) % 30 == 0:
            print("-"*10)
            print(f"Epoch [{epoch+1}/{num_epochs}]")
            print(f"Training loss: {loss_train:.4f}, precision: {p_train:.2f}, recall: {r_train:.2f}, f1: {f1_train:.2f}")
            print(f"Training evaluation: {score_train}")
            print(f"Testing loss: {loss_test:.4f}, precision: {p_test:.2f}, recall: {r_test:.2f}, f1: {f1_test:.2f}")
            print(f"Testing evaluation: {score_test}")
    
    # Return statistics
    return p_test, r_test, f1_test
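
As a quick arithmetic check (not part of the code), the counts that split 0 below produces on its test week (tp=15, fp=3, fn=6) yield the reported scores:

tp, fp, fn = 15, 3, 6
print(round(tp / (tp + fp), 2))                    # precision: 15/18 = 0.83
print(round(tp / (tp + fn), 2))                    # recall: 15/21 = 0.71
print(round(2 * tp / ((tp + fp) + (tp + fn)), 2))  # f1: 30/39 = 0.77
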
# Define neural network model
class DeepLogisticRegression(nn.Module):
    def __init__(self, input_size, hidden_size=64, output_size=1):
        super(DeepLogisticRegression, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.sigmoid(out)
        return out
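
A minimal shape check (not part of the pipeline): the model maps a batch of feature vectors to one sigmoid output per sample, which is the form that nn.BCELoss expects.

toy_model = DeepLogisticRegression(input_size=4)
toy_out = toy_model(torch.randn(2, 4))
print(toy_out.shape)  # torch.Size([2, 1]), with values in (0, 1)
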
# Create time series splits for cross-validation.
splits = []
dataset_size = df_x.shape[0]
train_size = 8000
test_size = 168
input_size = feature.shape[1]
for i in range(train_size, dataset_size, test_size):
    start = i - train_size
    end = i + test_size
    if (end >= dataset_size): break
    train_index = range(start, i)
    test_index = range(i, end)
    splits.append((list(train_index), list(test_index)))
    
# Cross-validate the model for every split
precision_list = []
recall_list = []
f1_list = []
for i in range(len(splits)):
    print(f"Split: {i}")
    dataset_train = SmellPittsburghDataset(feature=feature[splits[i][0]], label=label[splits[i][0]])
    dataset_test = SmellPittsburghDataset(feature=feature[splits[i][1]], label=label[splits[i][1]])
    dataloader_train = DataLoader(dataset_train, batch_size=1024, shuffle=True)
    dataloader_test = DataLoader(dataset_test, batch_size=1024, shuffle=False)
    model = DeepLogisticRegression(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    p_test, r_test, f1_test = train(model, criterion, optimizer, dataloader_train, dataloader_test)
    precision_list.append(p_test)
    recall_list.append(r_test)
    f1_list.append(f1_test)
    print("="*30)
Split: 0
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.97, f1: 0.97
Training evaluation: {'tn': 7363, 'fp': 11, 'fn': 21, 'tp': 605}
Testing loss: 0.1800, precision: 0.83, recall: 0.71, f1: 0.77
Testing evaluation: {'tn': 144, 'fp': 3, 'fn': 6, 'tp': 15}
==============================
Split: 1
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7385, 'fp': 2, 'fn': 7, 'tp': 606}
Testing loss: 0.2900, precision: 1.00, recall: 0.30, f1: 0.46
Testing evaluation: {'tn': 158, 'fp': 0, 'fn': 7, 'tp': 3}
==============================
Split: 2
----------
Epoch [30/30]
Training loss: 0.0100, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7400, 'fp': 7, 'fn': 15, 'tp': 578}
Testing loss: 1.0500, precision: 0.78, recall: 0.57, f1: 0.66
Testing evaluation: {'tn': 95, 'fp': 10, 'fn': 27, 'tp': 36}
==============================
Split: 3
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7391, 'fp': 1, 'fn': 8, 'tp': 600}
Testing loss: 0.4800, precision: 0.70, recall: 0.52, f1: 0.60
Testing evaluation: {'tn': 135, 'fp': 6, 'fn': 13, 'tp': 14}
==============================
Split: 4
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7382, 'fp': 3, 'fn': 12, 'tp': 603}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 3, 'fn': 0, 'tp': 0}
==============================
Split: 5
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7391, 'fp': 2, 'fn': 19, 'tp': 588}
Testing loss: 0.2700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 0, 'fn': 3, 'tp': 0}
==============================
Split: 6
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7388, 'fp': 2, 'fn': 13, 'tp': 597}
Testing loss: 0.0300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 2, 'fn': 0, 'tp': 0}
==============================
Split: 7
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7399, 'fp': 2, 'fn': 11, 'tp': 588}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 142, 'fp': 17, 'fn': 9, 'tp': 0}
==============================
Split: 8
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7402, 'fp': 0, 'fn': 13, 'tp': 585}
Testing loss: 0.4400, precision: 0.93, recall: 0.48, f1: 0.64
Testing evaluation: {'tn': 138, 'fp': 1, 'fn': 15, 'tp': 14}
==============================
Split: 9
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7373, 'fp': 0, 'fn': 8, 'tp': 619}
Testing loss: 1.5300, precision: 0.83, recall: 0.16, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 1, 'fn': 26, 'tp': 5}
==============================
Split: 10
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7338, 'fp': 4, 'fn': 14, 'tp': 644}
Testing loss: 0.5000, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 8, 'tp': 0}
==============================
Split: 11
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7333, 'fp': 1, 'fn': 4, 'tp': 662}
Testing loss: 0.1800, precision: 0.53, recall: 0.82, f1: 0.64
Testing evaluation: {'tn': 149, 'fp': 8, 'fn': 2, 'tp': 9}
==============================
Split: 12
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7335, 'fp': 7, 'fn': 22, 'tp': 636}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 168, 'fp': 0, 'fn': 0, 'tp': 0}
==============================
Split: 13
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7341, 'fp': 1, 'fn': 11, 'tp': 647}
Testing loss: 0.1400, precision: 0.83, recall: 0.62, f1: 0.71
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 3, 'tp': 5}
==============================
Split: 14
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7332, 'fp': 6, 'fn': 14, 'tp': 648}
Testing loss: 0.8200, precision: 0.64, recall: 0.32, f1: 0.42
Testing evaluation: {'tn': 142, 'fp': 4, 'fn': 15, 'tp': 7}
==============================
Split: 15
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.99
Training evaluation: {'tn': 7329, 'fp': 0, 'fn': 19, 'tp': 652}
Testing loss: 0.3200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 160, 'fp': 1, 'fn': 7, 'tp': 0}
==============================
Split: 16
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.97, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7339, 'fp': 18, 'fn': 34, 'tp': 609}
Testing loss: 0.6100, precision: 0.57, recall: 0.42, f1: 0.48
Testing evaluation: {'tn': 127, 'fp': 10, 'fn': 18, 'tp': 13}
==============================
Split: 17
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7347, 'fp': 1, 'fn': 16, 'tp': 636}
Testing loss: 0.4400, precision: 0.20, recall: 0.12, f1: 0.15
Testing evaluation: {'tn': 156, 'fp': 4, 'fn': 7, 'tp': 1}
==============================
Split: 18
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7347, 'fp': 10, 'fn': 26, 'tp': 617}
Testing loss: 0.4500, precision: 0.11, recall: 0.06, f1: 0.08
Testing evaluation: {'tn': 144, 'fp': 8, 'fn': 15, 'tp': 1}
==============================
Split: 19
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7338, 'fp': 3, 'fn': 29, 'tp': 630}
Testing loss: 0.1100, precision: 0.58, recall: 0.78, f1: 0.67
Testing evaluation: {'tn': 154, 'fp': 5, 'fn': 2, 'tp': 7}
==============================
Split: 20
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7342, 'fp': 0, 'fn': 12, 'tp': 646}
Testing loss: 0.6500, precision: 0.14, recall: 0.04, f1: 0.06
Testing evaluation: {'tn': 138, 'fp': 6, 'fn': 23, 'tp': 1}
==============================
Split: 21
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7307, 'fp': 11, 'fn': 44, 'tp': 638}
Testing loss: 1.2400, precision: 1.00, recall: 0.37, f1: 0.54
Testing evaluation: {'tn': 130, 'fp': 0, 'fn': 24, 'tp': 14}
==============================
Split: 22
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7266, 'fp': 14, 'fn': 35, 'tp': 685}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 23
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7283, 'fp': 8, 'fn': 29, 'tp': 680}
Testing loss: 0.1300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 0, 'fn': 2, 'tp': 0}
==============================
Split: 24
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7298, 'fp': 13, 'fn': 48, 'tp': 641}
Testing loss: 0.2200, precision: 0.20, recall: 0.20, f1: 0.20
Testing evaluation: {'tn': 150, 'fp': 8, 'fn': 8, 'tp': 2}
==============================
Split: 25
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7289, 'fp': 12, 'fn': 31, 'tp': 668}
Testing loss: 0.1500, precision: 0.50, recall: 0.12, f1: 0.20
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 7, 'tp': 1}
==============================
Split: 26
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 16, 'fn': 36, 'tp': 656}
Testing loss: 0.1300, precision: 0.90, recall: 0.60, f1: 0.72
Testing evaluation: {'tn': 152, 'fp': 1, 'fn': 6, 'tp': 9}
==============================
Split: 27
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7280, 'fp': 13, 'fn': 52, 'tp': 655}
Testing loss: 0.6600, precision: 0.86, recall: 0.26, f1: 0.40
Testing evaluation: {'tn': 144, 'fp': 1, 'fn': 17, 'tp': 6}
==============================
Split: 28
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7264, 'fp': 13, 'fn': 37, 'tp': 686}
Testing loss: 0.0100, precision: 1.00, recall: 1.00, f1: 1.00
Testing evaluation: {'tn': 162, 'fp': 0, 'fn': 0, 'tp': 6}
==============================
Split: 29
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7289, 'fp': 0, 'fn': 15, 'tp': 696}
Testing loss: 0.3000, precision: 0.38, recall: 0.27, f1: 0.32
Testing evaluation: {'tn': 152, 'fp': 5, 'fn': 8, 'tp': 3}
==============================
Split: 30
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7271, 'fp': 7, 'fn': 25, 'tp': 697}
Testing loss: 1.0500, precision: 0.95, recall: 0.39, f1: 0.55
Testing evaluation: {'tn': 121, 'fp': 1, 'fn': 28, 'tp': 18}
==============================
Split: 31
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7228, 'fp': 10, 'fn': 21, 'tp': 741}
Testing loss: 1.1600, precision: 0.88, recall: 0.25, f1: 0.39
Testing evaluation: {'tn': 139, 'fp': 1, 'fn': 21, 'tp': 7}
==============================
Split: 32
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7212, 'fp': 6, 'fn': 27, 'tp': 755}
Testing loss: 0.6200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 1, 'fn': 5, 'tp': 0}
==============================
Split: 33
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7227, 'fp': 8, 'fn': 18, 'tp': 747}
Testing loss: 0.2100, precision: 0.47, recall: 0.78, f1: 0.58
Testing evaluation: {'tn': 151, 'fp': 8, 'fn': 2, 'tp': 7}
==============================
Split: 34
----------
Epoch [30/30]
Training loss: 0.0400, precision: 0.97, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7202, 'fp': 24, 'fn': 53, 'tp': 721}
Testing loss: 0.8100, precision: 0.25, recall: 0.29, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 15, 'fn': 12, 'tp': 5}
==============================
Split: 35
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7211, 'fp': 10, 'fn': 25, 'tp': 754}
Testing loss: 0.2900, precision: 0.54, recall: 0.65, f1: 0.59
Testing evaluation: {'tn': 137, 'fp': 11, 'fn': 7, 'tp': 13}
==============================
Split: 36
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7234, 'fp': 1, 'fn': 6, 'tp': 759}
Testing loss: 0.5700, precision: 0.67, recall: 0.20, f1: 0.31
Testing evaluation: {'tn': 157, 'fp': 1, 'fn': 8, 'tp': 2}
==============================
Split: 37
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7225, 'fp': 10, 'fn': 54, 'tp': 711}
Testing loss: 0.3100, precision: 0.42, recall: 0.50, f1: 0.46
Testing evaluation: {'tn': 141, 'fp': 11, 'fn': 8, 'tp': 8}
==============================
Split: 38
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7224, 'fp': 10, 'fn': 18, 'tp': 748}
Testing loss: 0.0700, precision: 0.90, recall: 0.75, f1: 0.82
Testing evaluation: {'tn': 155, 'fp': 1, 'fn': 3, 'tp': 9}
==============================
Split: 39
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7217, 'fp': 16, 'fn': 28, 'tp': 739}
Testing loss: 0.3900, precision: 0.26, recall: 0.75, f1: 0.39
Testing evaluation: {'tn': 143, 'fp': 17, 'fn': 2, 'tp': 6}
==============================
Split: 40
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7235, 'fp': 12, 'fn': 24, 'tp': 729}
Testing loss: 0.8100, precision: 0.91, recall: 0.31, f1: 0.47
Testing evaluation: {'tn': 135, 'fp': 1, 'fn': 22, 'tp': 10}
==============================
Split: 41
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7238, 'fp': 9, 'fn': 13, 'tp': 740}
Testing loss: 0.9700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 132, 'fp': 32, 'fn': 4, 'tp': 0}
==============================
Split: 42
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7270, 'fp': 4, 'fn': 15, 'tp': 711}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 148, 'fp': 6, 'fn': 14, 'tp': 0}
==============================
Split: 43
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7266, 'fp': 6, 'fn': 19, 'tp': 709}
Testing loss: 1.3900, precision: 0.67, recall: 0.19, f1: 0.30
Testing evaluation: {'tn': 122, 'fp': 4, 'fn': 34, 'tp': 8}
==============================
Split: 44
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7231, 'fp': 14, 'fn': 49, 'tp': 706}
Testing loss: 0.9600, precision: 0.08, recall: 0.04, f1: 0.05
Testing evaluation: {'tn': 132, 'fp': 11, 'fn': 24, 'tp': 1}
==============================
Split: 45
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7216, 'fp': 11, 'fn': 28, 'tp': 745}
Testing loss: 0.5800, precision: 0.30, recall: 0.60, f1: 0.40
Testing evaluation: {'tn': 132, 'fp': 21, 'fn': 6, 'tp': 9}
==============================
Split: 46
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.96, recall: 0.94, f1: 0.95
Training evaluation: {'tn': 7199, 'fp': 27, 'fn': 45, 'tp': 729}
Testing loss: 0.1700, precision: 0.89, recall: 0.57, f1: 0.70
Testing evaluation: {'tn': 153, 'fp': 1, 'fn': 6, 'tp': 8}
==============================
Split: 47
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7207, 'fp': 15, 'fn': 32, 'tp': 746}
Testing loss: 0.1200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 161, 'fp': 5, 'fn': 2, 'tp': 0}
==============================
Split: 48
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7223, 'fp': 8, 'fn': 27, 'tp': 742}
Testing loss: 0.2100, precision: 0.82, recall: 0.89, f1: 0.85
Testing evaluation: {'tn': 126, 'fp': 7, 'fn': 4, 'tp': 31}
==============================
Split: 49
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7222, 'fp': 4, 'fn': 40, 'tp': 734}
Testing loss: 0.0200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 50
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7231, 'fp': 15, 'fn': 34, 'tp': 720}
Testing loss: 0.2400, precision: 0.40, recall: 0.89, f1: 0.55
Testing evaluation: {'tn': 147, 'fp': 12, 'fn': 1, 'tp': 8}
==============================
Split: 51
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 7, 'fn': 47, 'tp': 654}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 6, 'fn': 0, 'tp': 0}
==============================
# Print the overall performance
print("average precision:", round(np.mean(precision_list), 2))
print("average recall:", round(np.mean(recall_list), 2))
print("average f1-score:", round(np.mean(f1_list), 2))
average precision: 0.44
average recall: 0.32
average f1-score: 0.34