PyTorch Implementation of Smell Prediction

import pandas as pd
import numpy as np
from os.path import isfile, join
from os import listdir
from copy import deepcopy
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

Below we define the functions for preprocessing the data.

def answer_preprocess_sensor(df_list):
    """
    This function is the answer to Task 5.
    Preprocess sensor data.
    
    Parameters
    ----------
    df_list : list of pandas.DataFrame
        A list of data frames that contain sensor data from multiple stations.
         
    Returns
    -------
    pandas.DataFrame
        The preprocessed sensor data.
    """
    # Resample all the data frames.
    df_resample_list = []
    for df in df_list:
        # Convert the timestamp to datetime.
        df.index = pd.to_datetime(df.index, unit="s", utc=True)
        # Resample the timestamps by hour and average all the previous values.
        # Because we want data from the past, the label needs to be "right".
        df_resample_list.append(df.resample("60Min", label="right").mean())
    
    # Merge all data frames.
    df = df_resample_list.pop(0)
    index_name = df.index.name
    while len(df_resample_list) != 0:
        # We need to use outer merging since we want to preserve data from both data frames.
        df = pd.merge_ordered(df, df_resample_list.pop(0), on=df.index.name, how="outer", fill_method=None)
        # Move the datetime column to index
        df = df.set_index(index_name)

    # Fill in the missing data with value -1.
    df = df.fillna(-1)
    return df
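
To see why label="right" matters, here is a minimal sketch (not part of the pipeline) on a toy series: each hourly bin is stamped with its right edge, so the value at hour T averages the readings from the hour before T.

ts = pd.to_datetime(["2020-01-01 00:10", "2020-01-01 00:50", "2020-01-01 01:30"], utc=True)
toy = pd.Series([1.0, 3.0, 5.0], index=ts)
# The bin [00:00, 01:00) is labeled 01:00 and averages the readings 1.0 and 3.0.
print(toy.resample("60Min", label="right").mean())
# 01:00 -> 2.0, 02:00 -> 5.0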


def answer_preprocess_smell(df):
    """
    This function is the answer to Task 4.
    Preprocess smell data.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The raw smell reports data.
         
    Returns
    -------
    pandas.DataFrame
        The preprocessed smell data.
    """
    # Copy the dataframe to avoid editing the original one.
    df = df.copy(deep=True)
    
    # Drop the columns that we do not need.
    df = df.drop(columns=["feelings_symptoms", "smell_description", "zipcode"])
    
    # Keep only the reports with smell values between 3 and 5 (inclusive).
    df = df[(df["smell_value"]>=3)&(df["smell_value"]<=5)]
    
    # Convert the timestamp to datetime.
    df.index = pd.to_datetime(df.index, unit="s", utc=True)

    # Resample the timestamps by hour and sum up all the future values.
    # Because we want data from the future, the label needs to be "left".
    df = df.resample("60Min", label="left").sum()
    
    # Fill in the missing data with value 0.
    df = df.fillna(0)
    return df
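
Similarly, here is a minimal sketch (not part of the pipeline) of label="left": each hourly bin is stamped with its left edge, so the value at hour T sums the reports from the hour after T.

ts = pd.to_datetime(["2020-01-01 00:10", "2020-01-01 00:50", "2020-01-01 01:30"], utc=True)
toy = pd.Series([3, 4, 5], index=ts)
# The bin [00:00, 01:00) is labeled 00:00 and sums the reports 3 and 4.
print(toy.resample("60Min", label="left").sum())
# 00:00 -> 7, 01:00 -> 5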


def answer_sum_current_and_future_data(df, n_hr=0):
    """
    This function is the answer to Task 6.
    Sum up data in the current and future hours.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The preprocessed smell data.
    n_hr : int
         Number of hours that we want to sum up the future smell data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed smell data.
    """
    # Copy data frame to prevent editing the original one.
    df = df.copy(deep=True)
    
    # Fast return if n_hr is 0
    if n_hr == 0: return df
    
    # Sum up all smell_values in future hours.
    # The rolling function only sums up the current and previous values.
    # So we need to shift the result back to get the values in the future.
    # Be careful that we need to add 1 to the rolling window size,
    # because window size 1 means only using the current data.
    # Parameter "closed" needs to be "right" because we want to include the current data.
    df = df.rolling(n_hr+1, min_periods=1, closed="right").sum().shift(-1*n_hr)
    
    # Delete the last n_hr rows.
    # These n_hr rows have wrong data due to data shifting.
    df = df.iloc[:-1*n_hr]
    return df
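
The rolling-and-shift trick above is easy to verify on a toy data frame (a sketch, not part of the pipeline): with n_hr=2, each row should become the sum of itself and the next two rows, and the last two rows are dropped.

toy = pd.DataFrame({"smell_value": [1, 2, 3, 4, 5]})
out = answer_sum_current_and_future_data(toy, n_hr=2)
print(out["smell_value"].tolist())
# [6.0, 9.0, 12.0], i.e., 1+2+3, 2+3+4, and 3+4+5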


def insert_previous_data_to_cols(df, n_hr=0):
    """
    Insert columns to indicate the data from the previous hours.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The preprocessed sensor data.
    n_hr : int
        Number of hours that we want to insert the previous sensor data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed sensor data.
    """
    # Copy data frame to prevent editing the original one.
    df = df.copy(deep=True)

    # Add the data from the previous hours.
    df_all = []
    for h in range(1, n_hr + 1):
        # Shift the data frame to get previous data.
        df_pre = df.shift(h)
        # Edit the name to indicate it is previous data.
        # The original data frame already has data from the previous 1 hour.
        # (as indicated in the preprocessing phase of sensor data)
        # So we need to add 1 here.
        df_pre.columns += "_pre_" + str(h+1) + "h"
        # Add the data to an array for merging.
        df_all.append(df_pre)

    # Rename the columns in the original data frame.
    # The original data frame already has data from the previous 1 hour.
    # (as indicated in the preprocessing phase of sensor data)
    df.columns += "_pre_1h"

    # Merge all data.
    df_merge = df
    for d in df_all:
        # The join function merges dataframes by index.
        df_merge = df_merge.join(d)
        
    # Delete the first n_hr rows.
    # These n_hr rows have no data due to data shifting.
    df_merge = df_merge.iloc[n_hr:]
    return df_merge
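
A quick sanity check (not part of the pipeline): with n_hr=1, each column gains a *_pre_2h copy holding the value from one row (hour) earlier, and the first row is dropped.

toy = pd.DataFrame({"sensor_a": [10, 20, 30, 40]})
print(insert_previous_data_to_cols(toy, n_hr=1))
#    sensor_a_pre_1h  sensor_a_pre_2h
# 1               20             10.0
# 2               30             20.0
# 3               40             30.0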


def convert_wind_direction(df):
    """
    Convert wind directions to sine and cosine components.
    
    Parameters
    ----------
    df : pandas.DataFrame
        The data frame that contains the wind direction data.
         
    Returns
    -------
    pandas.DataFrame
        The transformed data frame.
    """
    # Copy data frame to prevent editing the original one.
    df_cp = df.copy(deep=True)
    
    # Convert columns with wind directions.
    for c in df.columns:
        if "SONICWD_DEG" in c:
            df_c = df[c]
            df_c_cos = np.cos(np.deg2rad(df_c))
            df_c_sin = np.sin(np.deg2rad(df_c))
            df_c_cos.name += "_cosine"
            df_c_sin.name += "_sine"
            df_cp.drop([c], axis=1, inplace=True)
            df_cp[df_c_cos.name] = df_c_cos
            df_cp[df_c_sin.name] = df_c_sin
    return df_cp
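
The point of the sine/cosine encoding is that directions near 0 and 360 degrees, which look far apart as raw numbers, end up close together in the transformed space. A minimal check (not part of the pipeline):

for deg in [350, 10]:
    rad = np.deg2rad(deg)
    print(deg, round(np.cos(rad), 3), round(np.sin(rad), 3))
# 350 0.985 -0.174
# 10 0.985 0.174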


def compute_feature_label(df_smell, df_sensor, b_hr_sensor=0, f_hr_smell=0):
    """
    Compute features and labels from the smell and sensor data.
    
    Parameters
    ----------
    df_smell : pandas.DataFrame
        The preprocessed smell data.
    df_sensor : pandas.DataFrame
        The preprocessed sensor data.
    b_hr_sensor : int
        Number of hours that we want to insert the previous sensor data.
    f_hr_smell : int
        Number of hours that we want to sum up the future smell data.
    
    Returns
    -------
    df_x : pandas.DataFrame
        The features that we want to use for modeling.
    df_y : pandas.DataFrame
        The labels that we want to use for modeling.
    """
    # Copy data frames to prevent editing the original ones.
    df_smell = df_smell.copy(deep=True)
    df_sensor = df_sensor.copy(deep=True)
    
    # Replace -1 values in the sensor data with NaN.
    df_sensor[df_sensor==-1] = np.nan
    
    # Convert all wind directions.
    df_sensor = convert_wind_direction(df_sensor)
    
    # Scale sensor data and fill in missing values
    df_sensor = (df_sensor - df_sensor.mean()) / df_sensor.std()
    df_sensor = df_sensor.round(6)
    df_sensor = df_sensor.fillna(-1)
    
    # Insert previous sensor data as features.
    # Notice that df_sensor is already using the previous data.
    # So b_hr_sensor=0 means using data from the previous 1 hour.
    # And b_hr_sensor=n means using data from the previous n+1 hours.
    df_sensor = insert_previous_data_to_cols(df_sensor, b_hr_sensor)
    
    # Sum up current and future smell values as label.
    # Notice that the df_smell is already the data from the future 1 hour.
    # (as indicated in the preprocessing phase of smell data)
    # So f_hr_smell=0 means using data from the future 1 hour.
    # And f_hr_smell=n means using data from the future n+1 hours.
    df_smell = answer_sum_current_and_future_data(df_smell, f_hr_smell)
    
    # Add a suffix to the column names of the smell data to prevent confusion.
    # See the description above for the reason for adding 1 to f_hr_smell.
    df_smell.columns += "_future_" + str(f_hr_smell+1) + "h"
    
    # First, merge the two data frames on the timestamps that appear in both.
    # In this way, we synchronize the timestamps in the sensor and smell data.
    # This also means that the sensor and smell data have the same number of rows.
    df = pd.merge_ordered(df_sensor.reset_index(), df_smell.reset_index(), on=df_smell.index.name, how="inner", fill_method=None)
    
    # Sanity check: there should be no missing data.
    assert df.isna().sum().sum() == 0, "Error! There is missing data."
    
    # Separate features (x) and labels (y).
    # Copy them so that df_x and df_y are independent data frames rather than
    # views of df, which would trigger a SettingWithCopyWarning below.
    df_x = df[df_sensor.columns].copy()
    df_y = df[df_smell.columns].copy()
    
    # Add the hour of day and the day of week.
    dow_radian = df["EpochTime"].dt.dayofweek.copy(deep=True) * 2 * np.pi / 6.0
    tod_radian = df["EpochTime"].dt.hour.copy(deep=True) * 2 * np.pi / 23.0
    df_x.loc[:,"day_of_week_sine"] = np.sin(dow_radian)
    df_x.loc[:,"day_of_week_cosine"] = np.cos(dow_radian)
    df_x.loc[:,"hour_of_day_sine"] = np.sin(tod_radian)
    df_x.loc[:,"hour_of_day_cosine"] = np.cos(tod_radian)
    return df_x, df_y

# Load and preprocess sensor data
path = "smellpgh-v1/esdr_raw"
list_of_files = [f for f in listdir(path) if isfile(join(path, f))]
sensor_raw_list = []
for f in list_of_files:
    sensor_raw_list.append(pd.read_csv(join(path, f)).set_index("EpochTime"))
df_sensor = answer_preprocess_sensor(sensor_raw_list)

# Load and preprocess smell data
smell_raw = pd.read_csv("smellpgh-v1/smell_raw.csv").set_index("EpochTime")
df_smell = answer_preprocess_smell(smell_raw)

# Compute features and labels
df_x, df_y = compute_feature_label(df_smell, df_sensor, b_hr_sensor=2, f_hr_smell=7)
df_x
(Output: a data frame with 16751 rows × 148 columns. The features are the standardized sensor readings with lag suffixes, such as 3.feed_1.SO2_PPM_pre_1h and 3.feed_28.SONICWD_DEG_cosine_pre_3h, plus the day_of_week and hour_of_day sine/cosine columns.)

df_y
smell_value_future_8h
0 8.0
1 5.0
2 5.0
3 5.0
4 5.0
... ...
16746 6.0
16747 6.0
16748 6.0
16749 3.0
16750 11.0

16751 rows × 1 columns

# Set random seed for reproducibility
torch.manual_seed(42)

# Load data
feature = df_x.to_numpy()
# Label a time slot as positive when the total smell value
# in the future 8 hours is at least 40.
label = (df_y >= 40).astype(int)["smell_value_future_8h"].to_numpy()

# Create the dataset object
class SmellPittsburghDataset(Dataset):
    def __init__(self, feature=None, label=None):
        self.feature = feature
        self.label = label

    def __len__(self):
        return len(self.feature)

    def __getitem__(self, idx):
        x = self.feature[idx]
        y = self.label[idx]
        x = torch.from_numpy(x).float()
        y = torch.from_numpy(np.array([y])).float()
        return x, y
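
As a quick check (not part of the pipeline), the dataset can be exercised with random arrays shaped like our features and labels; each batch yields a feature matrix and a column of labels.

toy_x = np.random.rand(10, 4)
toy_y = np.random.randint(0, 2, size=10)
toy_loader = DataLoader(SmellPittsburghDataset(feature=toy_x, label=toy_y), batch_size=4)
for bx, by in toy_loader:
    print(bx.shape, by.shape)
# torch.Size([4, 4]) torch.Size([4, 1]) for the full batches, then a final batch of 2
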
def scorer(y_predict, y):
    """
    A customized scoring function to evaluate a PyTorch classifier.
    
    Parameters
    ----------
    y_predict : torch.Tensor
        The predicted labels.
    y : torch.Tensor
        The true labels.
    
    Returns
    -------
    dict of int or float
        A dictionary of evaluation metrics.
    """
    c = confusion_matrix(y, y_predict, labels=[0,1])
    return {"tn": c[0,0], "fp": c[0,1], "fn": c[1,0], "tp": c[1,1]}
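
For example (a quick check, not part of the pipeline), one true positive, one false negative, and one false positive produce the following counts:

print(scorer(torch.tensor([1, 0, 1]), torch.tensor([1, 1, 0])))
# {'tn': 0, 'fp': 1, 'fn': 1, 'tp': 1}
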
def train(model, criterion, optimizer, dataloader_train, dataloader_test, num_epochs=30):
    """Train the model."""
    
    def run_one_epoch(dataloader, phase="train"):
        if phase == "train": model.train() # training mode
        else: model.eval() # evaluation mode
        c = 0 # just a counter
        accu_loss = 0 # accumulated loss
        accu_score = None # accumulated scores
        # Loop the data
        for x, y in dataloader:
            c += 1 # increase the counter
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if phase == "train":
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # Accumulate statistics for this epoch.
            # Use .item() so the loss is a Python float and the autograd graph
            # is not retained across batches.
            accu_loss += loss.item() # add up the loss
            y_label = (y_pred > 0.5).float()
            score = scorer(y_label, y)
            if accu_score is None:
                accu_score = score
            else:
                for k in score:
                    accu_score[k] += score[k]
        # Return statistics
        return accu_loss/c, accu_score
    
    def compute_statistics(score):
        tp_fp = score["tp"] + score["fp"]
        if tp_fp == 0:
            precision = 0
        else:
            precision = round(score["tp"]/tp_fp, 2)
        tp_fn = score["tp"] + score["fn"]
        if tp_fn == 0:
            recall = 0
        else:
            recall = round(score["tp"]/tp_fn, 2)
        tp_tp_fp_fn = tp_fp + tp_fn
        if tp_tp_fp_fn == 0:
            f1 = 0
        else:
            f1 = round(2*score["tp"]/tp_tp_fp_fn, 2)
        return precision, recall, f1
    
    # Run the training and testing phases for multiple epochs.
    for epoch in range(num_epochs):
        # Run through the entire training set
        loss_train, score_train = run_one_epoch(dataloader_train, phase="train")
        loss_train = round(loss_train, 2)
        p_train, r_train, f1_train = compute_statistics(score_train)
        # Run through the entire testing set
        with torch.no_grad():
            loss_test, score_test = run_one_epoch(dataloader_test, phase="test")
        loss_test = round(loss_test, 2)
        p_test, r_test, f1_test = compute_statistics(score_test)
        # Print loss and scores
        if (epoch+1) % 30 == 0:
            print("-"*10)
            print(f"Epoch [{epoch+1}/{num_epochs}]")
            print(f"Training loss: {loss_train:.4f}, precision: {p_train:.2f}, recall: {r_train:.2f}, f1: {f1_train:.2f}")
            print(f"Training evaluation: {score_train}")
            print(f"Testing loss: {loss_test:.4f}, precision: {p_test:.2f}, recall: {r_test:.2f}, f1: {f1_test:.2f}")
            print(f"Testing evaluation: {score_test}")
    
    # Return statistics
    return p_test, r_test, f1_test
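
As a quick arithmetic check (not part of the code), the counts that split 0 below produces on its test week (tp=15, fp=3, fn=6) yield the reported scores:

tp, fp, fn = 15, 3, 6
print(round(tp / (tp + fp), 2))                    # precision: 15/18 = 0.83
print(round(tp / (tp + fn), 2))                    # recall: 15/21 = 0.71
print(round(2 * tp / ((tp + fp) + (tp + fn)), 2))  # f1: 30/39 = 0.77
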
# Define neural network model
class DeepLogisticRegression(nn.Module):
    def __init__(self, input_size, hidden_size=64, output_size=1):
        super(DeepLogisticRegression, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        out = self.sigmoid(out)
        return out
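
A minimal shape check (not part of the pipeline): the model maps a batch of feature vectors to one sigmoid output per sample, which is the form that nn.BCELoss expects.

toy_model = DeepLogisticRegression(input_size=4)
toy_out = toy_model(torch.randn(2, 4))
print(toy_out.shape)  # torch.Size([2, 1]), with values in (0, 1)
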
# Create time series splits for cross-validation.
splits = []
dataset_size = df_x.shape[0]
train_size = 8000
test_size = 168
input_size = feature.shape[1]
for i in range(train_size, dataset_size, test_size):
    start = i - train_size
    end = i + test_size
    if (end >= dataset_size): break
    train_index = range(start, i)
    test_index = range(i, end)
    splits.append((list(train_index), list(test_index)))
    
# Cross-validate the model for every split
precision_list = []
recall_list = []
f1_list = []
for i in range(len(splits)):
    print(f"Split: {i}")
    dataset_train = SmellPittsburghDataset(feature=feature[splits[i][0]], label=label[splits[i][0]])
    dataset_test = SmellPittsburghDataset(feature=feature[splits[i][1]], label=label[splits[i][1]])
    dataloader_train = DataLoader(dataset_train, batch_size=1024, shuffle=True)
    dataloader_test = DataLoader(dataset_test, batch_size=1024, shuffle=False)
    model = DeepLogisticRegression(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    p_test, r_test, f1_test = train(model, criterion, optimizer, dataloader_train, dataloader_test)
    precision_list.append(p_test)
    recall_list.append(r_test)
    f1_list.append(f1_test)
    print("="*30)
Split: 0
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.97, f1: 0.97
Training evaluation: {'tn': 7363, 'fp': 11, 'fn': 21, 'tp': 605}
Testing loss: 0.1800, precision: 0.83, recall: 0.71, f1: 0.77
Testing evaluation: {'tn': 144, 'fp': 3, 'fn': 6, 'tp': 15}
==============================
Split: 1
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7385, 'fp': 2, 'fn': 7, 'tp': 606}
Testing loss: 0.2900, precision: 1.00, recall: 0.30, f1: 0.46
Testing evaluation: {'tn': 158, 'fp': 0, 'fn': 7, 'tp': 3}
==============================
Split: 2
----------
Epoch [30/30]
Training loss: 0.0100, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7400, 'fp': 7, 'fn': 15, 'tp': 578}
Testing loss: 1.0500, precision: 0.78, recall: 0.57, f1: 0.66
Testing evaluation: {'tn': 95, 'fp': 10, 'fn': 27, 'tp': 36}
==============================
Split: 3
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7391, 'fp': 1, 'fn': 8, 'tp': 600}
Testing loss: 0.4800, precision: 0.70, recall: 0.52, f1: 0.60
Testing evaluation: {'tn': 135, 'fp': 6, 'fn': 13, 'tp': 14}
==============================
Split: 4
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7382, 'fp': 3, 'fn': 12, 'tp': 603}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 3, 'fn': 0, 'tp': 0}
==============================
Split: 5
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7391, 'fp': 2, 'fn': 19, 'tp': 588}
Testing loss: 0.2700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 165, 'fp': 0, 'fn': 3, 'tp': 0}
==============================
Split: 6
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7388, 'fp': 2, 'fn': 13, 'tp': 597}
Testing loss: 0.0300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 2, 'fn': 0, 'tp': 0}
==============================
Split: 7
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7399, 'fp': 2, 'fn': 11, 'tp': 588}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 142, 'fp': 17, 'fn': 9, 'tp': 0}
==============================
Split: 8
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7402, 'fp': 0, 'fn': 13, 'tp': 585}
Testing loss: 0.4400, precision: 0.93, recall: 0.48, f1: 0.64
Testing evaluation: {'tn': 138, 'fp': 1, 'fn': 15, 'tp': 14}
==============================
Split: 9
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 0.99
Training evaluation: {'tn': 7373, 'fp': 0, 'fn': 8, 'tp': 619}
Testing loss: 1.5300, precision: 0.83, recall: 0.16, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 1, 'fn': 26, 'tp': 5}
==============================
Split: 10
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7338, 'fp': 4, 'fn': 14, 'tp': 644}
Testing loss: 0.5000, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 8, 'tp': 0}
==============================
Split: 11
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7333, 'fp': 1, 'fn': 4, 'tp': 662}
Testing loss: 0.1800, precision: 0.53, recall: 0.82, f1: 0.64
Testing evaluation: {'tn': 149, 'fp': 8, 'fn': 2, 'tp': 9}
==============================
Split: 12
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7335, 'fp': 7, 'fn': 22, 'tp': 636}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 168, 'fp': 0, 'fn': 0, 'tp': 0}
==============================
Split: 13
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7341, 'fp': 1, 'fn': 11, 'tp': 647}
Testing loss: 0.1400, precision: 0.83, recall: 0.62, f1: 0.71
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 3, 'tp': 5}
==============================
Split: 14
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7332, 'fp': 6, 'fn': 14, 'tp': 648}
Testing loss: 0.8200, precision: 0.64, recall: 0.32, f1: 0.42
Testing evaluation: {'tn': 142, 'fp': 4, 'fn': 15, 'tp': 7}
==============================
Split: 15
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.97, f1: 0.99
Training evaluation: {'tn': 7329, 'fp': 0, 'fn': 19, 'tp': 652}
Testing loss: 0.3200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 160, 'fp': 1, 'fn': 7, 'tp': 0}
==============================
Split: 16
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.97, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7339, 'fp': 18, 'fn': 34, 'tp': 609}
Testing loss: 0.6100, precision: 0.57, recall: 0.42, f1: 0.48
Testing evaluation: {'tn': 127, 'fp': 10, 'fn': 18, 'tp': 13}
==============================
Split: 17
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7347, 'fp': 1, 'fn': 16, 'tp': 636}
Testing loss: 0.4400, precision: 0.20, recall: 0.12, f1: 0.15
Testing evaluation: {'tn': 156, 'fp': 4, 'fn': 7, 'tp': 1}
==============================
Split: 18
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7347, 'fp': 10, 'fn': 26, 'tp': 617}
Testing loss: 0.4500, precision: 0.11, recall: 0.06, f1: 0.08
Testing evaluation: {'tn': 144, 'fp': 8, 'fn': 15, 'tp': 1}
==============================
Split: 19
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7338, 'fp': 3, 'fn': 29, 'tp': 630}
Testing loss: 0.1100, precision: 0.58, recall: 0.78, f1: 0.67
Testing evaluation: {'tn': 154, 'fp': 5, 'fn': 2, 'tp': 7}
==============================
Split: 20
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7342, 'fp': 0, 'fn': 12, 'tp': 646}
Testing loss: 0.6500, precision: 0.14, recall: 0.04, f1: 0.06
Testing evaluation: {'tn': 138, 'fp': 6, 'fn': 23, 'tp': 1}
==============================
Split: 21
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7307, 'fp': 11, 'fn': 44, 'tp': 638}
Testing loss: 1.2400, precision: 1.00, recall: 0.37, f1: 0.54
Testing evaluation: {'tn': 130, 'fp': 0, 'fn': 24, 'tp': 14}
==============================
Split: 22
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7266, 'fp': 14, 'fn': 35, 'tp': 685}
Testing loss: 0.0100, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 23
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7283, 'fp': 8, 'fn': 29, 'tp': 680}
Testing loss: 0.1300, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 166, 'fp': 0, 'fn': 2, 'tp': 0}
==============================
Split: 24
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7298, 'fp': 13, 'fn': 48, 'tp': 641}
Testing loss: 0.2200, precision: 0.20, recall: 0.20, f1: 0.20
Testing evaluation: {'tn': 150, 'fp': 8, 'fn': 8, 'tp': 2}
==============================
Split: 25
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7289, 'fp': 12, 'fn': 31, 'tp': 668}
Testing loss: 0.1500, precision: 0.50, recall: 0.12, f1: 0.20
Testing evaluation: {'tn': 159, 'fp': 1, 'fn': 7, 'tp': 1}
==============================
Split: 26
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 16, 'fn': 36, 'tp': 656}
Testing loss: 0.1300, precision: 0.90, recall: 0.60, f1: 0.72
Testing evaluation: {'tn': 152, 'fp': 1, 'fn': 6, 'tp': 9}
==============================
Split: 27
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7280, 'fp': 13, 'fn': 52, 'tp': 655}
Testing loss: 0.6600, precision: 0.86, recall: 0.26, f1: 0.40
Testing evaluation: {'tn': 144, 'fp': 1, 'fn': 17, 'tp': 6}
==============================
Split: 28
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.96
Training evaluation: {'tn': 7264, 'fp': 13, 'fn': 37, 'tp': 686}
Testing loss: 0.0100, precision: 1.00, recall: 1.00, f1: 1.00
Testing evaluation: {'tn': 162, 'fp': 0, 'fn': 0, 'tp': 6}
==============================
Split: 29
----------
Epoch [30/30]
Training loss: 0.0200, precision: 1.00, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7289, 'fp': 0, 'fn': 15, 'tp': 696}
Testing loss: 0.3000, precision: 0.38, recall: 0.27, f1: 0.32
Testing evaluation: {'tn': 152, 'fp': 5, 'fn': 8, 'tp': 3}
==============================
Split: 30
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7271, 'fp': 7, 'fn': 25, 'tp': 697}
Testing loss: 1.0500, precision: 0.95, recall: 0.39, f1: 0.55
Testing evaluation: {'tn': 121, 'fp': 1, 'fn': 28, 'tp': 18}
==============================
Split: 31
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7228, 'fp': 10, 'fn': 21, 'tp': 741}
Testing loss: 1.1600, precision: 0.88, recall: 0.25, f1: 0.39
Testing evaluation: {'tn': 139, 'fp': 1, 'fn': 21, 'tp': 7}
==============================
Split: 32
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7212, 'fp': 6, 'fn': 27, 'tp': 755}
Testing loss: 0.6200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 1, 'fn': 5, 'tp': 0}
==============================
Split: 33
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7227, 'fp': 8, 'fn': 18, 'tp': 747}
Testing loss: 0.2100, precision: 0.47, recall: 0.78, f1: 0.58
Testing evaluation: {'tn': 151, 'fp': 8, 'fn': 2, 'tp': 7}
==============================
Split: 34
----------
Epoch [30/30]
Training loss: 0.0400, precision: 0.97, recall: 0.93, f1: 0.95
Training evaluation: {'tn': 7202, 'fp': 24, 'fn': 53, 'tp': 721}
Testing loss: 0.8100, precision: 0.25, recall: 0.29, f1: 0.27
Testing evaluation: {'tn': 136, 'fp': 15, 'fn': 12, 'tp': 5}
==============================
Split: 35
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7211, 'fp': 10, 'fn': 25, 'tp': 754}
Testing loss: 0.2900, precision: 0.54, recall: 0.65, f1: 0.59
Testing evaluation: {'tn': 137, 'fp': 11, 'fn': 7, 'tp': 13}
==============================
Split: 36
----------
Epoch [30/30]
Training loss: 0.0100, precision: 1.00, recall: 0.99, f1: 1.00
Training evaluation: {'tn': 7234, 'fp': 1, 'fn': 6, 'tp': 759}
Testing loss: 0.5700, precision: 0.67, recall: 0.20, f1: 0.31
Testing evaluation: {'tn': 157, 'fp': 1, 'fn': 8, 'tp': 2}
==============================
Split: 37
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7225, 'fp': 10, 'fn': 54, 'tp': 711}
Testing loss: 0.3100, precision: 0.42, recall: 0.50, f1: 0.46
Testing evaluation: {'tn': 141, 'fp': 11, 'fn': 8, 'tp': 8}
==============================
Split: 38
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.98
Training evaluation: {'tn': 7224, 'fp': 10, 'fn': 18, 'tp': 748}
Testing loss: 0.0700, precision: 0.90, recall: 0.75, f1: 0.82
Testing evaluation: {'tn': 155, 'fp': 1, 'fn': 3, 'tp': 9}
==============================
Split: 39
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7217, 'fp': 16, 'fn': 28, 'tp': 739}
Testing loss: 0.3900, precision: 0.26, recall: 0.75, f1: 0.39
Testing evaluation: {'tn': 143, 'fp': 17, 'fn': 2, 'tp': 6}
==============================
Split: 40
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7235, 'fp': 12, 'fn': 24, 'tp': 729}
Testing loss: 0.8100, precision: 0.91, recall: 0.31, f1: 0.47
Testing evaluation: {'tn': 135, 'fp': 1, 'fn': 22, 'tp': 10}
==============================
Split: 41
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7238, 'fp': 9, 'fn': 13, 'tp': 740}
Testing loss: 0.9700, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 132, 'fp': 32, 'fn': 4, 'tp': 0}
==============================
Split: 42
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.98, f1: 0.99
Training evaluation: {'tn': 7270, 'fp': 4, 'fn': 15, 'tp': 711}
Testing loss: 0.6800, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 148, 'fp': 6, 'fn': 14, 'tp': 0}
==============================
Split: 43
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.97, f1: 0.98
Training evaluation: {'tn': 7266, 'fp': 6, 'fn': 19, 'tp': 709}
Testing loss: 1.3900, precision: 0.67, recall: 0.19, f1: 0.30
Testing evaluation: {'tn': 122, 'fp': 4, 'fn': 34, 'tp': 8}
==============================
Split: 44
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.94, f1: 0.96
Training evaluation: {'tn': 7231, 'fp': 14, 'fn': 49, 'tp': 706}
Testing loss: 0.9600, precision: 0.08, recall: 0.04, f1: 0.05
Testing evaluation: {'tn': 132, 'fp': 11, 'fn': 24, 'tp': 1}
==============================
Split: 45
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7216, 'fp': 11, 'fn': 28, 'tp': 745}
Testing loss: 0.5800, precision: 0.30, recall: 0.60, f1: 0.40
Testing evaluation: {'tn': 132, 'fp': 21, 'fn': 6, 'tp': 9}
==============================
Split: 46
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.96, recall: 0.94, f1: 0.95
Training evaluation: {'tn': 7199, 'fp': 27, 'fn': 45, 'tp': 729}
Testing loss: 0.1700, precision: 0.89, recall: 0.57, f1: 0.70
Testing evaluation: {'tn': 153, 'fp': 1, 'fn': 6, 'tp': 8}
==============================
Split: 47
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.96, f1: 0.97
Training evaluation: {'tn': 7207, 'fp': 15, 'fn': 32, 'tp': 746}
Testing loss: 0.1200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 161, 'fp': 5, 'fn': 2, 'tp': 0}
==============================
Split: 48
----------
Epoch [30/30]
Training loss: 0.0200, precision: 0.99, recall: 0.96, f1: 0.98
Training evaluation: {'tn': 7223, 'fp': 8, 'fn': 27, 'tp': 742}
Testing loss: 0.2100, precision: 0.82, recall: 0.89, f1: 0.85
Testing evaluation: {'tn': 126, 'fp': 7, 'fn': 4, 'tp': 31}
==============================
Split: 49
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7222, 'fp': 4, 'fn': 40, 'tp': 734}
Testing loss: 0.0200, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 167, 'fp': 1, 'fn': 0, 'tp': 0}
==============================
Split: 50
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.98, recall: 0.95, f1: 0.97
Training evaluation: {'tn': 7231, 'fp': 15, 'fn': 34, 'tp': 720}
Testing loss: 0.2400, precision: 0.40, recall: 0.89, f1: 0.55
Testing evaluation: {'tn': 147, 'fp': 12, 'fn': 1, 'tp': 8}
==============================
Split: 51
----------
Epoch [30/30]
Training loss: 0.0300, precision: 0.99, recall: 0.93, f1: 0.96
Training evaluation: {'tn': 7292, 'fp': 7, 'fn': 47, 'tp': 654}
Testing loss: 0.0600, precision: 0.00, recall: 0.00, f1: 0.00
Testing evaluation: {'tn': 162, 'fp': 6, 'fn': 0, 'tp': 0}
==============================
# Print the overall performance
print("average precision:", round(np.mean(precision_list), 2))
print("average recall:", round(np.mean(recall_list), 2))
print("average f1-score:", round(np.mean(f1_list), 2))
average precision: 0.44
average recall: 0.32
average f1-score: 0.34