Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
505 changes: 505 additions & 0 deletions notebooks/Evaluation - noise.ipynb

Large diffs are not rendered by default.

150 changes: 150 additions & 0 deletions notebooks/Evaluation - time.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"#Basic Imports\n",
"import os,sys\n",
"os.chdir(\"..\")\n",
"\n",
"from tqdm import tqdm,trange\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import classification_report,confusion_matrix, ConfusionMatrixDisplay\n",
"import torch\n",
"import pandas\n",
"\n",
"from datasets.ssl_dataset import SSL_Dataset\n",
"from datasets.data_utils import get_data_loader\n",
"from utils import get_model_checkpoints_and_timing_info\n",
"from utils import net_builder\n",
"from utils import clean_results_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Path to the runs to load\n",
"csv_folder = \"/home/gabrielemeoni/project/SSLRS/\"\n",
"dataset=\"eurosat_ms\"\n",
"folder = \"/scratch/fixmatch_results/sandy_runs/nr_of_labels/\"\n",
"sort_criterion = \"net\" # Accepted net, numlabels\n",
"seed_wanted = 2# Seed wanted (the others will be filtered)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"folder=os.path.join(folder, dataset)\n",
"checkpoints, run_args, timing_info = get_model_checkpoints_and_timing_info(folder)\n",
"timing_info_dict = dict(zip(checkpoints, timing_info))\n",
"if os.name == 'nt':\n",
" [print(_.split(\"\\\\\")[1]) for _ in checkpoints];\n",
"else:\n",
" [print(_.split(\"/\")[1]) for _ in checkpoints];"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run all models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results=[]\n",
"for checkpoint, args in zip(checkpoints,run_args):\n",
" print(\"------------ RUNNING \", checkpoint, \" -----------------\")\n",
" args[\"batch_size\"] = 256\n",
" args[\"data_dir\"] = \"./data/\"\n",
" args[\"use_train_model\"] = False\n",
" args[\"load_path\"] = checkpoint\n",
" if args[\"seed\"] == seed_wanted:\n",
" results.append({\"params\" : args, \"aumgent_time\" : timing_info_dict[checkpoint][0], \"run_time\" : timing_info_dict[checkpoint][1], \"training_time\" : timing_info_dict[checkpoint][2]})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"big_df = pd.DataFrame()\n",
"pd.set_option('display.max_columns', None)\n",
"for result in results:\n",
" result_dict=result[\"params\"].copy()\n",
" result_dict.update({\"aumgent_time\" : result[\"aumgent_time\"]})\n",
" result_dict.update({\"run_time\" : result[\"run_time\"]} )\n",
" result_dict.update({\"training_time\" : result[\"training_time\"]})\n",
" df = pd.DataFrame(result_dict, index=[0])\n",
" df = df.set_index([\"dataset\"])\n",
" df = df.drop(labels=[\"batch_size\",\"data_dir\", \"opt\", \"use_train_model\", \"seed\", \"confidence\", \"lr\"], axis=1)\n",
" big_df = big_df.append(df)\n",
"\n",
"big_df=big_df.sort_values(by=[sort_criterion], axis=0)\n",
"big_df.to_csv(os.path.join(csv_folder,\"time_results_\"+dataset+\"_seed_\"+str(seed_wanted)+\".csv\"))\n",
"print(\"File saved at: \", os.path.join(csv_folder,\"time_results_\"+dataset+\"_seed_\"+str(seed_wanted)+\".csv\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
83 changes: 78 additions & 5 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from datetime import datetime


def plot_cmatrix(preds,labels,encoding, figsize=(8, 5),dpi=150, class_names_font_scale=1.2, matrix_font_size=12, save_fig_name=None):
"""Plotting the confusion matrix for one or three dataset seeds.
Expand Down Expand Up @@ -275,6 +277,32 @@ def get_model_checkpoints(folderpath):
return checkpoints, params


def get_model_checkpoints_and_timing_info(folderpath):
    """Gather every best-model checkpoint below *folderpath* plus run metadata.

    Recursively searches for ``model_best.pth`` files, decoding each run's
    parameters from its folder path and reading its timing information from
    the run's log file.

    Args:
        folderpath (str): root directory to search (searched recursively).

    Returns:
        list, list, list: checkpoint file paths, decoded parameter dicts,
        and timing-info tuples, index-aligned with each other.
    """
    # Normalize Windows-style separators so the glob pattern behaves uniformly.
    normalized_path = folderpath.replace("\\", "/")
    checkpoints = glob.glob(normalized_path + "/**/model_best.pth", recursive=True)
    run_folders = [ckpt.split("model_best.pth")[0] for ckpt in checkpoints]

    params = [decode_parameters_from_path(run_folder) for run_folder in run_folders]
    timing_info = [_return_training_time_info(run_folder) for run_folder in run_folders]

    return checkpoints, params, timing_info



def _read_best_iteration_number(folder):
"""Reads from the run log file at which iteration the best result was obtained.

Expand All @@ -294,6 +322,49 @@ def _read_best_iteration_number(folder):
return int(iteration_str.split(" iters")[0])


def _return_training_time_info(folder):
    """Return per-iteration timing averages and total training time from a run log.

    Parses ``log.txt`` inside *folder*, averaging the prefetch (augmentation)
    and run (forward + backward) times reported on each iteration line, and
    computing the wall-clock span between the first and last logged timestamp.

    Args:
        folder (str): results folder path, including a trailing separator
            (the log is opened as ``folder + "log.txt"``).

    Returns:
        float: average augmentation (prefetch) time per iteration [ms].
        float: average runtime (forward + backward) per iteration [ms].
        int: total training time [s].
    """
    with open(folder + "log.txt", "r") as file:
        # Drop the first 5 header lines and the last 2 summary lines.
        # NOTE(review): assumes the fixed layout of the training log — confirm.
        lines = file.read().splitlines()[5:-2]

    dates = []
    timestamps = []
    run_time_ms = []
    prefetch_time_ms = []
    for line in lines:
        # Checkpoint / bookkeeping lines carry no timing columns.
        if "model saved" in line or "data loader keys" in line:
            continue

        tokens = line.split(' ')  # split once instead of re-splitting per field
        dates.append(tokens[0])        # e.g. "[2021-06-01" (leading bracket kept)
        timestamps.append(tokens[1])   # e.g. "12:34:56,789"
        # Columns 22 and 24 hold the comma-terminated prefetch and run times.
        # NOTE(review): positional — breaks if the log line format changes.
        prefetch_time_ms.append(float(tokens[22].replace(",", "")))
        run_time_ms.append(float(tokens[24].replace(",", "")))

    first_date = dates[0].split('-')
    last_date = dates[-1].split('-')
    first_time = timestamps[0].split(':')
    last_time = timestamps[-1].split(':')
    # [1:] strips the leading "[" from the year; [:2] keeps only whole seconds
    # (drops the ",ms" fraction appended to the seconds field).
    start = datetime(int(first_date[0][1:]), int(first_date[1]), int(first_date[2]),
                     int(first_time[0]), int(first_time[1]), int(first_time[2][:2]))
    end = datetime(int(last_date[0][1:]), int(last_date[1]), int(last_date[2]),
                   int(last_time[0]), int(last_time[1]), int(last_time[2][:2]))
    training_time_s = int((end - start).total_seconds())

    return np.mean(np.array(prefetch_time_ms)), np.mean(np.array(run_time_ms)), training_time_s


def decode_parameters_from_path(filepath):
"""Decodes the parameters encoded in the filepath to a checkpoint

Expand Down Expand Up @@ -333,15 +404,16 @@ def decode_parameters_from_path(filepath):


def clean_results_df(
original_df, data_folder_name, sort_criterion="net", keep_per_class=False
original_df, data_folder_name, sort_criterion="net", keep_per_class=False, swap_accuracy_position=True
):
"""Removing unnecessary columns to save into the csv file, sorting rows according to the sort_criterion, sorting colums according to the csv file format.

Args:
original_df ([df]): original dataframe to clean.
data_folder_name ([str]): string containing experiment results
sort_criterion (str, optional): Default criterion for rows sorting. Defaults to "net".
keep_per_class (bool, optional): If True will not discard class-wise accuracy
keep_per_class (bool, optional): If True will not discard class-wise accuracy. Defaults to False.
swap_accuracy_position (bool, optional): If True, accuracy position is swapped. Defaults to True.

Returns:
[cleaned outputdata]: [df]
Expand Down Expand Up @@ -422,9 +494,10 @@ def clean_results_df(
)

# Swap accuracy positions to sort it as in the final results file
keys = new_df.columns.tolist()
keys = keys[1:-1] + [keys[0]] + [keys[-1]]
new_df = new_df.reindex(columns=keys)
if swap_accuracy_position:
keys = new_df.columns.tolist()
keys = keys[1:-1] + [keys[0]] + [keys[-1]]
new_df = new_df.reindex(columns=keys)

net = new_df["net"]
if "pretrained" in new_df:
Expand Down