From 92afd95347b12792556b2d751f331bdaefb91653 Mon Sep 17 00:00:00 2001 From: Abdallah Ahmed Date: Thu, 10 Dec 2020 18:30:52 +0200 Subject: [PATCH 1/5] Add model checkpoint This commit adds the model_checkpoint.py file, which allows: 1. Saving weights based on the lowest val_loss, enabled by the save_best_weights flag in config.yaml 2. Saving weights periodically, at the integer save_frequency specified in config.yaml --- config.yaml | 3 ++ main.py | 5 ++++ pytorch_ner/model_checkpoint.py | 51 +++++++++++++++++++++++++++++++++ pytorch_ner/save.py | 8 ++++-- pytorch_ner/train.py | 27 +++++++++++++++++ 5 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 pytorch_ner/model_checkpoint.py diff --git a/config.yaml b/config.yaml index f770aa6..c1ba0a1 100644 --- a/config.yaml +++ b/config.yaml @@ -52,4 +52,7 @@ train: save: path_to_folder: 'models/test_main/' + model_checkpoint: + save_frequency: 2 + save_best_weights: true export_onnx: true diff --git a/main.py b/main.py index 289d551..4f35865 100644 --- a/main.py +++ b/main.py @@ -8,6 +8,7 @@ from torch.utils.data import DataLoader from pytorch_ner.dataset import NERCollator, NERDataset +from pytorch_ner.model_checkpoint import model_checkpoint from pytorch_ner.nn_modules.architecture import BiLSTM from pytorch_ner.nn_modules.embedding import Embedding from pytorch_ner.nn_modules.linear import LinearHead @@ -201,6 +202,10 @@ def main(path_to_config: str): optimizer=optimizer, device=device, n_epoch=config["train"]["n_epoch"], + export_onnx=config["save"]["export_onnx"], + path_to_folder=config["save"]["path_to_folder"], + save_frequency=config["save"]["model_checkpoint"]["save_frequency"], + save_best_weights=config["save"]["model_checkpoint"]["save_best_weights"], verbose=config["train"]["verbose"], ) diff --git a/pytorch_ner/model_checkpoint.py b/pytorch_ner/model_checkpoint.py new file mode 100644 index 0000000..91cdf89 --- /dev/null +++ b/pytorch_ner/model_checkpoint.py @@ -0,0 +1,51 @@ +import json +import os +from typing import Dict + 
+import torch +import torch.nn as nn +import yaml + +from pytorch_ner.onnx import onnx_export_and_check +from pytorch_ner.utils import mkdir, rmdir +import numpy as np + +def model_checkpoint( + model: nn.Module, + epoch: int, + save_best_weights: bool, + val_metrics, + val_losses, + path_to_folder: str, + export_onnx: bool, + save_frequency: int, + ): + + ''' + This function creates check point based on either one of the two scenarios: + 1. Save best weights regarding the val_loss + 2. Save weights frequently with save_frequency int + + ''' + if save_best_weights: + if np.mean(val_metrics['loss']) < min(val_losses): + # This iteration has lower val_loss, let's save it + val_losses.append(np.mean(val_metrics['loss'])) + pth_file_name = "best_model.pth" + onnx_file_name = "best_model.onnx" + else: + # No need to save weights + return + else: + if epoch % save_frequency == 0: + # We're at multiple of save_frequency, let's save weights + pth_file_name = "model_epoch_" + str(epoch) + ".pth" + onnx_file_name = "model_epoch_" + str(epoch) + ".onnx" + else: + # No need to save weights + return + + + torch.save(model.state_dict(),os.path.join(path_to_folder, pth_file_name)) + if export_onnx: + onnx_export_and_check(model=model, path_to_save=os.path.join(path_to_folder, onnx_file_name)) \ No newline at end of file diff --git a/pytorch_ner/save.py b/pytorch_ner/save.py index fa0fd7f..c2bc05b 100644 --- a/pytorch_ner/save.py +++ b/pytorch_ner/save.py @@ -18,9 +18,11 @@ def save_model( config: Dict, export_onnx: bool = False, ): - # make empty dir - rmdir(path_to_folder) - mkdir(path_to_folder) + + # if os.path.exists(path_to_folder): + # # make empty dir + # rmdir(path_to_folder) + # mkdir(path_to_folder) model.cpu() model.eval() diff --git a/pytorch_ner/train.py b/pytorch_ner/train.py index 60e1d52..f353303 100644 --- a/pytorch_ner/train.py +++ b/pytorch_ner/train.py @@ -2,6 +2,7 @@ from typing import Callable, DefaultDict, List, Optional import numpy as np +import os import 
torch import torch.nn as nn import torch.optim as optim @@ -11,6 +12,9 @@ from pytorch_ner.metrics import calculate_metrics from pytorch_ner.utils import to_numpy +from pytorch_ner.model_checkpoint import model_checkpoint +from pytorch_ner.utils import mkdir, rmdir +from pytorch_ner.onnx import onnx_export_and_check def masking(lengths: torch.Tensor) -> torch.Tensor: """ @@ -144,12 +148,23 @@ def train( optimizer: optim.Optimizer, device: torch.device, n_epoch: int, + export_onnx: bool, + path_to_folder: str, + save_frequency: int, + save_best_weights: bool, testloader: Optional[DataLoader] = None, verbose: bool = True, ): """ Training / validation loop for n_epoch with final testing. """ + if os.path.exists(path_to_folder): + # delete any previous versions of models + rmdir(path_to_folder) + mkdir(path_to_folder) + + # List that tracks val_loss over training to save best weights + val_losses = [np.inf] for epoch in range(n_epoch): @@ -183,6 +198,18 @@ def train( print(f"val {metric_name}: {np.mean(metric_list)}") print() + # Model Checkpoint + model_checkpoint( + model=model, + epoch=epoch, + save_best_weights=save_best_weights, + val_metrics=val_metrics, + val_losses=val_losses, + path_to_folder=path_to_folder, + export_onnx=export_onnx, + save_frequency=save_frequency, + ) + if testloader is not None: test_metrics = validate_loop( From bffaaa224876df1e1eb26a1bd917dec417408e0b Mon Sep 17 00:00:00 2001 From: Abdallah Ahmed Date: Thu, 10 Dec 2020 18:34:20 +0200 Subject: [PATCH 2/5] Fix Black Formatting --- pytorch_ner/model_checkpoint.py | 36 +++++++++++++++++---------------- pytorch_ner/save.py | 2 +- pytorch_ner/train.py | 5 +++-- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/pytorch_ner/model_checkpoint.py b/pytorch_ner/model_checkpoint.py index 91cdf89..3e5952b 100644 --- a/pytorch_ner/model_checkpoint.py +++ b/pytorch_ner/model_checkpoint.py @@ -10,27 +10,28 @@ from pytorch_ner.utils import mkdir, rmdir import numpy as np + def 
model_checkpoint( - model: nn.Module, - epoch: int, - save_best_weights: bool, - val_metrics, - val_losses, - path_to_folder: str, - export_onnx: bool, - save_frequency: int, - ): - - ''' + model: nn.Module, + epoch: int, + save_best_weights: bool, + val_metrics, + val_losses, + path_to_folder: str, + export_onnx: bool, + save_frequency: int, +): + + """ This function creates check point based on either one of the two scenarios: 1. Save best weights regarding the val_loss 2. Save weights frequently with save_frequency int - ''' + """ if save_best_weights: - if np.mean(val_metrics['loss']) < min(val_losses): + if np.mean(val_metrics["loss"]) < min(val_losses): # This iteration has lower val_loss, let's save it - val_losses.append(np.mean(val_metrics['loss'])) + val_losses.append(np.mean(val_metrics["loss"])) pth_file_name = "best_model.pth" onnx_file_name = "best_model.onnx" else: @@ -44,8 +45,9 @@ def model_checkpoint( else: # No need to save weights return - - torch.save(model.state_dict(),os.path.join(path_to_folder, pth_file_name)) + torch.save(model.state_dict(), os.path.join(path_to_folder, pth_file_name)) if export_onnx: - onnx_export_and_check(model=model, path_to_save=os.path.join(path_to_folder, onnx_file_name)) \ No newline at end of file + onnx_export_and_check( + model=model, path_to_save=os.path.join(path_to_folder, onnx_file_name) + ) diff --git a/pytorch_ner/save.py b/pytorch_ner/save.py index c2bc05b..f6ead13 100644 --- a/pytorch_ner/save.py +++ b/pytorch_ner/save.py @@ -18,7 +18,7 @@ def save_model( config: Dict, export_onnx: bool = False, ): - + # if os.path.exists(path_to_folder): # # make empty dir # rmdir(path_to_folder) diff --git a/pytorch_ner/train.py b/pytorch_ner/train.py index f353303..1a97f49 100644 --- a/pytorch_ner/train.py +++ b/pytorch_ner/train.py @@ -16,6 +16,7 @@ from pytorch_ner.utils import mkdir, rmdir from pytorch_ner.onnx import onnx_export_and_check + def masking(lengths: torch.Tensor) -> torch.Tensor: """ Convert lengths 
tensor to binary mask @@ -202,13 +203,13 @@ def train( model_checkpoint( model=model, epoch=epoch, - save_best_weights=save_best_weights, + save_best_weights=save_best_weights, val_metrics=val_metrics, val_losses=val_losses, path_to_folder=path_to_folder, export_onnx=export_onnx, save_frequency=save_frequency, - ) + ) if testloader is not None: From 33d8f0ca2f987c97a31c87e1e2c86b9540a03c94 Mon Sep 17 00:00:00 2001 From: Abdallah Ahmed Date: Thu, 10 Dec 2020 18:37:07 +0200 Subject: [PATCH 3/5] Solves Black and isort formatting --- pytorch_ner/model_checkpoint.py | 2 +- pytorch_ner/train.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pytorch_ner/model_checkpoint.py b/pytorch_ner/model_checkpoint.py index 3e5952b..1c07cbb 100644 --- a/pytorch_ner/model_checkpoint.py +++ b/pytorch_ner/model_checkpoint.py @@ -2,13 +2,13 @@ import os from typing import Dict +import numpy as np import torch import torch.nn as nn import yaml from pytorch_ner.onnx import onnx_export_and_check from pytorch_ner.utils import mkdir, rmdir -import numpy as np def model_checkpoint( diff --git a/pytorch_ner/train.py b/pytorch_ner/train.py index 1a97f49..a0a9432 100644 --- a/pytorch_ner/train.py +++ b/pytorch_ner/train.py @@ -1,8 +1,8 @@ +import os from collections import defaultdict from typing import Callable, DefaultDict, List, Optional import numpy as np -import os import torch import torch.nn as nn import torch.optim as optim @@ -10,11 +10,9 @@ from tqdm import tqdm from pytorch_ner.metrics import calculate_metrics -from pytorch_ner.utils import to_numpy - from pytorch_ner.model_checkpoint import model_checkpoint -from pytorch_ner.utils import mkdir, rmdir from pytorch_ner.onnx import onnx_export_and_check +from pytorch_ner.utils import mkdir, rmdir, to_numpy def masking(lengths: torch.Tensor) -> torch.Tensor: From 3c55d36dc39aaaf988aa327deeacac45bb7ec27a Mon Sep 17 00:00:00 2001 From: Abdallah Ahmed Date: Thu, 10 Dec 2020 18:48:05 +0200 Subject: [PATCH 4/5] Change 
train function in test_train.py Since the train() function was changed in the previous commit to include checkpointing, the train() call in test_train.py needed to be changed as well --- tests/test_train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_train.py b/tests/test_train.py index 4158166..3efab0e 100644 --- a/tests/test_train.py +++ b/tests/test_train.py @@ -75,6 +75,10 @@ optimizer=optimizer, device=device, n_epoch=5, + export_onnx=True, + path_to_folder="models/test_main/", + save_frequency=1, + save_best_weights=True, verbose=False, ) From 8c95794ef3b736abb247b58b0c6d166d3ed65135 Mon Sep 17 00:00:00 2001 From: Abdallah Ahmed Date: Thu, 10 Dec 2020 18:58:24 +0200 Subject: [PATCH 5/5] Edit in save function to solve FileNotFoundError This error occurred because I moved the creation of the new folder to the train function to cope with the checkpoint addition. However, test_save works independently, so the directory (folder) is now created in the save function if it does not already exist --- pytorch_ner/save.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pytorch_ner/save.py b/pytorch_ner/save.py index f6ead13..50a5b61 100644 --- a/pytorch_ner/save.py +++ b/pytorch_ner/save.py @@ -19,10 +19,9 @@ def save_model( export_onnx: bool = False, ): - # if os.path.exists(path_to_folder): - # # make empty dir - # rmdir(path_to_folder) - # mkdir(path_to_folder) + if not os.path.exists(path_to_folder): + # make empty dir + mkdir(path_to_folder) model.cpu() model.eval()