From 78dd4bd700878bc35bb6d8714acf8408824f81c2 Mon Sep 17 00:00:00 2001 From: Francesco Paissan <46992226+fpaissan@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:30:13 +0100 Subject: [PATCH 1/2] Release 0.2.0 (#60) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Extended unit tests to classifier and fixed pooling (#17) * Extended unit tests to classifier and fixed pooling * Changed trigger of doctest workflow * Fixing issue #18 * fixed linters * Add pre-commit hooks * Doctest only on PRs * Fixed network conversion from GPU Also tested on Windows machine. * Create python_versions.yml * Update and rename python_versions.yml to tests.yml * Update export.yaml * CI fix (#21) * Create pre-commit.yaml * remove code.yaml * fixing pre-commit * Doctest with pytest * change trigger * change trigger * Delete LICENSE * checkpoint from filesystem (#20) * checkpoint from filesystem * fixed deps * Update README.md * Update LICENSE * Updating LICENSE --------- Co-authored-by: fpaissan Co-authored-by: Francesco Paissan <46992226+fpaissan@users.noreply.github.com> * Create LICENSE (#22) * Update README.md (#23) * new min python version to 3.8 * 🐛 extra_requirements now have a version - fixed CI (#24) * 🐛 extra_requirements now have a version * fixed linter errors * testing actions * fixed linter * removing tf_probability * fixed tf prob version --------- Co-authored-by: fpaissan * Documentation upgrade - guide for contribution (#25) * add contribution guide to docs * documentation with contribution guide * cosmetic * bump version 0.0.4 -> 0.0.5 * Bump requests from 2.28.2 to 2.31.0 (#27) Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * fix pypi release * Update README.md (#29) * Patch for faster GPU inference (#35) * Patch for faster GPU inference * remove unused zeropad def --------- Co-authored-by: fpaissan * initial commit * add eval loop * add acceleration * modules as dict * add checkpointer * minor * load best checkpoint * restore epoch, optimizer, lr sched * fix logging on multi-gpu * minor fixes * working on single gpu * fix checkpointer + multi-gpu * fp16 might not be ok yet * load_modules and unwrap_model * fixed convert and export * cosmetic on export * add argparse * add metrics -- check something is off with acc * its print strange * fixed checkpointer viz * fix checkpointers and metrics * cosmetic * linters * add credits * fix requirements * fix unittest * remove recipes * remove unused files * remove unused fuctions from networks * fix tests * hot fix * onnx conversion without convert * fix requirements * add default class config and temp folder for debug mode * add doc for class Metric * finish doc MicroMind * update docs * linters fix * new initial page * bump version 0.0.5 -> 0.1.0 * final touches and bumpver * Create .readthedocs.yaml (#42) * Create .readthedocs.yaml * Create requirements.txt * Update .readthedocs.yaml * Update README.md (#43) * Update README.md * Update index.rst * Update README.md * Update index.rst * bump version 0.1.0 -> 0.1.1 * Update core.py - fix test set loading * fix dataloader prepare * Fix checkpointing issues (#48) * fix log hanging * fix checkpointer loading * fix linters * remove breakpoint * remove breakpoint (#49) * remove opt step from test * add metrics in test * Adding image classification recipe (#57) * YOLODataset * it's training, but not really * sth wrong with the loss * remove files after saving better checkpoint * same as last time * fix dict magic * fix dict magic * trying exp train * fix yolo training * yolo ok, but need to fix checkpoints * first refactor * metrics problem * add mAP metric * fix map metric * fix map * working map * refactor and batch/single_image management * metrics code refactor in torch * yolo inference optim * config file management * refactor * major refactor * Starting merge of fp_yolopatch_1 into refactor_yolo (#51) * fix load_state + fix pbar train_loss * remove mAP computation * minor * coco training of yolov8l * increase number of workers * remove breakpoint (#49) * added working mAP * added working mAP * phinet object det - not converging * fix Metric reduction with drop_last * fixed train/val split * little train optim * Fix and improvements to the Metric class (#53) * fix metrics and some polishing * minor bug fixes * cosmetic * fix inference script * bug fix on bbox rescaling * minor * starting training - franz * fix scale boxes * modules parametrization * minor refactor * Fixed Checkpointer logic and uncoupled it from the MicroMind (#54) * separate checkpointer * add status dict * add new checkpointer logic and hooks for save_state * minor to fix importing * polishing training code * ordered imports * moved loss in another file * add recover state to checkpointer * removed breakpoint * fix linters * fix loguru style * fix pytest * moved load_params * update reqs * fix linters * adding some docstrings * add docstrings to prepare_data * Added UserWarning * Delete micromind/co * fix debug option * cosmetic * add credits * fix linters * fix data location * fix data location * fix inference script * supports optional 
scheduler * minor * started YOLO training * switched to cos scheduler * Experiment configuration system (#55) * conf from python * fixed paths * handles configuration override * refactor + cosmetic * fix linters + remove old parsing * started training with timm * distributed working * works on cifar10 with mixup * now selecting bce_loss * linters * Add credis and some docstrings * cosmetic * fix config * add README, remove inference * fix command line script * removing obj det stuff * fix linters * Add inference script * linters * fix readme * add inference time * add inference time to viz * remove cifar10 * fix linters --------- Co-authored-by: Matteo Beltrami * Update inference.py * Official YOLO implementation for micromind (#56) * YOLODataset * it's training, but not really * sth wrong with the loss * remove files after saving better checkpoint * same as last time * fix dict magic * fix dict magic * trying exp train * fix yolo training * yolo ok, but need to fix checkpoints * first refactor * metrics problem * add mAP metric * fix map metric * fix map * working map * refactor and batch/single_image management * metrics code refactor in torch * yolo inference optim * config file management * refactor * major refactor * Starting merge of fp_yolopatch_1 into refactor_yolo (#51) * fix load_state + fix pbar train_loss * remove mAP computation * minor * coco training of yolov8l * increase number of workers * remove breakpoint (#49) * added working mAP * added working mAP * phinet object det - not converging * fix Metric reduction with drop_last * fixed train/val split * little train optim * Fix and improvements to the Metric class (#53) * fix metrics and some polishing * minor bug fixes * cosmetic * fix inference script * bug fix on bbox rescaling * minor * starting training - franz * fix scale boxes * modules parametrization * minor refactor * Fixed Checkpointer logic and uncoupled it from the MicroMind (#54) * separate checkpointer * add status dict * add new checkpointer logic and hooks for save_state * minor to fix importing * polishing training code * ordered imports * moved loss in another file * add recover state to checkpointer * removed breakpoint * fix linters * fix loguru style * fix pytest * moved load_params * update reqs * fix linters * adding some docstrings * add docstrings to prepare_data * Added UserWarning * Delete micromind/co * fix debug option * cosmetic * add credits * fix linters * fix data location * fix data location * fix inference script * supports optional scheduler * minor * started YOLO training * switched to cos scheduler * Experiment configuration system (#55) * conf from python * fixed paths * handles configuration override * refactor + cosmetic * fix linters + remove old parsing * update yaml config * passing iou_threshold to average precision function * initial version of xinet * needs fixing for exportability * add xinet train config * add xinet train config * add xinet in classification recipe * xinet training running * add xinet tests to CI * Update README.md * cosmetic * removed main for xinet's file * removed object detection file * fix linters * fix linters * fix credits * add coco training configuration files * add object detection README * Add object detection files to XiNet as well (#59) * fix linters * fix credits * add coco training configuration files * replace micromind.utils.yolo * update inference script * linters * add hparams to checkpointer * fix validation metrics * export in inference script * update changelog * add tinyCLAP folder * 
fix linters * better docs * fix docs, pls check * fix doc * fix linters --------- Co-authored-by: Matteo Beltrami * bump version 0.1.1 -> 0.2.0 --------- Signed-off-by: dependabot[bot] Co-authored-by: Matteo Beltrami <71525176+matteobeltrami@users.noreply.github.com> Co-authored-by: SebastianCavada Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matteo Tremonti <102596472+Tremo8@users.noreply.github.com> Co-authored-by: Matteo Beltrami --- CHANGELOG | 14 +- docs/source/micromind.networks.rst | 18 + docs/source/micromind.utils.rst | 6 +- examples/mind.py | 67 -- micromind/__init__.py | 17 +- micromind/convert.py | 78 +- micromind/core.py | 263 ++--- micromind/networks/__init__.py | 3 +- micromind/networks/phinet.py | 323 ++++-- micromind/networks/xinet.py | 383 ++++++++ micromind/networks/yolo.py | 520 ++++++++++ micromind/utils/__init__.py | 3 + micromind/utils/checkpointer.py | 280 ++++-- micromind/utils/helpers.py | 83 +- micromind/utils/parse.py | 28 - micromind/utils/yolo.py | 930 ++++++++++++++++++ pyproject.toml | 4 +- recipes/image_classification/README.md | 63 ++ recipes/image_classification/cfg/phinet.py | 93 ++ recipes/image_classification/cfg/xinet.py | 90 ++ .../extra_requirements.txt | 1 + recipes/image_classification/inference.py | 110 +++ recipes/image_classification/prepare_data.py | 169 ++++ recipes/image_classification/train.py | 207 ++++ recipes/object_detection/README.md | 58 ++ recipes/object_detection/cfg/data/coco.names | 80 ++ recipes/object_detection/cfg/data/coco.yaml | 151 +++ recipes/object_detection/cfg/data/coco8.yaml | 144 +++ recipes/object_detection/cfg/yolo_phinet.py | 26 + recipes/object_detection/inference.py | 108 ++ recipes/object_detection/prepare_data.py | 88 ++ recipes/object_detection/train.py | 234 +++++ recipes/object_detection/yolo_loss.py | 137 +++ recipes/tinyCLAP/README.md | 12 + tests/test_networks.py | 72 +- 35 files changed, 4440 insertions(+), 423 deletions(-) delete mode 100644 examples/mind.py create mode 100644 micromind/networks/xinet.py create mode 100644 micromind/networks/yolo.py delete mode 100644 micromind/utils/parse.py create mode 100644 micromind/utils/yolo.py create mode 100644 recipes/image_classification/README.md create mode 100644 recipes/image_classification/cfg/phinet.py create mode 100644 recipes/image_classification/cfg/xinet.py create mode 100644 recipes/image_classification/extra_requirements.txt create mode 100644 recipes/image_classification/inference.py create mode 100644 recipes/image_classification/prepare_data.py create mode 100644 recipes/image_classification/train.py create mode 100644 recipes/object_detection/README.md create mode 100644 recipes/object_detection/cfg/data/coco.names create mode 100644 recipes/object_detection/cfg/data/coco.yaml create mode 100644 recipes/object_detection/cfg/data/coco8.yaml create mode 100644 recipes/object_detection/cfg/yolo_phinet.py create mode 100644 recipes/object_detection/inference.py create mode 100644 recipes/object_detection/prepare_data.py create mode 100644 recipes/object_detection/train.py create mode 100644 recipes/object_detection/yolo_loss.py create mode 100644 recipes/tinyCLAP/README.md diff --git a/CHANGELOG b/CHANGELOG index ede4804c..adca266b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,9 @@ -# Release 0.0.5 +# Release 0.2.0 -1. Improved unit tests; -2. Extended CI to multiple python versions (including 3.8 compatibility); -3. Updated LICENSE to Apache 2.0; -4. 
Minor fixes to image classification recipes; -5. Added guide to contribution in the documentation; +1. Added metrics; +2. Improved checkpointing logic; +3. Added XiNet (https://shorturl.at/mtHT0); +4. dded image classification recipe; +5. Added object detection recipe; +6. Added object detection recipe; +7. New parse configuration that exploits python files for config; diff --git a/docs/source/micromind.networks.rst b/docs/source/micromind.networks.rst index 327a2b4c..84d42e1f 100644 --- a/docs/source/micromind.networks.rst +++ b/docs/source/micromind.networks.rst @@ -11,3 +11,21 @@ micromind.networks.phinet module :members: :undoc-members: :show-inheritance: + + +micromind.networks.xinet module +------------------------------- + +.. automodule:: micromind.networks.xinet + :members: + :undoc-members: + :show-inheritance: + + +micromind.networks.yolo module +------------------------------ + +.. automodule:: micromind.networks.yolo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/micromind.utils.rst b/docs/source/micromind.utils.rst index fd2a7b7c..9ff9bceb 100644 --- a/docs/source/micromind.utils.rst +++ b/docs/source/micromind.utils.rst @@ -20,10 +20,10 @@ micromind.utils.helpers module :undoc-members: :show-inheritance: -micromind.utils.parse module ----------------------------- +micromind.utils.yolo module +--------------------------- -.. automodule:: micromind.utils.parse +.. automodule:: micromind.utils.yolo :members: :undoc-members: :show-inheritance: diff --git a/examples/mind.py b/examples/mind.py deleted file mode 100644 index 958d6127..00000000 --- a/examples/mind.py +++ /dev/null @@ -1,67 +0,0 @@ -from micromind import MicroMind, Metric -from micromind.networks import PhiNet -from micromind.utils.parse import parse_arguments - -import torch -import torch.nn as nn -import torchvision -import torchvision.transforms as transforms - -batch_size = 128 - - -class ImageClassification(MicroMind): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.modules["classifier"] = PhiNet( - (3, 32, 32), include_top=True, num_classes=10 - ) - - def forward(self, batch): - return self.modules["classifier"](batch[0]) - - def compute_loss(self, pred, batch): - return nn.CrossEntropyLoss()(pred, batch[1]) - - -if __name__ == "__main__": - hparams = parse_arguments() - m = ImageClassification(hparams) - - def compute_accuracy(pred, batch): - tmp = (pred.argmax(1) == batch[1]).float() - return tmp - - transform = transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] - ) - - trainset = torchvision.datasets.CIFAR10( - root="data/cifar-10", train=True, download=True, transform=transform - ) - trainloader = torch.utils.data.DataLoader( - trainset, batch_size=batch_size, shuffle=True, num_workers=1 - ) - - testset = torchvision.datasets.CIFAR10( - root="data/cifar-10", train=False, download=True, transform=transform - ) - testloader = torch.utils.data.DataLoader( - testset, batch_size=batch_size, shuffle=False, num_workers=1 - ) - - acc = Metric(name="accuracy", fn=compute_accuracy) - - m.train( - epochs=10, - datasets={"train": trainloader, "val": testloader, "test": testloader}, - metrics=[acc], - debug=hparams.debug, - ) - - m.test( - datasets={"test": testloader}, - ) - - m.export("output_onnx", "onnx", (3, 32, 32)) diff --git a/micromind/__init__.py b/micromind/__init__.py index efdf2a83..c709fb24 100644 --- a/micromind/__init__.py +++ b/micromind/__init__.py @@ -1,17 +1,4 @@ -from .core import 
MicroMind, Metric, Stage +from .core import Metric, MicroMind, Stage # Package version -__version__ = "0.1.1" - - -"""datasets_info is a dictionary that contains information about the attributes -of the datasets. -This dictionary is used in networks.py inside the from_pretrained class method -in order to examine the inputs and initialize the PhiNet or, in case of -mismatching between dataset and Nclasses, raise an AssertionError.""" -datasets_info = { - "CIFAR-100": {"Nclasses": 100, "NChannels": 3, "ext": ".pth.tar"}, - "CIFAR-10": {"Nclasses": 10, "NChannels": 3, "ext": ".pth.tar"}, - "ImageNet-1k": {"Nclasses": 1000, "NChannels": 3, "ext": ".pth.tar"}, - "MNIST": {"Nclasses": 10, "NChannels": 1, "ext": ".pth.tar"}, -} +__version__ = "0.2.0" diff --git a/micromind/convert.py b/micromind/convert.py index 25878fc0..0a2bafb4 100644 --- a/micromind/convert.py +++ b/micromind/convert.py @@ -6,23 +6,45 @@ - Francesco Paissan, 2023 - Alberto Ancilotto, 2023 """ +import os from pathlib import Path -from loguru import logger from typing import Union -import torch.nn as nn + import torch -import os +import torch.nn as nn + +from .utils.helpers import get_logger +import micromind as mm + +logger = get_logger() @torch.no_grad() def convert_to_onnx( - net: nn.Module, + net: Union[nn.Module, mm.MicroMind], save_path: Union[Path, str] = "model.onnx", simplify: bool = False, replace_forward: bool = False, ): """Converts nn.Module to onnx and saves it to save_path. - Optionally simplifies it.""" + Optionally simplifies it. This function is internally used from `mm.MicroMind`. + + Arguments + --------- + net : Union[nn.Module, mm.MicroMind] + PyTorch module to be exported. + save_path : Union[Path, str] + Output path for the ONNX model. + simplify : bool + `True` if you want to simplify the model. Defaults to False. + replace_forward : bool + Used if you want to replace the forward method. It is need if you are calling + this function on a `mm.MicroMind`. Defaults to False. + + Returns + ------- + The path of the ONNX model. : Path + """ save_path = Path(save_path) os.makedirs(save_path.parent, exist_ok=True) x = torch.zeros([1] + list(net.input_shape)) @@ -61,18 +83,35 @@ def convert_to_onnx( @torch.no_grad() def convert_to_openvino( - net: nn.Module, save_path: Path, replace_forward: bool = False + net: Union[nn.Module, mm.MicroMind], save_path: Path, replace_forward: bool = False ) -> str: - """Converts nn.Module to OpenVINO.""" + """Converts model to OpenVINO. Uses ONNX in the process and converts networks + from channel-first to channel-last (for optimized inference). + + Arguments + --------- + net : nn.Module + PyTorch module to be exported. + save_path : Union[Path, str] + Output path for the OpenVINO model. + replace_forward : bool + Used if you want to replace the forward method. It is need if you are calling + this function on a `mm.MicroMind`. Defaults to False. + + Returns + ------- + The path of the XML model. 
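As a usage note for the converters documented in this hunk, a minimal sketch follows (the PhiNet stands in for any network exposing an `input_shape` attribute; the output path is arbitrary and not mandated by the library):

from micromind.networks import PhiNet
from micromind.convert import convert_to_onnx

# Any nn.Module exposing `input_shape` works; MicroMind instances additionally
# need replace_forward=True, as stated in the docstring above.
net = PhiNet((3, 224, 224), include_top=True, num_classes=10)
net.input_shape = (3, 224, 224)  # convert_to_onnx builds its dummy input from this

onnx_path = convert_to_onnx(net, save_path="exports/phinet.onnx", simplify=False)
print(onnx_path)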
: str + + """ try: import os os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" import sys from pathlib import Path - from loguru import logger import onnx + from loguru import logger from onnx_tf.backend import prepare from openvino.tools.mo import main as mo_main @@ -121,12 +160,27 @@ def convert_to_openvino( @torch.no_grad() def convert_to_tflite( - net: nn.Module, - save_path: Path, + net: Union[nn.Module, mm.MicroMind], + save_path: Union[Path, str], batch_quant: torch.Tensor = None, replace_forward: bool = False, ) -> None: - """Converts nn.Module to tf_lite, optionally quantizes it.""" + """Converts nn.Module to tf_lite, optionally quantizes it. + + Arguments + --------- + net : nn.Module + PyTorch module to be exported. + save_path : Union[Path, str] + Output path for the OpenVINO model. + batch_quant : torch.Tensor + Optional batch for quantization. When passed, it is used to create the + statistics of the quantized activations. + replace_forward : bool + Used if you want to replace the forward method. It is need if you are calling + this function on a `mm.MicroMind`. Defaults to False. + + """ try: import os @@ -134,10 +188,10 @@ def convert_to_tflite( import shutil import sys from pathlib import Path - from loguru import logger import numpy as np import tensorflow as tf + from loguru import logger except Exception as e: print(str(e)) diff --git a/micromind/core.py b/micromind/core.py index 9c50a623..c87b5c7e 100644 --- a/micromind/core.py +++ b/micromind/core.py @@ -5,21 +5,20 @@ Authors: - Francesco Paissan, 2023 """ -from typing import Dict, Union, Tuple, Callable, List from abc import ABC, abstractmethod -from dataclasses import dataclass from argparse import Namespace +from dataclasses import dataclass from pathlib import Path -from loguru import logger -from tqdm import tqdm -import shutil +from typing import Callable, Dict, List, Optional, Tuple, Union -from accelerate import Accelerator import torch -import os +from accelerate import Accelerator +from tqdm import tqdm +import warnings + +from .utils.helpers import get_logger -from .utils.helpers import select_and_load_checkpoint, get_random_string -from .utils.checkpointer import Checkpointer +logger = get_logger() # This is used ONLY if you are not using argparse to get the hparams default_cfg = { @@ -82,27 +81,35 @@ class Metric: 0.5 """ - def __init__(self, name: str, fn: Callable, reduction="mean"): + def __init__( + self, + name: str, + fn: Callable, + reduction: Optional[str] = "mean", + eval_only: Optional[bool] = False, + eval_period: Optional[int] = 1, + ): self.name = name self.fn = fn self.reduction = reduction + self.eval_only = eval_only + self.eval_period = eval_period + self.history = {s: [] for s in [Stage.train, Stage.val, Stage.test]} def __call__(self, pred, batch, stage, device="cpu"): - if pred.device != device: - pred = pred.to(device) dat = self.fn(pred, batch) if dat.ndim == 0: dat = dat.unsqueeze(0) - self.history[stage].append(self.fn(pred, batch)) + self.history[stage].append(dat) def reduce(self, stage, clear=False): """ Compute and return the metric for a given prediction and batch data. Arguments - ------- + --------- pred : torch.Tensor The model's prediction. 
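A minimal sketch of how the extended Metric is meant to be used; `eval_only` and `eval_period` are the newly added arguments, and the `compute_accuracy` helper plus the random tensors are illustrative only:

import torch
from micromind import Metric, Stage

def compute_accuracy(pred, batch):
    # per-sample correctness; the framework reduces it later
    return (pred.argmax(1) == batch[1]).float()

acc = Metric("accuracy", fn=compute_accuracy, eval_only=False, eval_period=1)

pred = torch.rand(8, 10)
batch = (torch.rand(8, 3, 32, 32), torch.randint(0, 10, (8,)))
acc(pred, batch, Stage.train)               # appends per-sample values to the history
print(acc.reduce(Stage.train, clear=True))  # mean over the accumulated history, then resets it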
batch : torch.Tensor @@ -114,23 +121,13 @@ def reduce(self, stage, clear=False): """ if self.reduction == "mean": - if clear or ( - self.history[stage][-1].shape[0] != self.history[stage][0].shape[0] - ): - tmp = torch.stack(self.history[stage][:-1]).mean() - else: - tmp = torch.stack(self.history[stage]).mean() + tmp = torch.cat(self.history[stage], dim=0).mean() elif self.reduction == "sum": - if ( - clear - or self.history[stage][-1].shape[0] != self.history[stage][0].shape[0] - ): - tmp = torch.stack(self.history[stage][:-1]).sum() - else: - tmp = torch.stack(self.history[stage]).sum() + tmp = torch.cat(self.history[stage], dim=0).sum() if clear: self.history[stage] = [] + return tmp.item() @@ -158,8 +155,10 @@ def __init__(self, hparams=None): self.hparams = hparams self.input_shape = None - self.device = "cpu" # used just to init the models self.accelerator = Accelerator() + self.device = self.accelerator.device + + self.current_epoch = 0 @abstractmethod def forward(self, batch): @@ -205,8 +204,8 @@ def set_input_shape(self, input_shape: Tuple = (3, 224, 224)): Arguments --------- - input_shape : Tuple - Input shape of the forward step. + input_shape : Tuple + Input shape of the forward step. """ self.input_shape = input_shape @@ -216,8 +215,8 @@ def load_modules(self, checkpoint_path: Union[Path, str]): Arguments --------- - checkpoint_path : Union[Path, str] - Path to the checkpoint where the modules are stored. + checkpoint_path : Union[Path, str] + Path to the checkpoint where the modules are stored. """ dat = torch.load(checkpoint_path) @@ -229,8 +228,6 @@ def load_modules(self, checkpoint_path: Union[Path, str]): modules_keys.remove(k) if len(modules_keys) != 0: - print(modules_keys) - breakpoint() logger.info(f"Couldn't find a state_dict for modules {modules_keys}.") def export( @@ -276,11 +273,12 @@ def configure_optimizers(self): """Configures and defines the optimizer for the task. Defaults to adam with lr=0.001; It can be overwritten by either passing arguments from the command line, or by overwriting this entire method. + Scheduler step is called every optimization step. Returns - --------- - Optimizer and learning rate scheduler - (not implemented yet). : Tuple[torch.optim.Adam, None] + ------- + Optimizer and learning rate scheduler. + : Union[Tuple[torch.optim.Adam, None], torch.optim.Adam] """ assert self.hparams.opt in [ @@ -291,7 +289,8 @@ def configure_optimizers(self): opt = torch.optim.Adam(self.modules.parameters(), self.hparams.lr) elif self.hparams.opt == "sgd": opt = torch.optim.SGD(self.modules.parameters(), self.hparams.lr) - return opt, None # None is for learning rate sched + + return opt def __call__(self, *x, **xv): """Just forwards everything to the forward method.""" @@ -303,74 +302,79 @@ def on_train_start(self): This function gets executed at the beginning of every training. """ - self.experiment_folder = os.path.join( - self.hparams.output_folder, self.hparams.experiment_name - ) - if self.hparams.debug: - self.experiment_folder = "tmp_" + get_random_string() - logger.info(f"Created temporary folder for debug {self.experiment_folder}.") - - save_dir = os.path.join(self.experiment_folder, "save") - if os.path.exists(save_dir): - if len(os.listdir(save_dir)) != 0: - # select which checkpoint and load it. 
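For reference, a hedged sketch of overriding `configure_optimizers` under the new contract (the SGD/cosine choice below is arbitrary): returning a single optimizer is enough, and an `(optimizer, scheduler)` tuple is also accepted, in which case the scheduler is stepped once per optimization step.

import torch
import micromind as mm

class MyMind(mm.MicroMind):
    def forward(self, batch):
        ...

    def compute_loss(self, pred, batch):
        ...

    def configure_optimizers(self):
        opt = torch.optim.SGD(self.modules.parameters(), lr=0.1, momentum=0.9)
        sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=1000)
        return opt, sched  # returning just `opt` is also valid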
- checkpoint, path = select_and_load_checkpoint(save_dir) - self.opt = checkpoint["optimizer"] - self.lr_sched = checkpoint["lr_scheduler"] - self.start_epoch = checkpoint["epoch"] + 1 - - self.load_modules(path) - - if self.accelerator.is_local_main_process: - self.checkpointer = Checkpointer( - checkpoint["key"], - mode=checkpoint["mode"], - checkpoint_path=self.experiment_folder, - ) - logger.info(f"Loaded existing checkpoint from {path}.") - else: - self.opt, self.lr_sched = self.configure_optimizers() - self.start_epoch = 0 + # pass debug status to checkpointer + self.checkpointer.debug = self.hparams.debug - self.checkpointer = Checkpointer( - "val_loss", checkpoint_path=self.experiment_folder - ) + init_opt = self.configure_optimizers() + if isinstance(init_opt, list) or isinstance(init_opt, tuple): + self.opt, self.lr_sched = init_opt else: - os.makedirs(self.experiment_folder, exist_ok=True) + self.opt = init_opt - self.opt, self.lr_sched = self.configure_optimizers() - self.start_epoch = 0 + self.init_devices() - self.checkpointer = Checkpointer( - "val_loss", checkpoint_path=self.experiment_folder - ) + self.start_epoch = 0 + if self.checkpointer is not None: + # recover state + ckpt = self.checkpointer.recover_state() + if ckpt is not None: + accelerate_path, self.start_epoch = ckpt + self.accelerator.load_state(accelerate_path) + else: + tmp = """ + You are not passing a checkpointer to the training function, \ + thus no status will be saved. If this is not the intended behaviour \ + please check https://micromind-toolkit.github.io/docs/"). + """ + warnings.warn(" ".join(tmp.split())) - self.accelerator = Accelerator() - self.device = self.accelerator.device - self.modules.to(self.device) - print("Set device to ", self.device) + def init_devices(self): + """Initializes the data pipeline and modules for DDP and accelerated inference. + To control the device selection, use `accelerate config`.""" + + convert = [self.modules] + if hasattr(self, "opt"): + convert += [self.opt] + + if hasattr(self, "lr_sched"): + convert += [self.lr_sched] + + if hasattr(self, "datasets"): + # if the datasets are store here, prepare them for DDP + convert += list(self.datasets.values()) - convert = [self.modules, self.opt, self.lr_sched] + list(self.datasets.values()) accelerated = self.accelerator.prepare(convert) - self.modules, self.opt, self.lr_sched = accelerated[:3] - for i, key in enumerate(self.datasets): - self.datasets[key] = accelerated[-(i + 1)] + self.modules = accelerated[0] + self.accelerator.register_for_checkpointing(self.modules) + + if hasattr(self, "opt"): + self.opt = accelerated[1] + self.accelerator.register_for_checkpointing(self.opt) + + if hasattr(self, "lr_sched"): + self.lr_sched = accelerated[2] + self.accelerator.register_for_checkpointing(self.lr_sched) + + if hasattr(self, "datasets"): + for i, key in enumerate(list(self.datasets.keys())[::-1]): + self.datasets[key] = accelerated[-(i + 1)] + + self.modules.to(self.device) def on_train_end(self): """Runs at the end of each training. 
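A sketch of the resulting training flow, assuming the mind, dataloaders, `acc` metric and `checkpointer` object are built as in the image-classification recipe (their construction is not shown in this hunk): `train` now receives the checkpointer explicitly and `test` takes its own metric list.

m = ImageClassification(hparams)  # hypothetical subclass of mm.MicroMind

m.train(
    epochs=10,
    datasets={"train": trainloader, "val": valloader},
    metrics=[acc],
    checkpointer=checkpointer,  # may be None, but then no state is saved (a warning is raised)
    debug=hparams.debug,
)

test_metrics = m.test(datasets={"test": testloader}, metrics=[acc])
m.export("output_onnx", "onnx", (3, 32, 32))  # same export call as in the previous release's example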
Cleans up before exiting.""" - if self.hparams.debug: - logger.info(f"Removed temporary folder {self.experiment_folder}.") - shutil.rmtree(self.experiment_folder) + pass - if self.accelerator.is_local_main_process: - self.checkpointer.close() + def eval(self): + self.modules.eval() def train( self, epochs: int = 1, datasets: Dict = {}, metrics: List[Metric] = [], + checkpointer=None, # fix type hints debug: bool = False, ) -> None: """ @@ -396,6 +400,7 @@ def train( """ self.datasets = datasets self.metrics = metrics + self.checkpointer = checkpointer assert "train" in self.datasets, "Training dataloader was not specified." assert epochs > 0, "You must specify at least one epoch." @@ -405,11 +410,12 @@ def train( if self.accelerator.is_local_main_process: logger.info( - f"Starting from epoch {self.start_epoch}." + f"Starting from epoch {self.start_epoch + 1}." + f" Training is scheduled for {epochs} epochs." ) with self.accelerator.autocast(): - for e in range(self.start_epoch, epochs): + for e in range(self.start_epoch + 1, epochs + 1): + self.current_epoch = e pbar = tqdm( self.datasets["train"], unit="batches", @@ -418,7 +424,7 @@ def train( disable=not self.accelerator.is_local_main_process, ) loss_epoch = 0 - pbar.set_description(f"Running epoch {e + 1}/{epochs}") + pbar.set_description(f"Running epoch {self.current_epoch}/{epochs}") self.modules.train() for idx, batch in enumerate(pbar): if isinstance(batch, list): @@ -428,20 +434,29 @@ def train( model_out = self(batch) loss = self.compute_loss(model_out, batch) + loss_epoch += loss.item() self.accelerator.backward(loss) self.opt.step() + if hasattr(self, "lr_sched"): + # ok for cos_lr + self.lr_sched.step() for m in self.metrics: - m(model_out, batch, Stage.train, self.device) + if ( + self.current_epoch + 1 + ) % m.eval_period == 0 and not m.eval_only: + m(model_out, batch, Stage.train, self.device) - running_train = { - "train_" + m.name: m.reduce(Stage.train) for m in self.metrics - } + running_train = {} + for m in self.metrics: + if ( + self.current_epoch + 1 + ) % m.eval_period == 0 and not m.eval_only: + running_train["train_" + m.name] = m.reduce(Stage.train) running_train.update({"train_loss": loss_epoch / (idx + 1)}) - loss_epoch += loss.item() pbar.set_postfix(**running_train) if self.debug and idx > 10: @@ -449,20 +464,25 @@ def train( pbar.close() - train_metrics = { - "train_" + m.name: m.reduce(Stage.train, True) for m in self.metrics - } + train_metrics = {} + for m in self.metrics: + if ( + self.current_epoch + 1 + ) % m.eval_period == 0 and not m.eval_only: + train_metrics["train_" + m.name] = m.reduce(Stage.train, True) + train_metrics.update({"train_loss": loss_epoch / (idx + 1)}) if "val" in datasets: val_metrics = self.validate() - if self.accelerator.is_local_main_process: + if ( + self.accelerator.is_local_main_process + and self.checkpointer is not None + ): self.checkpointer( self, - e, train_metrics, val_metrics, - lambda x: self.accelerator.unwrap_model(x), ) else: val_metrics = train_metrics.update( @@ -500,7 +520,8 @@ def validate(self) -> Dict: model_out = self(batch) loss = self.compute_loss(model_out, batch) for m in self.metrics: - m(model_out, batch, Stage.val, self.device) + if (self.current_epoch + 1) % m.eval_period == 0: + m(model_out, batch, Stage.val, self.device) loss_epoch += loss.item() pbar.set_postfix(loss=loss_epoch / (idx + 1)) @@ -508,7 +529,11 @@ def validate(self) -> Dict: if self.debug and idx > 10: break - val_metrics = {"val_" + m.name: m.reduce(Stage.val, True) for m in 
self.metrics} + val_metrics = {} + for m in self.metrics: + if (self.current_epoch + 1) % m.eval_period == 0: + val_metrics["val_" + m.name] = m.reduce(Stage.val, True) + val_metrics.update({"val_loss": loss_epoch / (idx + 1)}) pbar.close() @@ -516,13 +541,26 @@ def validate(self) -> Dict: return val_metrics @torch.no_grad() - def test(self, datasets: Dict = {}) -> None: - """Runs the test steps.""" - assert "test" in self.datasets, "Test dataloader was not specified." + def test(self, datasets: Dict = {}, metrics: List[Metric] = []) -> None: + """Runs the test steps. + + Arguments + --------- + datasets : Dict + Dictionary with the test DataLoader. Should be present in the key + `test`. + metrics : List[Metric] + List of metrics to compute during test step. + + Returns + ------- + Metrics computed on test set. : Dict[torch.Tensor] + """ + assert "test" in datasets, "Test dataloader was not specified." self.modules.eval() pbar = tqdm( - self.datasets["test"], + datasets["test"], unit="batches", ascii=True, dynamic_ncols=True, @@ -534,11 +572,10 @@ def test(self, datasets: Dict = {}) -> None: for idx, batch in enumerate(pbar): if isinstance(batch, list): batch = [b.to(self.device) for b in batch] - self.opt.zero_grad() model_out = self(batch) loss = self.compute_loss(model_out, batch) - for m in self.metrics: + for m in metrics: m(model_out, batch, Stage.test, self.device) loss_epoch += loss.item() @@ -546,9 +583,7 @@ def test(self, datasets: Dict = {}) -> None: pbar.close() - test_metrics = { - "test_" + m.name: m.reduce(Stage.test, True) for m in self.metrics - } + test_metrics = {"test_" + m.name: m.reduce(Stage.test, True) for m in metrics} test_metrics.update({"test_loss": loss_epoch / (idx + 1)}) s_out = ( "Testing " @@ -558,4 +593,4 @@ def test(self, datasets: Dict = {}) -> None: logger.info(s_out) - return None + return test_metrics diff --git a/micromind/networks/__init__.py b/micromind/networks/__init__.py index c4627bb7..625620c9 100644 --- a/micromind/networks/__init__.py +++ b/micromind/networks/__init__.py @@ -1 +1,2 @@ -from .phinet import PhiNet +from .phinet import PhiNet, PhiNetConvBlock +from .xinet import XiNet, XiConv diff --git a/micromind/networks/phinet.py b/micromind/networks/phinet.py index 4d84262f..d859bf96 100644 --- a/micromind/networks/phinet.py +++ b/micromind/networks/phinet.py @@ -10,10 +10,10 @@ from typing import List import torch +import torch.ao.nn.quantized as nnq import torch.nn as nn import torch.nn.functional as F from torchinfo import summary -import torch.ao.nn.quantized as nnq def _make_divisible(v, divisor=8, min_value=None): @@ -23,6 +23,19 @@ def _make_divisible(v, divisor=8, min_value=None): It ensures that all layers have a channel number that is divisible by divisor. + Arguments + --------- + v : int + The original number of channels. + divisor : int, optional + The divisor to ensure divisibility (default is 8). + min_value : int or None, optional + The minimum value for the divisible channels (default is None). + + Returns + ------- + int + The adjusted number of channels. """ if min_value is None: min_value = divisor @@ -34,7 +47,20 @@ def _make_divisible(v, divisor=8, min_value=None): def correct_pad(input_shape, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling""" + """Returns a tuple for zero-padding for 2D convolution with downsampling. + + Arguments + --------- + input_shape : tuple or list + Shape of the input tensor (height, width). 
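A small, hedged sanity check of the re-exported building blocks and the helper documented above (the value in the comment assumes the usual round-to-multiple-of-divisor behaviour of `_make_divisible`):

from micromind.networks import PhiNet, PhiNetConvBlock, XiNet, XiConv  # new public exports
from micromind.networks.phinet import _make_divisible

print(_make_divisible(20))  # -> 24, i.e. 20 rounded up to a multiple of the default divisor (8)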
+ kernel_size : int or tuple + Size of the convolution kernel. + + Returns + ------- + tuple + A tuple representing the zero-padding in the format (left, right, top, bottom). + """ if isinstance(kernel_size, int): kernel_size = (kernel_size, kernel_size) @@ -54,38 +80,91 @@ def correct_pad(input_shape, kernel_size): def preprocess_input(x, **kwargs): - """Normalise channels between [-1, 1]""" + """Normalize input channels between [-1, 1]. + + Arguments + --------- + x : torch.Tensor + Input tensor to be preprocessed. + + Returns + ------- + torch.Tensor + Normalized tensor with values between [-1, 1]. + """ return (x / 128.0) - 1 def get_xpansion_factor(t_zero, beta, block_id, num_blocks): - """Compute expansion factor based on the formula from the paper""" + """Compute the expansion factor based on the formula from the paper. + + Arguments + --------- + t_zero : float + The base expansion factor. + beta : float + The shape factor. + block_id : int + The identifier of the current block. + num_blocks : int + The total number of blocks. + + Returns + ------- + float + The computed expansion factor. + """ return (t_zero * beta) * block_id / num_blocks + t_zero * ( num_blocks - block_id ) / num_blocks class ReLUMax(torch.nn.Module): + """Implements ReLUMax. + + Arguments + --------- + max_value : float + The maximum value for the clamp operation. + + """ + def __init__(self, max): super(ReLUMax, self).__init__() self.max = max def forward(self, x): + """Forward pass of ReLUMax. + + Arguments + --------- + x : torch.Tensor + Input tensor. + + Returns + ------- + torch.Tensor + Output tensor after applying ReLU with max value. + """ return torch.clamp(x, min=0, max=self.max) class SEBlock(torch.nn.Module): - """Implements squeeze-and-excitation block""" + """Implements squeeze-and-excitation block. - def __init__(self, in_channels, out_channels, h_swish=True): - """Constructor of SEBlock + Arguments + --------- + in_channels : int + Input number of channels. + out_channels : int + Output number of channels. + h_swish : bool, optional + Whether to use the h_swish (default is True). - Args: - in_channels ([int]): [Input number of channels] - out_channels ([int]): [Output number of channels] - h_swish (bool, optional): [Whether to use the h_swish]. Defaults to True. - """ + """ + + def __init__(self, in_channels, out_channels, h_swish=True): super(SEBlock, self).__init__() self.se_conv = nn.Conv2d( @@ -110,13 +189,17 @@ def __init__(self, in_channels, out_channels, h_swish=True): self.mult = nnq.FloatFunctional() def forward(self, x): - """Executes SE Block + """Executes the squeeze-and-excitation block. - Args: - x ([Tensor]): [input tensor] + Arguments + --------- + x : torch.Tensor + Input tensor. - Returns: - [Tensor]: [output of squeeze-and-excitation block] + Returns + ------- + torch.Tensor + Output of the squeeze-and-excitation block. """ inp = x @@ -130,6 +213,29 @@ def forward(self, x): class DepthwiseConv2d(torch.nn.Conv2d): + """Depthwise 2D convolution layer. + + Arguments + --------- + in_channels : int + Number of input channels. + depth_multiplier : int, optional + The channel multiplier for the output channels (default is 1). + kernel_size : int or tuple, optional + Size of the convolution kernel (default is 3). + stride : int or tuple, optional + Stride of the convolution (default is 1). + padding : int or tuple, optional + Zero-padding added to both sides of the input (default is 0). + dilation : int or tuple, optional + Spacing between kernel elements (default is 1). 
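The expansion factor interpolates linearly from `t_zero` on the first block to `t_zero * beta` on the last one; a quick numerical check (values chosen arbitrarily):

from micromind.networks.phinet import get_xpansion_factor

for block_id in range(8):
    # with t_zero=6 and beta=0.75 this goes from 6.0 (block 0) down to 4.5 (block 7)
    print(block_id, get_xpansion_factor(6, 0.75, block_id, 7))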
+ bias : bool, optional + If True, adds a learnable bias to the output (default is False). + padding_mode : str, optional + 'zeros' or 'circular'. Padding mode for convolution (default is 'zeros'). + + """ + def __init__( self, in_channels, @@ -156,7 +262,32 @@ def __init__( class SeparableConv2d(torch.nn.Module): - """Implements SeparableConv2d""" + """Implements SeparableConv2d. + + Arguments + --------- + in_channels : int + Input number of channels. + out_channels : int + Output number of channels. + activation : function, optional + Activation function to apply (default is torch.nn.functional.relu). + kernel_size : int, optional + Kernel size (default is 3). + stride : int, optional + Stride for convolution (default is 1). + padding : int, optional + Padding for convolution (default is 0). + dilation : int, optional + Dilation factor for convolution (default is 1). + bias : bool, optional + If True, adds a learnable bias to the output (default is True). + padding_mode : str, optional + Padding mode for convolution (default is 'zeros'). + depth_multiplier : int, optional + Depth multiplier (default is 1). + + """ def __init__( self, @@ -171,19 +302,6 @@ def __init__( padding_mode="zeros", depth_multiplier=1, ): - """Constructor of SeparableConv2d - - Args: - in_channels ([int]): [Input number of channels] - out_channels ([int]): [Output number of channels] - kernel_size (int, optional): [Kernel size]. Defaults to 3. - stride (int, optional): [Stride for conv]. Defaults to 1. - padding (int, optional): [Padding for conv]. Defaults to 0. - dilation (int, optional): []. Defaults to 1. - bias (bool, optional): []. Defaults to True. - padding_mode (str, optional): []. Defaults to 'zeros'. - depth_multiplier (int, optional): [Depth multiplier]. Defaults to 1. - """ super().__init__() self._layers = torch.nn.ModuleList() @@ -220,13 +338,17 @@ def __init__( self._layers.append(activation) def forward(self, x): - """Executes SeparableConv2d block + """Executes the SeparableConv2d block. - Args: - x ([Tensor]): [Input tensor] + Arguments + --------- + x : torch.Tensor + Input tensor. - Returns: - [Tensor]: [Output of convolution] + Returns + ------- + torch.Tensor + Output of the convolution. """ for layer in self._layers: x = layer(x) @@ -235,7 +357,30 @@ def forward(self, x): class PhiNetConvBlock(nn.Module): - """Implements PhiNet's convolutional block""" + """Implements PhiNet's convolutional block. + + Arguments + --------- + in_shape : tuple + Input shape of the conv block. + expansion : float + Expansion coefficient for this convolutional block. + stride: int + Stride for the conv block. + filters : int + Output channels of the convolutional block. + block_id : int + ID of the convolutional block. + has_se : bool + Whether to include use Squeeze and Excite or not. + res : bool + Whether to use the residual connection or not. + h_swish : bool + Whether to use HSwish or not. + k_size : int + Kernel size for the depthwise convolution. + + """ def __init__( self, @@ -251,30 +396,6 @@ def __init__( dp_rate=0.05, divisor=1, ): - """Defines the structure of a PhiNet convolutional block. - - Arguments - ------- - in_shape : tuple - Input shape of the conv block. - expansion : float - Expansion coefficient for this convolutional block. - stride: int - Stride for the conv block. - filters : int - Output channels of the convolutional block. - block_id : int - ID of the convolutional block. - has_se : bool - Whether to include use Squeeze and Excite or not. 
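A hedged instantiation of a single block with the documented arguments (the shapes and flags below are arbitrary and only meant to show the expected keywords):

import torch
from micromind.networks import PhiNetConvBlock

block = PhiNetConvBlock(
    in_shape=(48, 28, 28),  # (C, H, W) of the incoming feature map
    expansion=4.0,
    stride=1,
    filters=48,
    block_id=2,
    has_se=True,
    res=True,
    h_swish=True,
    k_size=3,
)
out = block(torch.rand(1, 48, 28, 28))  # same spatial resolution, `filters` output channels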
- res : bool - Whether to use the residual connection or not. - h_swish : bool - Whether to use HSwish or not. - k_size : int - Kernel size for the depthwise convolution. - - """ super(PhiNetConvBlock, self).__init__() self.param_count = 0 @@ -380,14 +501,17 @@ def __init__( self.op = nnq.FloatFunctional() def forward(self, x): - """Executes PhiNet convolutional block + """Executes the PhiNet convolutional block. - Arguments: - x : torch.Tensor - Input to the convolutional block. + Arguments + --------- + x : torch.Tensor + Input to the convolutional block. - Returns: - Ouput of the convolutional block : torch.Tensor + Returns + ------- + torch.Tensor + Output of the convolutional block. """ if self.skip_conn: @@ -403,6 +527,30 @@ def forward(self, x): class PhiNet(nn.Module): + """ + This class implements the PhiNet architecture. + + Arguments + --------- + input_shape : tuple + Input resolution as (C, H, W). + num_layers : int + Number of convolutional blocks. + alpha: float + Width multiplier for PhiNet architecture. + beta : float + Shape factor of PhiNet. + t_zero : float + Base expansion factor for PhiNet. + include_top : bool + Whether to include classification head or not. + num_classes : int + Number of classes for the classification head. + compatibility : bool + `True` to maximise compatibility among embedded platforms (changes network). + + """ + def get_complexity(self): """Returns MAC and number of parameters of initialized architecture. @@ -480,35 +628,15 @@ def __init__( h_swish: bool = True, # S1 squeeze_excite: bool = True, # S1 divisor: int = 1, + return_layers=None, ) -> None: - """This class implements the PhiNet architecture. - - Arguments - ------- - input_shape : tuple - Input resolution as (C, H, W). - num_layers : int - Number of convolutional blocks. - alpha: float - Width multiplier for PhiNet architecture. - beta : float - Shape factor of PhiNet. - t_zero : float - Base expansion factor for PhiNet. - include_top : bool - Whether to include classification head or not. - num_classes : int - Number of classes for the classification head. - compatibility : bool - `True` to maximise compatibility among embedded platforms (changes network). - - """ super(PhiNet, self).__init__() self.alpha = alpha self.beta = beta self.t_zero = t_zero self.num_layers = num_layers self.num_classes = num_classes + self.return_layers = return_layers if compatibility: # disables operations hard for some platforms h_swish = False @@ -686,22 +814,33 @@ def __init__( ), ) + if self.return_layers is not None: + print(f"PhiNet configured to return layers {self.return_layers}:") + for i in self.return_layers: + print(f"Layer {i} - {self._layers[i].__class__}") + def forward(self, x): """Executes PhiNet network Arguments ------- - x : torch.Tensor - Network input. + x : torch.Tensor + Network input. 
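A minimal construction sketch matching the documented arguments (hyperparameter values are arbitrary); `get_complexity` reports MACs and parameter count for the chosen configuration:

from micromind.networks import PhiNet

net = PhiNet(
    input_shape=(3, 32, 32),
    num_layers=7,
    alpha=0.5,
    beta=1.0,
    t_zero=6,
    include_top=True,
    num_classes=10,
)
print(net.get_complexity())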
Returns ------ Logits if `include_top=True`, otherwise embeddings : torch.Tensor """ - for layers in self._layers: + ret = [] + for i, layers in enumerate(self._layers): x = layers(x) + if self.return_layers is not None: + if i in self.return_layers: + ret.append(x) if self.classify: x = self.classifier(x) + if self.return_layers is not None: + return x, ret return x diff --git a/micromind/networks/xinet.py b/micromind/networks/xinet.py new file mode 100644 index 00000000..949f6aa4 --- /dev/null +++ b/micromind/networks/xinet.py @@ -0,0 +1,383 @@ +""" +Code for XiNet (https://shorturl.at/mtHT0) + +Authors: + - Francesco Paissan, 2023 + - Alberto Ancilotto, 2023 +""" +import torch +import torch.nn as nn + +from typing import Union, Tuple, Optional, List + + +def autopad(k: int, p: Optional[int] = None): + """Implements padding to mimic "same" behaviour. + Arguments + --------- + k : int + Kernel size for the convolution. + p : Optional[int] + Padding value to be applied. + + """ + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +class XiConv(nn.Module): + """Implements XiNet's convolutional block as presented in the original paper. + + Arguments + --------- + c_in: int + Number of input channels. + c_out: int + Number of output channels. + kernel_size: Union[int, Tuple] + Kernel size for the main convolution. + stride: Union[int, Tuple] + Stride for the main convolution. + padding: Optional[Union[int, Tuple]] + Padding that is applied in the main convolution. + groups: Optional[int] + Number of groups for the main convolution. + act: Optional[bool] + When True, uses SiLU activation function after + the main convolution. + gamma: Optional[float] + Compression factor for the convolutional block. + attention: Optional[bool] + When True, uses attention. + skip_tensor_in: Optional[bool] + When True, defines broadcasting skip connection block. + skip_res : Optional[List] + Spatial resolution of the skip connection, such that + average pooling is statically defined. + skip_channels: Optional[int] + Number of channels for the input block. + pool: Optional[bool] + When True, applies pooling after the main convolution. + attention_k: Optional[int] + Kernel for the attention module. + attention_lite: Optional[bool] + When True, uses efficient attention implementation. + batchnorm: Optional[bool] + When True, uses batch normalization inside the ConvBlock. + dropout_rate: Optional[int] + Dropout probability. + skip_k: Optional[int] + Kernel for the broadcast skip connection. + """ + + def __init__( + self, + c_in: int, + c_out: int, + kernel_size: Union[int, Tuple] = 3, + stride: Union[int, Tuple] = 1, + padding: Optional[Union[int, Tuple]] = None, + groups: Optional[int] = 1, + act: Optional[bool] = True, + gamma: Optional[float] = 4, + attention: Optional[bool] = True, + skip_tensor_in: Optional[bool] = True, + skip_res: Optional[List] = None, + skip_channels: Optional[int] = 1, + pool: Optional[bool] = None, + attention_k: Optional[int] = 3, + attention_lite: Optional[bool] = True, + batchnorm: Optional[bool] = True, + dropout_rate: Optional[int] = 0, + skip_k: Optional[int] = 1, + ): + super().__init__() + self.compression = int(gamma) + self.attention = attention + self.attention_lite = attention_lite + self.attention_lite_ch_in = c_out // self.compression // 2 + self.pool = pool + self.batchnorm = batchnorm + self.dropout_rate = dropout_rate + + if skip_tensor_in: + assert skip_res is not None, "Specifcy shape of skip tensor." 
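Feature-extraction sketch for the new `return_layers` argument: the forward pass then returns the final output together with the activations of the requested layers (indices into `net._layers`, chosen arbitrarily here).

import torch
from micromind.networks import PhiNet

net = PhiNet(
    (3, 224, 224),
    alpha=0.67,
    beta=1.0,
    t_zero=4,
    num_layers=6,
    include_top=False,
    return_layers=[4, 5],
)
features, intermediate = net(torch.rand(1, 3, 224, 224))
print([t.shape for t in intermediate])  # one tensor per requested layer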
+ self.adaptive_pooling = nn.AdaptiveAvgPool2d( + (int(skip_res[0]), int(skip_res[1])) + ) + + if self.compression > 1: + self.compression_conv = nn.Conv2d( + c_in, c_out // self.compression, 1, 1, groups=groups, bias=False + ) + self.main_conv = nn.Conv2d( + c_out // self.compression if self.compression > 1 else c_in, + c_out, + kernel_size, + stride, + groups=groups, + padding=autopad(kernel_size, padding), + bias=False, + ) + self.act = ( + nn.SiLU() + if act is True + else (act if isinstance(act, nn.Module) else nn.Identity()) + ) + + if attention: + if attention_lite: + self.att_pw_conv = nn.Conv2d( + c_out, self.attention_lite_ch_in, 1, 1, groups=groups, bias=False + ) + self.att_conv = nn.Conv2d( + c_out if not attention_lite else self.attention_lite_ch_in, + c_out, + attention_k, + 1, + groups=groups, + padding=autopad(attention_k, None), + bias=False, + ) + self.att_act = nn.Sigmoid() + + if pool: + self.mp = nn.MaxPool2d(pool) + if skip_tensor_in: + self.skip_conv = nn.Conv2d( + skip_channels, + c_out // self.compression, + skip_k, + 1, + groups=groups, + padding=autopad(skip_k, None), + bias=False, + ) + if batchnorm: + self.bn = nn.BatchNorm2d(c_out) + if dropout_rate > 0: + self.do = nn.Dropout(dropout_rate) + + def forward(self, x: torch.Tensor): + """Computes the forward step of the XiNet's convolutional block. + Arguments + --------- + x : torch.Tensor + Input tensor. + + Returns + ------- + ConvBlock output. : torch.Tensor + """ + s = None + # skip connection + if isinstance(x, list): + # s = F.adaptive_avg_pool2d(x[1], output_size=x[0].shape[2:]) + s = self.adaptive_pooling(x[1]) + s = self.skip_conv(s) + x = x[0] + + # compression convolution + if self.compression > 1: + x = self.compression_conv(x) + + if s is not None: + x = x + s + + if self.pool: + x = self.mp(x) + + # main conv and activation + x = self.main_conv(x) + if self.batchnorm: + x = self.bn(x) + x = self.act(x) + + # attention conv + if self.attention: + if self.attention_lite: + att_in = self.att_pw_conv(x) + else: + att_in = x + y = self.att_act(self.att_conv(att_in)) + x = x * y + + if self.dropout_rate > 0: + x = self.do(x) + + return x + + +class XiNet(nn.Module): + """Defines a XiNet. + + Arguments + --------- + input_shape : List + Shape of the input tensor. + alpha: float + Width multiplier. + gamma : float + Compression factor. + num_layers : int = 5 + Number of convolutional blocks. + num_classes : int + Number of classes. It is used only when include_top is True. + include_top : Optional[bool] + When True, defines an MLP for classification. + base_filters : int + Number of base filters for the ConvBlock. + return_layers : Optional[List] + Ids of the layers to be returned after processing the foward + step. + + Example + ------- + .. 
doctest:: + + >>> from micromind.networks import XiNet + >>> model = XiNet((3, 224, 224)) + """ + + def __init__( + self, + input_shape: List, + alpha: float = 1.0, + gamma: float = 4.0, + num_layers: int = 5, + num_classes=1000, + include_top=False, + base_filters: int = 16, + return_layers: Optional[List] = None, + ): + super().__init__() + + self._layers = nn.ModuleList([]) + self.input_shape = torch.Tensor(input_shape) + self.include_top = include_top + self.return_layers = return_layers + count_downsample = 0 + + self.conv1 = nn.Sequential( + nn.Conv2d( + input_shape[0], + int(base_filters * alpha), + 7, + padding=7 // 2, + stride=2, + bias=False, + ), + nn.BatchNorm2d(int(base_filters * alpha)), + nn.SiLU(), + ) + count_downsample += 1 + + num_filters = [ + int(2 ** (base_filters**0.5 + i)) for i in range(0, num_layers) + ] + skip_channels_num = int(base_filters * 2 * alpha) + + for i in range( + len(num_filters) - 2 + ): # Account for the last two layers separately + self._layers.append( + XiConv( + int(num_filters[i] * alpha), + int(num_filters[i + 1] * alpha), + kernel_size=3, + stride=1, + pool=2, + skip_tensor_in=(i != 0), + skip_res=self.input_shape[1:] / (2**count_downsample), + skip_channels=skip_channels_num, + gamma=gamma, + ) + ) + count_downsample += 1 + self._layers.append( + XiConv( + int(num_filters[i + 1] * alpha), + int(num_filters[i + 1] * alpha), + kernel_size=3, + stride=1, + skip_tensor_in=True, + skip_res=self.input_shape[1:] / (2**count_downsample), + skip_channels=skip_channels_num, + gamma=gamma, + ) + ) + + # Adding the last two layers with attention=False + self._layers.append( + XiConv( + int(num_filters[-2] * alpha), + int(num_filters[-1] * alpha), + kernel_size=3, + stride=1, + skip_tensor_in=True, + skip_res=self.input_shape[1:] / (2**count_downsample), + skip_channels=skip_channels_num, + attention=False, + ) + ) + # count_downsample += 1 + self._layers.append( + XiConv( + int(num_filters[-1] * alpha), + int(num_filters[-1] * alpha), + kernel_size=3, + stride=1, + skip_tensor_in=True, + skip_res=self.input_shape[1:] / (2**count_downsample), + skip_channels=skip_channels_num, + attention=False, + ) + ) + + if self.return_layers is not None: + print(f"XiNet configured to return layers {self.return_layers}:") + for i in self.return_layers: + print(f"Layer {i} - {self._layers[i].__class__}") + + self.input_shape = input_shape + if self.include_top: + self.classifier = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + nn.Flatten(), + nn.Linear(int(num_filters[-1] * alpha), num_classes), + ) + + def forward(self, x): + """Computes the forward step of the XiNet. + Arguments + --------- + x : torch.Tensor + Input tensor. + + Returns + ------- + Output of the network, as defined from + the init. : Union[torch.Tensor, Tuple] + """ + x = self.conv1(x) + skip = None + ret = [] + for layer_id, layer in enumerate(self._layers): + if layer_id == 0: + x = layer(x) + skip = x + else: + x = layer([x, skip]) + + if self.return_layers is not None: + if layer_id in self.return_layers: + ret.append(x) + + if self.include_top: + x = self.classifier(x) + + if self.return_layers is not None: + return x, ret + + return x diff --git a/micromind/networks/yolo.py b/micromind/networks/yolo.py new file mode 100644 index 00000000..81cedb87 --- /dev/null +++ b/micromind/networks/yolo.py @@ -0,0 +1,520 @@ +""" +YOLOv8 building blocks. + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 + +This file contains the definition of the building blocks of the yolov8 network. 
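Before moving on to the YOLO blocks, a hedged usage sketch of XiNet in both configurations (classification head enabled vs. intermediate-feature extraction); all hyperparameter values are illustrative.

import torch
from micromind.networks import XiNet

# classification head enabled
clf = XiNet((3, 224, 224), alpha=1.0, num_classes=10, include_top=True)
logits = clf(torch.rand(1, 3, 224, 224))

# backbone mode: also return the activations of selected internal layers
backbone = XiNet((3, 224, 224), return_layers=[2, 4, 6])
features, intermediate = backbone(torch.rand(1, 3, 224, 224))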
+Model architecture has been taken from +https://github.com/ultralytics/ultralytics/issues/189 +""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from micromind.utils.yolo import autopad, dist2bbox, make_anchors + + +class Upsample: + def __init__(self, scale_factor, mode="nearest"): + assert mode == "nearest" + self.scale_factor = scale_factor + self.mode = mode + + def __call__(self, x: torch.Tensor): + assert ( + len(x.shape) > 2 and len(x.shape) <= 5 + ), "Input tensor must have 3 to 5 dimensions" + upsampled = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode) + return upsampled + + +class Conv(nn.Module): + """Implements YOLOv8's convolutional block + + Arguments + --------- + c1 : int + Input channels of the convolutional block. + c2 : int + Output channels of the convolutional block. + kernel_size : int + Kernel size for the convolutional block. + stride : int + Stride for the convolutional block. + padding : int + Padding for the convolutional block. + dilation : int + Dilation for the convolutional block. + groups : int + Groups for the convolutional block. + """ + + def __init__( + self, c1, c2, kernel_size=1, stride=1, padding=None, dilation=1, groups=1 + ): + super().__init__() + self.conv = nn.Conv2d( + c1, + c2, + kernel_size=kernel_size, + stride=stride, + padding=autopad(kernel_size, padding, dilation), + dilation=dilation, + groups=groups, + bias=False, + ) + self.bn = nn.BatchNorm2d(c2, eps=0.001, momentum=0.03) + self.silu = nn.SiLU() + + def forward(self, x): + """Executes YOLOv8 convolutional block. + + Arguments + --------- + x : torch.Tensor + Input to the convolutional block. + + Returns + ------- + Ouput of the convolutional block : torch.Tensor + """ + x = self.conv(x) + x = self.bn(x) + x = self.silu(x) + return x + + +class Bottleneck(nn.Module): + """Implements YOLOv8's bottleneck block. + + Arguments + --------- + c1 : int + Input channels of the bottleneck block. + c2 : int + Output channels of the bottleneck block. + shortcut : bool + Decides whether to perform a shortcut in the bottleneck block. + groups : int + Groups for the bottleneck block. + kernels : list + Kernel size for the bottleneck block. + channel_factor : float + Decides the number of channels of the intermediate result + between the two convolutional blocks. + """ + + def __init__( + self, + c1, + c2, + shortcut: bool, + groups=1, + kernels: list = (3, 3), + channel_factor=0.5, + ): + super().__init__() + c_ = int(c2 * channel_factor) + self.cv1 = Conv(c1, c_, kernel_size=kernels[0], stride=1, padding=None) + self.cv2 = Conv( + c_, c2, kernel_size=kernels[1], stride=1, padding=None, groups=groups + ) + self.residual = c1 == c2 and shortcut + + def forward(self, x): + """Executes YOLOv8 bottleneck block. + + Arguments + --------- + x : torch.Tensor + Input to the bottleneck block. + + Returns + ------- + Ouput of the bottleneck block : torch.Tensor + """ + if self.residual: + return x + self.cv2(self.cv1(x)) + else: + return self.cv2(self.cv1(x)) + + +class C2f(nn.Module): + """Implements YOLOv8's C2f block. + + Arguments + --------- + c1 : int + Input channels of the C2f block. + c2 : int + Output channels of the C2f block. + n : int + Number of bottleck blocks executed in the C2f block. + shortcut : bool + Decides whether to perform a shortcut in the bottleneck blocks. + groups : int + Groups for the C2f block. + e : float + Factor for cancatenating intermeidate results. 
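A minimal shape check for the blocks defined so far (channel sizes and resolutions are arbitrary): `Conv` with stride 2 halves the resolution, while `C2f` preserves it.

import torch
from micromind.networks.yolo import Conv, C2f

x = torch.rand(1, 64, 80, 80)
y = Conv(64, 128, kernel_size=3, stride=2, padding=1)(x)  # -> (1, 128, 40, 40)
z = C2f(128, 128, n=2, shortcut=True)(y)                  # -> (1, 128, 40, 40)
print(y.shape, z.shape)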
+ """ + + def __init__(self, c1, c2, n=1, shortcut=False, groups=1, e=0.5): + super().__init__() + self.c = int(c2 * e) + self.cv1 = Conv( + c1, + 2 * self.c, + 1, + ) + self.cv2 = Conv((2 + n) * self.c, c2, 1) + self.bottleneck = nn.ModuleList( + [ + Bottleneck( + self.c, + self.c, + shortcut, + groups, + kernels=[(3, 3), (3, 3)], + channel_factor=1.0, + ) + for _ in range(n) + ] + ) + + def forward(self, x): + """Executes YOLOv8 C2f block. + + Arguments + --------- + x : torch.Tensor + Input to the C2f block. + + Returns + ------- + Ouput of the C2f block : torch.Tensor + """ + x = self.cv1(x) + y = list(torch.chunk(x, chunks=2, dim=1)) + y.extend(m(y[-1]) for m in self.bottleneck) + z = y[0] + for i in y[1:]: + z = torch.cat((z, i), dim=1) + return self.cv2(z) + + +class SPPF(nn.Module): + """Implements YOLOv8's SPPF block. + + Arguments + --------- + c1 : int + Input channels of the SPPF block. + c2 : int + Output channels of the SPPF block. + k : int + Kernel size for the SPPF block Maxpooling operations + """ + + def __init__(self, c1, c2, k=5): + super().__init__() + c_ = c1 // 2 + self.cv1 = Conv(c1, c_, 1, 1, padding=None) + self.cv2 = Conv(c_ * 4, c2, 1, 1, padding=None) + self.maxpool = nn.MaxPool2d( + kernel_size=k, stride=1, padding=2, dilation=1, ceil_mode=False + ) + + def forward(self, x): + """Executes YOLOv8 SPPF block. + + Arguments + --------- + x : torch.Tensor + Input to the SPPF block. + + Returns + ------- + Ouput of the SPPF block : torch.Tensor + """ + x = self.cv1(x) + x2 = self.maxpool(x) + x3 = self.maxpool(x2) + x4 = self.maxpool(x3) + + y = torch.cat((x, x2, x3, x4), dim=1) + return self.cv2(y) + + +class DFL(nn.Module): + """Implements YOLOv8's DFL block. + + Arguments + --------- + c1 : int + Input channels of the DFL block. + """ + + def __init__(self, c1=16): + super().__init__() + self.conv = nn.Conv2d(c1, 1, kernel_size=1, bias=False) + weight = torch.arange(c1).reshape(1, c1, 1, 1).float() + self.conv.weight.requires_grad = False + self.conv.weight.copy_(weight) + self.c1 = c1 + + @torch.no_grad() # TODO: check when training + def forward(self, x): + """Executes YOLOv8 DFL block. + + Arguments + --------- + x : torch.Tensor + Input to the DFL block. + + Returns + ------- + Ouput of the DFL block : torch.Tensor + """ + b, _, a = x.shape + y = x.reshape(b, 4, self.c1, a).transpose(2, 1) + y = F.softmax(y, dim=1) + y = self.conv(y) + y = y.reshape(b, 4, a) + return y + + +class Darknet(nn.Module): + """Implements YOLOv8's convolutional backbone. + + Arguments + --------- + w : float + Width multiple of the Darknet. + r : float + Ratio multiple of the Darknet. + d : float + Depth multiple of the Darknet. 
+ """ + + def __init__(self, w, r, d): + super().__init__() + self.b1 = nn.Sequential( + Conv(c1=3, c2=int(64 * w), kernel_size=3, stride=2, padding=1), + Conv(int(64 * w), int(128 * w), kernel_size=3, stride=2, padding=1), + ) + self.b2 = nn.Sequential( + C2f(c1=int(128 * w), c2=int(128 * w), n=round(3 * d), shortcut=True), + Conv(int(128 * w), int(256 * w), 3, 2, 1), + C2f(int(256 * w), int(256 * w), round(6 * d), True), + ) + self.b3 = nn.Sequential( + Conv(int(256 * w), int(512 * w), kernel_size=3, stride=2, padding=1), + C2f(int(512 * w), int(512 * w), round(6 * d), True), + ) + self.b4 = nn.Sequential( + Conv(int(512 * w), int(512 * w * r), kernel_size=3, stride=2, padding=1), + C2f(int(512 * w * r), int(512 * w * r), round(3 * d), True), + ) + + self.b5 = SPPF(int(512 * w * r), int(512 * w * r), 5) + + def forward(self, x): + """Executes YOLOv8 convolutional backbone. + + Arguments + --------- + x : torch.Tensor + Input to the Darknet. + + Returns + ------- + Three intermediate representations with different resolutions : tuple + """ + x1 = self.b1(x) + x2 = self.b2(x1) + x3 = self.b3(x2) + x4 = self.b4(x3) + x5 = self.b5(x4) + return (x2, x3, x5) + + +class Yolov8Neck(nn.Module): + """Implements YOLOv8's neck. + + Arguments + --------- + w : float + Width multiple of the Darknet. + r : float + Ratio multiple of the Darknet. + d : float + Depth multiple of the Darknet. + """ + + def __init__(self, filters=[256, 512, 768], up=[2, 2], d=1): + super().__init__() + self.up1 = Upsample(up[0], mode="nearest") + self.up2 = Upsample(up[1], mode="nearest") + self.n1 = C2f( + c1=int(filters[1] + filters[2]), + c2=int(filters[1]), + n=round(3 * d), + shortcut=False, + ) + self.n2 = C2f( + c1=int(filters[0] + filters[1]), + c2=int(filters[0]), + n=round(3 * d), + shortcut=False, + ) + self.n3 = Conv( + c1=int(filters[0]), c2=int(filters[0]), kernel_size=3, stride=2, padding=1 + ) + self.n4 = C2f( + c1=int(filters[0] + filters[1]), + c2=int(filters[1]), + n=round(3 * d), + shortcut=False, + ) + self.n5 = Conv( + c1=int(filters[1]), c2=int(filters[1]), kernel_size=3, stride=2, padding=1 + ) + self.n6 = C2f( + c1=int(filters[1] + filters[2]), + c2=int(filters[2]), + n=round(3 * d), + shortcut=False, + ) + + def forward(self, p3, p4, p5): + """Executes YOLOv8 neck. + + Arguments + --------- + x : tuple + Input to the neck. + + Returns + ------- + Three intermediate representations with different resolutions : list + """ + x = self.up1(p5) + x = torch.cat((x, p4), dim=1) + x = self.n1(x) + h1 = self.up2(x) + h1 = torch.cat((h1, p3), dim=1) + head_1 = self.n2(h1) + h2 = self.n3(head_1) + h2 = torch.cat((h2, x), dim=1) + head_2 = self.n4(h2) + h3 = self.n5(head_2) + h3 = torch.cat((h3, p5), dim=1) + head_3 = self.n6(h3) + return [head_1, head_2, head_3] + + +class DetectionHead(nn.Module): + """Implements YOLOv8's detection head. + + Arguments + --------- + nc : int + Number of classes to predict. + filters : tuple + Number of channels of the three inputs of the detection head. 
+ """ + + def __init__(self, nc=80, filters=()): + super().__init__() + self.reg_max = 16 + self.nc = nc + self.nl = len(filters) + self.no = nc + self.reg_max * 4 + self.stride = torch.tensor([8.0, 16.0, 32.0], dtype=torch.float16) + c2, c3 = max((16, filters[0] // 4, self.reg_max * 4)), max( + filters[0], min(self.nc, 104) + ) # channels + self.cv2 = nn.ModuleList( + nn.Sequential( + Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1) + ) + for x in filters + ) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) + for x in filters + ) + self.dfl = DFL(self.reg_max) + + def forward(self, x): + """Executes YOLOv8 detection head. + + Arguments + --------- + x : list + Input to the detection head. + + Returns + ------- + Output of the detection head : torch.Tensor + """ + for i in range(self.nl): + a = self.cv2[i](x[i]) + b = self.cv3[i](x[i]) + x[i] = torch.cat((a, b), dim=1) + self.anchors, self.strides = ( + xl.transpose(0, 1) for xl in make_anchors(x, self.stride, 0.5) + ) + + y = [(i.reshape(x[0].shape[0], self.no, -1)) for i in x] + x_cat = torch.cat((y[0], y[1], y[2]), dim=2) + box, cls = x_cat[:, : self.reg_max * 4], x_cat[:, self.reg_max * 4 :] + dbox = ( + dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) + * self.strides + ) + z = torch.cat((dbox, nn.Sigmoid()(cls)), dim=1) + return z, x + + +class YOLOv8(nn.Module): + """Implements YOLOv8 network. + + Arguments + --------- + w : float + Width multiple of the Darknet. + r : float + Ratio multiple of the Darknet. + d : float + Depth multiple of the Darknet. + num_classes : int + Number of classes to predict. + """ + + def __init__(self, w, r, d, num_classes=80): + super().__init__() + self.net = Darknet(w, r, d) + self.fpn = Yolov8Neck(w, r, d) + self.head = DetectionHead( + num_classes, filters=(int(256 * w), int(512 * w), int(512 * w * r)) + ) + + def forward(self, x): + """Executes YOLOv8 network. + + Arguments + --------- + x : torch.Tensor + Input to the YOLOv8 network. + + Returns + ------- + Output of the YOLOv8 network : torch.Tensor + """ + x = self.net(x) + x = self.fpn(*x) + x = self.head(x) + return x diff --git a/micromind/utils/__init__.py b/micromind/utils/__init__.py index e69de29b..cb961836 100644 --- a/micromind/utils/__init__.py +++ b/micromind/utils/__init__.py @@ -0,0 +1,3 @@ +from . import yolo +from . import checkpointer +from .helpers import parse_configuration diff --git a/micromind/utils/checkpointer.py b/micromind/utils/checkpointer.py index 09a867fd..35bd96a2 100644 --- a/micromind/utils/checkpointer.py +++ b/micromind/utils/checkpointer.py @@ -5,95 +5,265 @@ Authors: - Francesco Paissan, 2023 """ -from typing import Union, Dict, Callable -from loguru import logger -from pathlib import Path import os +import shutil +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional, Union +from argparse import Namespace +import warnings import torch +import yaml + +from .helpers import get_logger + +logger = get_logger() + + +def create_experiment_folder( + output_folder: Union[Path, str], exp_name: Union[Path, str] +) -> Path: + """Creates the experiment folder used to log data. + + Arguments + --------- + output_folder : Union[Path, str] + General output folder (can be shared between more experiments). + exp_name : Union[Path, str] + Name of the experiment, to be concatenated to the output_folder. 
+ + Returns + ------- + Experiment folder : Union[Path, str] + """ + exp_folder = os.path.join(output_folder, exp_name) + + os.makedirs(exp_folder, exist_ok=True) + os.makedirs(os.path.join(exp_folder, "save"), exist_ok=True) + + return exp_folder class Checkpointer: + """Checkpointer class. Supports min/max modes for arbitrary keys (Metrics or loss). + Always saves best and last in the experiment folder. + + Arguments + --------- + + experiment_folder : Union[str, Path] + Experiment folder. Used to load / store checkpoints. + key: Optional[str] + Key to be logged. It should be the name of the Metric, or "loss". + Defaults to "loss". + mode: Optional[str] + Either `min` or `max`. If min, will store the checkpoint with the lowest + value for key. If max, it does the opposite. + + Example + ------- + .. doctest:: + + >>> from micromind.utils.checkpointer import Checkpointer + >>> from micromind.utils.checkpointer import create_experiment_folder + >>> exp_folder = create_experiment_folder("/tmp", "test_mm") + >>> check = Checkpointer(exp_folder) + """ + def __init__( self, - key: str, - mode: str = "min", - top_k: int = 5, - checkpoint_path: Union[str, Path] = ".", + experiment_folder: Union[str, Path], + key: Optional[str] = "loss", + mode: Optional[str] = "min", + hparams: Optional[Namespace] = None, ) -> None: + assert experiment_folder != "", "You should pass a valid experiment folder." + assert os.path.exists( + os.path.join(experiment_folder, "save") + ), "Invalid experiment folder." assert mode in ["max", "min"], "Checkpointer mode can be only max or min." - self.key = key + self.key = "val_" + key self.mode = mode - self.top_k = 5 - self.bests = [torch.inf] * self.top_k - self.check_paths = [""] * self.top_k - self.root_dir = checkpoint_path + self.bests = torch.inf if mode == "min" else -torch.inf + self.check_paths = "" + self.root_dir = experiment_folder self.save_dir = os.path.join(self.root_dir, "save") - os.makedirs(self.save_dir, exist_ok=True) - self.fstream = open(os.path.join(self.root_dir, "train_log.txt"), "a") + self.last_dir = "default" + + # dump hparams to yaml when passed + if hparams is not None and os.path.exists(self.root_dir): + with open(os.path.join(self.root_dir, "args.yaml"), "w") as args_f: + args_f.write(yaml.safe_dump(vars(hparams))) + else: + warnings.warn( + "You did not specify the configuration to the checkpointer, \ + so it won't be saved. You can pass one using the hparams \ + argument. Ignore this if you are in debug mode." + ) + + # if true, does not write on disk + self.debug = False + + def recover_state(self): + """Recovers last corrected state of the training. If found, returns + the accelerate dump folder (for recovery) and the last epoch logged. + + Returns + ------- + Checkpoint path and last epoch logged. 
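# Hedged sketch: building a Checkpointer that tracks a validation metric. Note
# that the key is prefixed internally with "val_", so key="loss" monitors
# "val_loss". The Namespace below stands in for a real configuration.
from argparse import Namespace
from micromind.utils.checkpointer import Checkpointer, create_experiment_folder

exp_folder = create_experiment_folder("/tmp/results", "xinet_cifar10")
hparams = Namespace(lr=1e-3, batch_size=256)   # hypothetical hyperparameters

checkpointer = Checkpointer(
    exp_folder,
    key="loss",        # monitored as "val_loss"
    mode="min",        # keep the checkpoint with the lowest value
    hparams=hparams,   # dumped to <exp_folder>/args.yaml
)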
: Tuple[str, int] + """ + available_ckpts = list(Path(self.save_dir).iterdir()) + if len(available_ckpts) < 1: + return + dates = [ + datetime.strptime(str(ckpt.name), "%Y-%m-%d+%H-%M-%S") + for ckpt in available_ckpts + ] + dates = sorted(dates, reverse=True) + + for date in dates: + oldest_name = os.path.join( + self.save_dir, date.strftime("%Y-%m-%d+%H-%M-%S") + ) + try: + print(os.path.join(oldest_name, "status.yaml")) + with open(os.path.join(oldest_name, "status.yaml"), "r") as f: + dat = yaml.safe_load(f) + + epoch = dat["epoch"] + self.bests = dat["metric"] + self.key = dat["metric_key"] + + accelerate_path = os.path.join(oldest_name, "accelerate_dump") + logger.info( + f"Recovered info from checkpoint {oldest_name} at epoch {epoch}." + ) + logger.info(f"{self.key} was {self.bests:.4f} for this checkpoint.") + + return accelerate_path, epoch + except Exception as e: + logger.info( + " ".join( + f"Tried to recover checkpoint {oldest_name}, \ + but it appears corrupted.".split() + ) + ) + logger.debug(str(e)) + return + + @staticmethod + def dump_modules(modules, out_folder): + """Dumps state dict for all elements in the modules.""" + base_save = {k: v.state_dict() for k, v in modules.items()} + + torch.save(base_save, os.path.join(out_folder, "state-dict.pth.tar")) + + @staticmethod + def dump_status(status, out_dir): + """Dumps the status of the training.""" + yaml_status = yaml.dump(status) + + with open(os.path.join(out_dir, "status.yaml"), "w") as f: + f.write(yaml_status) def __call__( self, mind, - epoch: int, train_metrics: Dict, metrics: Dict, - unwrap: Callable = lambda x: x, ) -> Union[Path, str]: + """Does one checkpointing step. + Arguments + --------- + mind : mm.Micromind + Mind to be saved, eventually. + train_metrics : Dict + Training metrics, used only for the `train_log.txt` and the `stdout`. + metrics : Dict + Validation metrics, used to check if the checkpoint improved. 
+ + Returns + ------- + Current best checkpoint : Union[str, Path] + """ + current_folder = datetime.now().strftime("%Y-%m-%d+%H-%M-%S") + current_folder = os.path.join(self.save_dir, current_folder) + os.makedirs(current_folder, exist_ok=True) + + status_dict = { + "epoch": mind.current_epoch, + "metric": metrics[self.key], + "metric_key": self.key, + } + + self.fstream = open(os.path.join(self.root_dir, "train_log.txt"), "a") s_out = ( - f"Epoch {epoch}: " + f"Epoch {mind.current_epoch}: " + " - ".join([f"{k}: {v:.2f}" for k, v in train_metrics.items()]) + "; " ) s_out += " - ".join([f"{k2}: {v2:.4f}" for k2, v2 in metrics.items()]) + ".\n" - self.fstream.write(s_out) + if not self.debug: + self.fstream.write(s_out) logger.info(s_out) - base_save = { - "key": self.key, - "mode": self.mode, - "epoch": epoch, - "optimizer": mind.opt, - "lr_scheduler": mind.lr_sched, - } + + if not self.debug: + mind.accelerator.save_state(os.path.join(current_folder, "accelerate_dump")) + self.dump_modules(mind.modules, current_folder) + self.dump_status(status_dict, current_folder) + + # remove previous last dir after saving the current version + if ( + os.path.exists(self.last_dir) + and self.last_dir != self.check_paths + and not self.debug + ): + shutil.rmtree(self.last_dir) + + self.last_dir = current_folder + to_remove = None if self.mode == "min": - if metrics[self.key] <= min(self.bests): - id_best = self.bests.index(min(self.bests)) - to_remove = self.check_paths[id_best] + if metrics[self.key] <= self.bests: + to_remove = self.check_paths - self.check_paths[id_best] = os.path.join( - self.save_dir, - f"epoch_{epoch}_{self.key}_{metrics[self.key]:.4f}.ckpt", + if not self.debug: + mind.accelerator.save_state( + os.path.join(current_folder, "accelerate_dump") + ) + self.dump_modules(mind.modules, current_folder) + self.dump_status(status_dict, current_folder) + + self.bests = metrics[self.key] + self.check_paths = current_folder + logger.info( + f"Generated better checkpoint at epoch {mind.current_epoch}." ) - base_save.update( - {k: unwrap(v).state_dict() for k, v in mind.modules.items()} - ), - torch.save(base_save, self.check_paths[id_best]) elif self.mode == "max": - if metrics[self.key] >= max(self.bests): - id_best = self.bests.index(min(self.bests)) - to_remove = self.check_paths[id_best] - - self.check_paths[id_best] = os.path.join( - self.save_dir, - f"epoch_{epoch}_{self.key}_{metrics[self.key]:.4f}.ckpt", - ) + if metrics[self.key] >= self.bests: + to_remove = self.check_paths - base_save.update( - {k: unwrap(v).state_dict() for k, v in mind.modules.items()} - ), - torch.save(base_save, self.check_paths[id_best]) + if not self.debug: + mind.accelerator.save_state( + os.path.join(current_folder, "accelerate_dump") + ) + self.dump_modules(mind.modules, current_folder) + self.dump_status(status_dict, current_folder) - if to_remove is not None and to_remove != "": - logger.info(f"Generated better checkpoint. Deleting {to_remove}.") - os.remove(to_remove) + self.bests = metrics[self.key] + self.check_paths = current_folder + logger.info( + f"Generated better checkpoint at epoch {mind.current_epoch}." 
+ ) - if self.mode == "max": - return self.check_paths[self.bests.index(max(self.bests))] - elif self.mode == "min": - return self.check_paths[self.bests.index(min(self.bests))] + if to_remove is not None and to_remove != "" and not self.debug: + logger.info(f"Deleting {to_remove}.") + if os.path.exists(to_remove): + shutil.rmtree(to_remove) - def close(self): self.fstream.close() + + return self.check_paths diff --git a/micromind/utils/helpers.py b/micromind/utils/helpers.py index f0c1c166..43b51d75 100644 --- a/micromind/utils/helpers.py +++ b/micromind/utils/helpers.py @@ -4,40 +4,77 @@ Authors: - Francesco Paissan, 2023 """ -from typing import Union, Dict, Tuple +import sys from pathlib import Path -import random -import string -import torch -import os +from typing import Dict, Union +from argparse import Namespace +from loguru import logger +import micromind as mm +import argparse -def get_value_from_key(s: str, key: str, cast=float) -> float: - dat = s.split(f"{key}_")[-1] - if "ckpt" in dat: - dat = dat.split(".ckpt")[0] +def override_conf(hparams: Dict): + """Handles command line overrides. Takes as input a configuration + and defines all the keys as arguments. If passed from command line, + these arguments override the default configuration. - return cast(dat) + Arguments + --------- + hparams : Dict + Dictionary containing current configuration. + Returns + ------- + Configuration agumented with overrides. : Namespace -def select_and_load_checkpoint(path: Union[Path, str]) -> Tuple[Dict, str]: - checkpoints = os.listdir(path) - checkpoints = [os.path.join(path, c) for c in checkpoints] + """ + parser = argparse.ArgumentParser(description="MicroMind experiment configuration.") + for key, value in hparams.items(): + parser.add_argument(f"--{key}", type=type(value), default=value) - dat = torch.load(checkpoints[0]) - selected_key, selected_mode = dat["key"], dat["mode"] + args, extra_args = parser.parse_known_args() + for key, value in vars(args).items(): + if value is not None: + hparams[key] = value - values = [get_value_from_key(str(c), selected_key) for c in checkpoints] + return Namespace(**hparams) - best_key = min(values) if selected_mode == "min" else max(values) - best_checkpoint = checkpoints[values.index(best_key)] - return torch.load(best_checkpoint), best_checkpoint +def parse_configuration(cfg: Union[str, Path]): + """Parses default configuration and compares it with user defined. + It processes a user-defined python file that creates the configuration. + Additionally, it handles eventual overrides from command line. + Arguments + --------- + cfg : Union[str, Path] + Configuration file defined by the user -def get_random_string(length=10): - letters = string.ascii_lowercase - result_str = "".join(random.choice(letters) for i in range(length)) + Returns + ------- + Configuration Namespace. : argparse.Namespace - return result_str + """ + with open(cfg, "r") as f: + conf = f.read() + + local_vars = {} + + exec(conf, {}, local_vars) + for key in mm.core.default_cfg: + if key not in local_vars: + local_vars[key] = mm.core.default_cfg[key] + + return override_conf(local_vars) + + +def get_logger(): + """Default loguru logger config. 
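# Hedged sketch: configurations are plain Python files whose top-level names
# become attributes of the parsed Namespace; missing keys fall back to
# micromind's defaults, and every key can be overridden from the command line
# (e.g. `python train.py cfg/my_cfg.py --lr 0.01`). The file path is made up.
from micromind.utils import parse_configuration

with open("/tmp/my_cfg.py", "w") as f:
    f.write("experiment_name = 'demo'\nlr = 0.001\nbatch_size = 256\n")

hparams = parse_configuration("/tmp/my_cfg.py")
print(hparams.lr, hparams.batch_size)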
It is called inside micromind's files.""" + fmt = "{time:YYYY-MM-DD HH:mm:ss} | \ + {level: <8} | \ + {message}" + logger.remove() + logger.add(sys.stderr, format=fmt) + + return logger diff --git a/micromind/utils/parse.py b/micromind/utils/parse.py deleted file mode 100644 index debbbed1..00000000 --- a/micromind/utils/parse.py +++ /dev/null @@ -1,28 +0,0 @@ -import argparse - - -def parse_arguments(): - parser = argparse.ArgumentParser(description="General configuration for micromind.") - - parser.add_argument("--lr", type=float, default=0.001, help="Learning rate.") - parser.add_argument( - "--optimizer", - dest="opt", - default="adam", - choices=["adam", "sgd"], - help="Optimizer name.", - ) - parser.add_argument( - "--experiment_name", default="exp", help="Name of the experiment." - ) - parser.add_argument( - "--output_folder", default="results", help="Output folder path." - ) - parser.add_argument( - "--debug", - action="store_true", - help="Run in debug mode to check train and validation steps.", - ) - - args = parser.parse_args() - return args diff --git a/micromind/utils/yolo.py b/micromind/utils/yolo.py new file mode 100644 index 00000000..bfcac70b --- /dev/null +++ b/micromind/utils/yolo.py @@ -0,0 +1,930 @@ +""" +Helper functions. + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 +""" +import time +import types +from collections import defaultdict +from pathlib import Path + +import cv2 +import numpy as np +import torch +import torchvision +import yaml + + +def get_variant_multiples(variant): + tmp = { + "n": (0.33, 0.25, 2.0), + "s": (0.33, 0.50, 2.0), + "m": (0.67, 0.75, 1.5), + "l": (1.0, 1.0, 1.0), + "x": (1, 1.25, 1.0), + }.get(variant, None) + + return tmp[1], tmp[2], tmp[0] + + +def load_config(file_path): + """ + Load configuration from a YAML file and preprocess it for training. + + Arguments + --------- + file_path : str + Path to the YAML configuration file. + + Returns + ------- + m_cfg : types.SimpleNamespace + Model configuration containing task-specific parameters. + data_cfg : dict + Data configuration containing paths and settings for train, val and test. 
+ """ + with open(file_path, "r") as file: + config = yaml.safe_load(file) + path = Path(Path.cwd() / config["path"]).resolve() + if "train" in config: + if not isinstance(config["train"], list): + train = Path(path / config["train"]) + else: + train = [Path(path / p) for p in config["train"]] + else: + train = None + + if "val" in config: + if not isinstance(config["val"], list): + val = Path(path / config["val"]) + else: + val = [Path(path / p) for p in config["val"]] + else: + val = None + # val = Path(path / config["val"]) if "val" in config else None + if ("test" not in config) or (config["test"] is None): + test = None + else: + test = Path(path / config["test"]) + + data_cfg = { + "path": path, + "train": train, + "val": val, + "test": test, + "names": config["names"], + "download": config.get("download"), + "yaml_file": file_path, + "nc": len(config["names"]), + } + m_cfg = { + "task", + "mode", + "imgsz", + "rect", + "cache", + "single_cls", + "fraction", + "overlap_mask", + "mask_ratio", + "classes", + "box", + "cls", + "dfl", + "hsv_h", + "hsv_s", + "hsv_v", + "degrees", + "translate", + "scale", + "shear", + "perspective", + "flipud", + "fliplr", + "mosaic", + "mixup", + "copy_paste", + } + + m_cfg = {key: config[key] for key in m_cfg if key in config} + m_cfg = types.SimpleNamespace(**m_cfg) + + return m_cfg, data_cfg + + +def autopad(k, p=None, d=1): # kernel, padding, dilation + """Calculate padding value for a convolution operation based on kernel + size and dilation. + + This function computes the padding value for a convolution operation to + maintain the spatial size of the input tensor. + + Arguments + --------- + k : int + Kernel size for the convolution operation. If a single integer + is provided, it's assumed that all dimensions have the same kernel size. + p : int, optional + Padding value for the convolution operation. If not provided, + it will be calculated to maintain the spatial size of the input tensor. + d : int, optional + Dilation for the convolution operation. Default is 1. + + Returns + ------- + The padding value to maintain the spatial size of the input tensor : int + """ + if d > 1: + k = ( + d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] + ) # actual kernel-size + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + + return p + + +def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchor points and stride tensors. + + This function generates anchor points for each feature map and stride + combination. + It is commonly used in object detection tasks to define anchor boxes. + + Arguments + --------- + feats : torch.Tensor + A feature map (tensor) from which anchor points will be generated. + strides : torch.Tensor + Stride values corresponding to each feature map. + Strides define the spacing between anchor points. + grid_cell_offset : float, optional + Offset to be added to the grid cell coordinates when + generating anchor points. Default is 0.5. + + Returns + ------- + anchor_points : torch.Tensor + Concatenated anchor points for all feature maps as a 2D tensor. + stride_tensor : torch.Tensor + Concatenated stride values for all anchor points as a 2D tensor. 
+ """ + anchor_points, stride_tensor = [], [] + assert feats is not None + dtype, device = feats[0].dtype, feats[0].device + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = ( + torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset + ) # shift x + sy = ( + torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset + ) # shift y + sy, sx = torch.meshgrid(sy, sx, indexing="ij") + anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) + stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) + return torch.cat(anchor_points), torch.cat(stride_tensor) + + +def dist2bbox(distance, anchor_points, xywh=True, dim=-1): + """Convert distance predictions to bounding box coordinates. + + This function takes distance predictions and anchor points to calculate + bounding box coordinates. + + Arguments + --------- + distance : torch.Tensor + Tensor containing distance predictions. + It should be in the format [lt, rb] if `xywh` is True, + or [x1y1, x2y2] if `xywh` is False. + anchor_points : torch.Tensor + Tensor containing anchor points used for the conversion. + xywh : bool, optional + If True, the function returns bounding boxes in the format + [center_x, center_y, width, height]. + If False, it returns bounding boxes in the format [x1, y1, x2, y2]. + Default is True. + dim : int, optional + The dimension along which the tensor is split into lt and rb. + Default is -1. + + Returns + ------- + Converted bounding box coordinates in the specified format : torch.Tensors + """ + lt, rb = torch.chunk(distance, chunks=2, dim=dim) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return torch.cat((c_xy, wh), dim=1) + return torch.cat((x1y1, x2y2), dim=1) + + +def compute_transform( + image, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32 +): + """Compute a transformation of an image to the specified size and format. + + This function computes a transformation of the input image to the specified + new size and format, while optionally maintaining the aspect ratio or adding + padding as needed. + + Arguments + --------- + image : torch.Tensor + The input image to be transformed. + new_shape : int or tuple, optional + The target size of the transformed image. If an integer is provided, + the image is resized to have the same width and height. + If a tuple of two integers is provided, it represents the new width + and height. Default is (640, 640). + auto : bool, optional + If True, automatically calculates padding to ensure the output size + is divisible by the specified `stride`. Default is False. + scaleFill : bool, optional + If True, scales the image to completely fill the target size without + maintaining the aspect ratio. Default is False. + scaleup : bool, optional + If True, allows the image to be scaled up (enlarged) if necessary. + Default is True. + stride : int, optional + The stride value used for padding calculation when `auto` is True. + Default is 32. 
+ + Returns + ------- + The transformed image : numpy.ndarray + """ + shape = image.shape[-2:] # current shape [height, width] + new_shape = (new_shape, new_shape) if isinstance(new_shape, int) else new_shape + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + r = min(r, 1.0) if not scaleup else r + new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r))) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] + dw, dh = (dw % stride, dh % stride) if auto else (0.0, 0.0) + new_unpad = (new_shape[1], new_shape[0]) if scaleFill else new_unpad + new_unpad = (new_unpad[1], new_unpad[0]) + dw /= 2 + dh /= 2 + image = torch.nn.functional.interpolate( + image.unsqueeze(0), size=new_unpad, mode="bilinear", align_corners=False + ).squeeze(0) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + image = torch.nn.functional.pad(image, (left, right, top, bottom), value=114) + return image + + +def preprocess(im, imgsz=640, model_stride=32, model_pt=True): + """Preprocess a batch of images for inference. + + This function preprocesses a batch of images for inference by + resizing, transforming, and normalizing them. + + Arguments + --------- + im : torch.Tensor or list of torch.Tensor + An input image or a batch of images to be preprocessed. + imgsz : int, optional + The target size of the images after preprocessing. + Default is 640. + model_stride : int, optional + The stride value used for padding calculation when `auto` is True + in `compute_transform`. Default is 32. + model_pt : bool, optional + If True, the function automatically calculates the padding to + maintain the same shapes for all input images in the batch. + Default is True. + + Returns + ------- + torch.Tensor + The preprocessed batch of images as a torch.Tensor with shape + (n, 3, h, w), where n is the number of images, 3 represents the + RGB channels, and h and w are the height and width of the images. + """ + auto = model_pt + im = compute_transform(im, new_shape=imgsz, auto=auto, stride=model_stride) + im = im.float() / 255.0 # 0 - 255 to 0.0 - 1.0 + im = im.unsqueeze(0) + return im + + +def box_area(box): + """Calculate the area of bounding boxes. + + This function calculates the area of bounding boxes + represented as [x1, y1, x2, y2]. + + Arguments + --------- + box : torch.Tensor + A tensor containing bounding boxes in the format [x1, y1, x2, y2]. + + Returns + ------- + A tensor containing the area of each bounding box : torch.Tensor + """ + return (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) + + +def box_iou(box1, box2): + """Calculate the Intersection over Union (IoU) between two sets + of bounding boxes. + + This function computes the IoU between two sets of bounding boxes. + + Arguments + --------- + box1 : numpy.ndarray + The first set of bounding boxes in the format [x1, y1, x2, y2]. + box2 : numpy.ndarray + The second set of bounding boxes in the format [x1, y1, x2, y2]. + + Returns + ------- + numpy.ndarray + A 2D numpy array containing the IoU between each pair of bounding + boxes in box1 and box2. 
+ """ + lt = np.maximum(box1[:, None, :2], box2[:, :2]) + rb = np.minimum(box1[:, None, 2:], box2[:, 2:]) + wh = np.clip(rb - lt, 0, None) + inter = wh[:, :, 0] * wh[:, :, 1] + area1 = box_area(box1)[:, None] + area2 = box_area(box2)[None, :] + iou = inter / (area1 + area2 - inter) + return iou + + +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + nc=0, # number of classes (optional) + max_time_img=0.05, + max_nms=30000, + max_wh=7680, +): + """ + Perform non-maximum suppression (NMS) on a set of boxes, with support for masks + and multiple labels per box. + + Parameters + ---------- + prediction : torch.Tensor + A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes) + containing the predicted boxes, classes, and masks. The tensor should + be in the format output by a model, such as YOLO. + conf_thres : float, optional + The confidence threshold below which boxes will be filtered out. + Valid values are between 0.0 and 1.0. Default is 0.25. + iou_thres : float, optional + The IoU threshold below which boxes will be filtered out during NMS. + Valid values are between 0.0 and 1.0. Default is 0.45. + classes : List[int], optional + A list of class indices to consider. If None, all classes will be considered. + agnostic : bool, optional + If True, the model is agnostic to the number of classes, and all classes + will be considered as one. Default is False. + multi_label : bool, optional + If True, each box may have multiple labels. Default is False. + labels : List[List[Union[int, float, torch.Tensor]]], optional + A list of lists, where each inner list contains the apriori labels for a + given image. The list should be in the format output by a dataloader, with + each label being a tuple of (class_index, x1, y1, x2, y2). + max_det : int, optional + The maximum number of boxes to keep after NMS. Default is 300. + nc : int, optional + The number of classes output by the model. Any indices after this will be + considered masks. Default is 0. + max_time_img : float, optional + The maximum time (seconds) for processing one image. Default is 0.05. + max_nms : int, optional + The maximum number of boxes into torchvision.ops.nms(). Default is 30000. + max_wh : int, optional + The maximum box width and height in pixels. Default is 7680. + + Returns + ------- + List[torch.Tensor] + A list of length batch_size, where each element is a tensor of + shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns + (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). 
+ """ + + # Checks + assert ( + 0 <= conf_thres <= 1 + ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0 and 1.0" + assert ( + 0 <= iou_thres <= 1 + ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + if isinstance( + prediction, (list, tuple) + ): # YOLOv8 model in validation model, output = (inference_out, loss_out) + prediction = prediction[0] # select only inference output + + bs = prediction.shape[0] # batch size + nc = nc or (prediction.shape[1] - 4) # number of classes + nm = prediction.shape[1] - nc - 4 + mi = 4 + nc # mask start index + xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + time_limit = 0.5 + max_time_img * bs # seconds to quit after + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + + prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84) + prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy + + t = time.time() + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + nm + 4), device=x.device) + v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box + v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Detections matrix nx6 (xyxy, conf, cls) + box, cls, mask = x.split((4, nc, nm), 1) + + if multi_label: + i, j = torch.where(cls > conf_thres) + x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) + else: # best class only + conf, j = cls.max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + if n > max_nms: # excess boxes + x = x[ + x[:, 4].argsort(descending=True)[:max_nms] + ] # sort by confidence and remove excess boxes + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + i = i[:max_det] # limit detections + + output[xi] = x[i] + if (time.time() - t) > time_limit: + # LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + + +@torch.no_grad() +def postprocess(preds, img, orig_imgs): + """Perform post-processing on the predictions. + + This function applies post-processing to the predictions, + including Non-Maximum Suppression (NMS) and scaling of bounding boxes. + + Arguments + --------- + preds : list of numpy.ndarray + A list of prediction arrays from the object detection model. + img : numpy.ndarray + The input image on which the predictions were made. + orig_imgs : numpy.ndarray or list of numpy.ndarray + The original image(s) before any preprocessing. + + Returns + ------- + list of numpy.ndarray + A list of post-processed prediction arrays, each containing bounding + boxes and associated information. 
+ """ + preds = non_max_suppression( + prediction=preds, + conf_thres=0.25, + iou_thres=0.7, + agnostic=False, + max_det=300, + multi_label=True, + ) + + all_preds = [] + for i, pred in enumerate(preds): + orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs + if isinstance(orig_img, dict): + pred[:, :4] = scale_boxes( + tuple(img["img"].shape[2:4]), pred[:, :4], orig_img["ori_shape"][i] + ) # batch + else: + pred[:, :4] = scale_boxes( + img.shape[2:], pred[:, :4], orig_img.shape[1:] + ) # single img + all_preds.append(pred) + return all_preds + + +def draw_bounding_boxes_and_save( + orig_img_paths, output_img_paths, all_predictions, class_labels, iou_threshold=0.5 +): + """Draw bounding boxes on images based on object detection predictions and + save the result. + + This function draws bounding boxes on images based on object detection + predictions and saves the result. It also prints the number of objects + detected for each class. + + Arguments + --------- + orig_img_paths : list of str + A list of file paths to the original input images. + output_img_paths : list of str + A list of file paths to save the images with bounding boxes. + all_predictions : list of list of numpy.ndarray + A list of lists of prediction arrays from the object detection model. + class_labels : list of str + A list of class labels corresponding to the object classes. + iou_threshold : float, optional + The IoU threshold used for non-maximum suppression to remove + overlapping bounding boxes. Default is 0.5. + + Returns + ------- + None + """ + color_dict = { + label: tuple( + (((i + 1) * 50) % 256, ((i + 1) * 100) % 256, ((i + 1) * 150) % 256) + ) + for i, label in enumerate(class_labels) + } + font = cv2.FONT_HERSHEY_SIMPLEX + + def is_bright_color(color): + r, g, b = color + brightness = (r * 299 + g * 587 + b * 114) / 1000 + return brightness > 127 + + for img_idx, (orig_img_path, output_img_path, predictions) in enumerate( + zip(orig_img_paths, output_img_paths, all_predictions) + ): + predictions = np.array(predictions) + orig_img = cv2.imread(orig_img_path) + height, width, _ = orig_img.shape + box_thickness = int((height + width) / 400) + font_scale = (height + width) / 2500 + + grouped_preds = defaultdict(list) + object_count = defaultdict(int) + + for pred_np in predictions: + grouped_preds[int(pred_np[-1])].append(pred_np) + + def draw_box_and_label(pred, color): + x1, y1, x2, y2, conf, _ = pred + x1, y1, x2, y2 = map(int, (x1, y1, x2, y2)) + cv2.rectangle(orig_img, (x1, y1), (x2, y2), color, box_thickness) + label = f"{class_labels[class_id]} {conf:.2f}" + text_size, _ = cv2.getTextSize(label, font, font_scale, 1) + label_y, bg_y = ( + (y1 - 4, y1 - text_size[1] - 4) + if y1 - text_size[1] - 4 > 0 + else (y1 + text_size[1], y1) + ) + cv2.rectangle( + orig_img, + (x1, bg_y), + (x1 + text_size[0], bg_y + text_size[1]), + color, + -1, + ) + font_color = (0, 0, 0) if is_bright_color(color) else (255, 255, 255) + cv2.putText( + orig_img, + label, + (x1, label_y), + font, + font_scale, + font_color, + 1, + cv2.LINE_AA, + ) + + for class_id, pred_list in grouped_preds.items(): + pred_list = np.array(pred_list) + while len(pred_list) > 0: + max_conf_idx = np.argmax(pred_list[:, 4]) + max_conf_pred = pred_list[max_conf_idx] + pred_list = np.delete(pred_list, max_conf_idx, axis=0) + color = color_dict[class_labels[class_id]] + draw_box_and_label(max_conf_pred, color) + object_count[class_labels[class_id]] += 1 + iou_scores = box_iou(np.array([max_conf_pred[:4]]), pred_list[:, :4]) + 
low_iou_indices = np.where(iou_scores[0] < iou_threshold)[0] + pred_list = pred_list[low_iou_indices] + for low_conf_pred in pred_list: + draw_box_and_label(low_conf_pred, color) + + print(f"Image {img_idx + 1}:") + print("Objects detected:") + for obj, count in object_count.items(): + print(f"- {obj}: {count}") + + cv2.imwrite(output_img_path, orig_img) + print(f"saved detections at {output_img_path}") + + +def clip_boxes(boxes, shape): + """Clip bounding boxes to stay within image boundaries. + + This function clips bounding boxes to ensure that they stay within the + boundaries of the image. + + Arguments + --------- + boxes : torch.Tensor + A tensor containing bounding boxes in the format [x1, y1, x2, y2]. + shape : tuple + A tuple representing the shape of the image in the format (height, width). + + Returns + ------- + A tensor containing the clipped bounding boxes : torch.Tensor + """ + boxes[..., [0, 2]] = torch.clip(boxes[..., [0, 2]], 0, shape[1]) # x1, x2 + boxes[..., [1, 3]] = torch.clip(boxes[..., [1, 3]], 0, shape[0]) # y1, y2 + return boxes + + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + """Scale bounding boxes to match a different image shape. + + This function scales bounding boxes to match a different image + shape while maintaining their aspect ratio. + + Arguments + --------- + img1_shape : tuple + A tuple representing the shape of the target image in the + format (height, width). + boxes : torch.Tensor + A tensor containing bounding boxes in the + format [x1, y1, x2, y2]. + img0_shape : tuple + A tuple representing the shape of the source image in the + format (height, width). + ratio_pad : float or None, optional + A scaling factor for the bounding boxes. + If None, it is calculated based on the aspect ratio of the images. + Default is None. + + Returns + ------- + A tensor containing the scaled bounding boxes : torch.Tensor + """ + gain = ( + ratio_pad + if ratio_pad + else min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) + ) + pad = ( + (img1_shape[1] - img0_shape[1] * gain) / 2, + (img1_shape[0] - img0_shape[0] * gain) / 2, + ) + + boxes[..., [0, 2]] -= pad[0] + boxes[..., [1, 3]] -= pad[1] + boxes /= gain + boxes = clip_boxes(boxes, img0_shape) + + return boxes + + +def xywh2xyxy(x): + """Convert bounding box coordinates from (x, y, width, height) + to (x1, y1, x2, y2) format. + + This function converts bounding box coordinates from the format + (center_x, center_y, width, height) to the format (x1, y1, x2, y2), + where (x1, y1) represents the top-left corner and (x2, y2) represents + the bottom-right corner of the bounding box. + + Arguments + --------- + x : torch.Tensor + A tensor containing bounding box coordinates in the + format (center_x, center_y, width, height). + + Returns + ------- + torch.Tensor + A tensor containing bounding box coordinates in the + format (x1, y1, x2, y2). + """ + xy = x[..., :2] # center x, y + wh = x[..., 2:4] # width, height + xy1 = xy - wh / 2 # top left x, y + xy2 = xy + wh / 2 # bottom right x, y + result = torch.cat((xy1, xy2), dim=-1) + return result + + +def bbox_format(box): + """ + Convert a tensor of coordinates [x1, y1, x2, y2] representing two points + defining a rectangle to the format [x_min, y_min, x_max, y_max], where + x_min, y_min represent the top-left corner, and x_max, y_max represent the + bottom-right corner of the rectangle. 
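# Hedged sketch: converting centre-based boxes to corner format and mapping
# them from a letterboxed 640x640 frame back to an assumed 480x640 original.
import torch
from micromind.utils.yolo import scale_boxes, xywh2xyxy

xywh = torch.tensor([[320.0, 320.0, 100.0, 50.0]])   # cx, cy, w, h at 640x640
xyxy = xywh2xyxy(xywh)
print(xyxy)   # expected: [[270., 295., 370., 345.]]

# scale_boxes modifies its input in place, hence the clone().
restored = scale_boxes((640, 640), xyxy.clone(), (480, 640))
print(restored)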
+ + Arguments + --------- + box : torch.Tensor + A tensor of coordinates in the format [x1, y1, x2, y2] where x1, y1, x2, y2 + represent the coordinates of two points defining a rectangle. + + Returns + ------- + torch.Tensor + The coordinates in the format [x_min, y_min, x_max, y_max] where x_min, y_min + represent the top-left vertex, and x_max, y_max represent the bottom-right + vertex of the rectangle. + """ + x1, y1, x2, y2 = box[0], box[1], box[2], box[3] + + x_min = torch.min(x1, x2) + x_max = torch.max(x1, x2) + y_min = torch.min(y1, y2) + y_max = torch.max(y1, y2) + + return torch.tensor([x_min, y_min, x_max, y_max]) + + +def calculate_iou(box1, box2): + """ + Calculate the Intersection over Union (IoU) between two bounding boxes. + + Arguments + --------- + box1 : torch.Tensor + First bounding box in the format [x1, y1, x2, y2]. + box2 : torch.Tensor + Second bounding box in the format [x1, y1, x2, y2]. + + Returns + ------- + float + The intersection over union of the two bounding boxes. + """ + + x1 = torch.max(box1[0], box2[0]) + y1 = torch.max(box1[1], box2[1]) + x2 = torch.min(box1[2], box2[2]) + y2 = torch.min(box1[3], box2[3]) + + intersection = torch.clamp(x2 - x1, min=0) * torch.clamp(y2 - y1, min=0) + area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1]) + area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1]) + union = area_box1 + area_box2 - intersection + + iou = intersection / union + return iou.item() + + +def average_precision(predictions, ground_truth, class_id, iou_threshold=0.5): + """ + Calculate the average precision (AP) for a specific class in YOLO predictions. + + Arguments + --------- + predictions : list + List of prediction boxes in the format [x1, y1, x2, y2, confidence, class_id]. + ground_truth : list + List of ground truth boxes in the same format. + class_id : int + The class ID for which to calculate AP. + iou_threshold : float + The IoU threshold for considering a prediction as correct. + + Returns + ------- + float + The average precision for the specified class. + """ + predictions = predictions[predictions[:, 5] == class_id] + ground_truth = ground_truth[ground_truth[:, 5] == class_id] + + _, indices = torch.sort(predictions[:, 4], descending=True) + predictions = predictions[indices] + tp = torch.zeros(len(predictions)) + fp = torch.zeros(len(predictions)) + gt_count = len(ground_truth) + + for i, pred in enumerate(predictions): + best_iou = 0 + for j, gt in enumerate(ground_truth): + iou = calculate_iou(pred[:4], gt[:4]) + if iou > best_iou and iou >= iou_threshold: + best_iou = iou + best_gt_idx = j + if best_iou > 0: + tp[i] = 1 + tmp = torch.ones(ground_truth.shape[0]) + tmp[best_gt_idx] = 0 + ground_truth = ground_truth[tmp.bool()] + # ground_truth.pop(best_gt_idx) + else: + fp[i] = 1 + + precision = torch.cumsum(tp, dim=0) / ( + torch.cumsum(tp, dim=0) + torch.cumsum(fp, dim=0) + ) + recall = torch.cumsum(tp, dim=0) / gt_count + + # Compute the average precision using the 11-point interpolation + ap = torch.tensor(0.0) + for t in torch.arange(0.0, 1.1, 0.1): + recall_greater = recall >= t + num_true = torch.sum(recall_greater).item() + if num_true == 0: + p = torch.tensor(0.0) + else: + p = torch.max(precision[recall_greater]) + ap += p / 11.0 + + return ap.item() + + +def mean_average_precision(post_predictions, batch, batch_bboxes, iou_threshold=0.5): + """ + Calculate the mean average precision (mAP) for all classes in YOLO predictions. 
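# Hedged sketch: AP for a single class, with one matched prediction and one
# false positive. Rows follow the [x1, y1, x2, y2, confidence, class_id]
# convention used by this function.
import torch
from micromind.utils.yolo import average_precision

preds = torch.tensor(
    [
        [0.0, 0.0, 10.0, 10.0, 0.9, 0.0],     # matches the ground truth
        [20.0, 20.0, 30.0, 30.0, 0.8, 0.0],   # false positive
    ]
)
gts = torch.tensor([[0.0, 0.0, 10.0, 10.0, 1.0, 0.0]])
print(average_precision(preds, gts, class_id=0, iou_threshold=0.5))  # close to 1.0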
+ + Arguments + --------- + post_predictions : list + List of post-processed predictions for bounding boxes. + batch : dict + A dictionary containing batch information, including image files, batch indices. + batch_bboxes : torch.Tensor + Tensor containing batch bounding boxes. + iou_threshold : float + The IoU threshold for considering a prediction as correct. + + Returns + ------- + float + The mean average precision (mAP). + """ + batch_size = len(batch["im_file"]) + + mmAP = [] + for batch_el in range(batch_size): + ap_sum = 0 + + num_obj = torch.sum(batch["batch_idx"] == batch_el).item() + bboxes = batch_bboxes[batch["batch_idx"] == batch_el] + classes = batch["cls"][batch["batch_idx"] == batch_el] + gt = torch.cat((bboxes, torch.ones((num_obj, 1)), classes), dim=1) + + for class_id in range(80): + ap = average_precision( + post_predictions[batch_el], gt, class_id, iou_threshold + ) + ap_sum += ap + + div = torch.unique(gt[:, -1]).size(0) + if div == 0: + mAP = 0 + else: + mAP = ap_sum / div + + mmAP.append(mAP) + mmAP = sum(mmAP) / len(mmAP) + + return mmAP diff --git a/pyproject.toml b/pyproject.toml index 10b2b50a..9d61cf78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,8 @@ classifiers = [ keywords = ["feed", "reader", "tutorial"] dependencies = [ "torch", + "torchvision", + "torchaudio", "torchinfo", "huggingface_hub", "accelerate==0.23.0", @@ -56,7 +58,7 @@ profile = "black" py-modules = [] [tool.bumpver] -current_version = "0.1.1" +current_version = "0.2.0" version_pattern = "MAJOR.MINOR.PATCH" commit_message = "bump version {old_version} -> {new_version}" diff --git a/recipes/image_classification/README.md b/recipes/image_classification/README.md new file mode 100644 index 00000000..3d441e6c --- /dev/null +++ b/recipes/image_classification/README.md @@ -0,0 +1,63 @@ +## Image classification + +**Disclaimer**: we will shortly releease HuggingFace checkpoints for ImageNet, CIFAR-100, and CIFAR-10 for both PhiNet and XiNet. + +This image classification recipe uses the PyTorch image models library (`timm`) to augment the data. It supports most data augmentation strategies, and datasets of the original implementation. However, it is implemented using `micromind` and thus, it exploits all the exportability and functionalities of the library. + +To reproduce our results, you can follow these steps: + +1. install PhiNets with `pip install git+https://github.com/fpaissan/micromind` +2. install the additional dependencies for this recipe with `pip install -r extra_requirements.txt` +3. start a training! + +### Training + +The experiment's configuration is stored inside the files in the `cfg` folder. They can be overridden simply from the command line by providing a new value. For example, if you want to start a training on CIFAR-10, you just need to execute the following command: +``` +python train.py cfg/phinet.py +``` + +For CIFAR-100 instead, you can use: +``` +python train.py cfg/phinet.py --dataset torch/cifar100 --data_dir data/cifar100 +``` + +### Inference +In order to export the model and/or run an inference using PyTorch, you can pass an image and the path to a pretrained model to the inference script. +For this, you can use this command: +``` +python inference.py cfg/phinet.py IMG_PATH --ckpt_pretrained MODEL_PATH +``` + +This will print the predicted output, and save an ONNX model in `model.onnx`. + + +The script will also save an ONNX model at the end of the training. 
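As a quick sanity check, you can load the exported `model.onnx` with `onnxruntime` (not listed in the extra requirements, so install it separately) and run a dummy forward pass. A minimal sketch, assuming the default `(3, 32, 32)` input shape:
```
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx")
inp = sess.get_inputs()[0]
x = np.random.rand(1, 3, 32, 32).astype(np.float32)
print(sess.run(None, {inp.name: x})[0].shape)
```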
To export the checkpoint in a different format, please read [our documentation](https://micromind-toolkit.github.io/docs/). + +#### Referencing PhiNet +If you use PhiNet or `micromind`, please cite our work: +``` +@article{Paissan_2022_TECS, + author = {Paissan, Francesco and Ancilotto, Alberto and Farella, Elisabetta}, + title = {PhiNets: A Scalable Backbone for Low-Power AI at the Edge}, + year = {2022}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3510832}, + doi = {10.1145/3510832}, + journal = {ACM Trans. Embed. Comput. Syst.}, +} +``` + +#### Referencing XiNet +If you use XiNet or `micromind`, please cite our work: +``` +@InProceedings{Ancilotto_2023_ICCV, + author = {Ancilotto, Alberto and Paissan, Francesco and Farella, Elisabetta}, + title = {XiNet: Efficient Neural Networks for tinyML}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2023}, + pages = {16968-16977} +} +``` diff --git a/recipes/image_classification/cfg/phinet.py b/recipes/image_classification/cfg/phinet.py new file mode 100644 index 00000000..72d5e2a7 --- /dev/null +++ b/recipes/image_classification/cfg/phinet.py @@ -0,0 +1,93 @@ +""" +Configuration file image classification with PhiNet. + +Authors: + - Francesco Paissan, 2023 +""" + +# Model configuration +model = "phinet" +input_shape = (3, 32, 32) +alpha = 3 +num_layers = 7 +beta = 1 +t_zero = 5 +divisor = 8 +downsampling_layers = [5, 7] +return_layers = None + +ckpt_pretrained = "" + +# Basic training loop +epochs = 50 + +# Basic data +data_dir = "data/cifar10/" +dataset = "torch/cifar10" +batch_size = 256 +dataset_download = True + +# Dataloading config +num_workers = 4 +pin_memory = True +persistent_workers = True + +# Loss function +bce_loss = False +bce_target_thresh = None + +# Data augmentation config +aa = "rand-m8-inc1-mstd101" +aug_repeats = 0 +aug_splits = 0 +class_map = "" +color_jitter = 0.4 +cutmix = 0.0 +cutmix_minmax = None +drop = 0.0 +drop_block = None +drop_connect = None +drop_path = 0.1 +epoch_repeats = 0.0 +hflip = 0.5 +img_size = None +in_chans = None +initial_checkpoint = "" +interpolation = "bilinear" +jsd_loss = False +layer_decay = 0.65 +local_rank = 0 +log_interval = 50 +log_wandb = False +lr = 0.001 +lr_base = 0.1 +lr_base_scale = "" +lr_base_size = 256 +lr_cycle_decay = 0.5 +lr_cycle_limit = 1 +lr_cycle_mul = 1.0 +lr_k_decay = 1.0 +lr_noise = None +lr_noise_pct = 0.67 +lr_noise_std = 1.0 +mean = [0.485, 0.456, 0.406] +std = [0.229, 0.224, 0.225] +mixup = 0.0 +mixup_mode = "batch" +mixup_off_epoch = 0 +mixup_prob = 1.0 +mixup_switch_prob = 0.5 +no_aug = False +num_classes = 100 +ratio = [0.75, 1.3333333333333333] +recount = 1 +recovery_interval = 0 +remode = "pixel" +reprob = 0.3 +scale = [0.08, 1.0] +smoothing = 0.1 +train_interpolation = "bilinear" +train_split = "train" +use_multi_epochs_loader = False +val_split = "validation" +vflip = 0.0 diff --git a/recipes/image_classification/cfg/xinet.py b/recipes/image_classification/cfg/xinet.py new file mode 100644 index 00000000..e131675d --- /dev/null +++ b/recipes/image_classification/cfg/xinet.py @@ -0,0 +1,90 @@ +""" +Configuration file image classification with PhiNet. 
+ +Authors: + - Francesco Paissan, 2023 +""" + +# Model configuration +model = "xinet" +input_shape = (3, 128, 128) +alpha = 1 +num_layers = 7 +return_layers = None +gamma = 4 + +ckpt_pretrained = "" + +# Basic training loop +epochs = 50 + +# Basic data +data_dir = "data/cifar10/" +dataset = "torch/cifar10" +batch_size = 256 +dataset_download = True + +# Dataloading config +num_workers = 4 +pin_memory = True +persistent_workers = True + +# Loss function +bce_loss = False +bce_target_thresh = None + +# Data augmentation config +aa = "rand-m8-inc1-mstd101" +aug_repeats = 0 +aug_splits = 0 +class_map = "" +color_jitter = 0.4 +cutmix = 0.0 +cutmix_minmax = None +drop = 0.0 +drop_block = None +drop_connect = None +drop_path = 0.1 +epoch_repeats = 0.0 +hflip = 0.5 +img_size = None +in_chans = None +initial_checkpoint = "" +interpolation = "bilinear" +jsd_loss = False +layer_decay = 0.65 +local_rank = 0 +log_interval = 50 +log_wandb = False +lr = 0.001 +lr_base = 0.1 +lr_base_scale = "" +lr_base_size = 256 +lr_cycle_decay = 0.5 +lr_cycle_limit = 1 +lr_cycle_mul = 1.0 +lr_k_decay = 1.0 +lr_noise = None +lr_noise_pct = 0.67 +lr_noise_std = 1.0 +mean = [0.485, 0.456, 0.406] +std = [0.229, 0.224, 0.225] +mixup = 0.0 +mixup_mode = "batch" +mixup_off_epoch = 0 +mixup_prob = 1.0 +mixup_switch_prob = 0.5 +no_aug = False +num_classes = 100 +ratio = [0.75, 1.3333333333333333] +recount = 1 +recovery_interval = 0 +remode = "pixel" +reprob = 0.3 +scale = [0.08, 1.0] +smoothing = 0.1 +train_interpolation = "bilinear" +train_split = "train" +use_multi_epochs_loader = False +val_split = "validation" +vflip = 0.0 diff --git a/recipes/image_classification/extra_requirements.txt b/recipes/image_classification/extra_requirements.txt new file mode 100644 index 00000000..bdbf074c --- /dev/null +++ b/recipes/image_classification/extra_requirements.txt @@ -0,0 +1 @@ +timm==0.6.13 diff --git a/recipes/image_classification/inference.py b/recipes/image_classification/inference.py new file mode 100644 index 00000000..03ae91e7 --- /dev/null +++ b/recipes/image_classification/inference.py @@ -0,0 +1,110 @@ +""" +This code runs the image classification training loop. It tries to support as much +as timm's functionalities as possible. + +For compatibility the prefetcher, re_split and JSDLoss are disabled. + +To run the training script, use this command: + python inference.py cfg/phinet.py IMG_PATH --ckpt_pretrained MODEL_PATH + +You can change the configuration or override the parameters as you see fit. + +Authors: + - Francesco Paissan, 2023 +""" + +import sys +import time +import torch +from train import ImageClassification +from micromind.utils import parse_configuration +import torchvision + + +class ImageClassification(ImageClassification): + """Implements an image classification class for inference.""" + + def forward(self, batch): + """Computes forward step for image classifier. + + Arguments + --------- + batch : List[torch.Tensor, torch.Tensor] + Batch containing the images and labels. + + Returns + ------- + Predicted logits. + """ + return self.modules["classifier"](batch[0]) + + def compute_loss(self, pred, batch): + """Ignoring because it's inference.""" + pass + + def configure_optimizers(self): + """Ignoring because it's inference.""" + pass + + +def top_k_accuracy(k=1): + """ + Computes the top-K accuracy. + + Arguments + --------- + k : int + Number of top elements to consider for accuracy. + + Returns + ------- + accuracy : Callable + Top-K accuracy. 
+ """ + + def acc(pred, batch): + if pred[1].ndim == 2: + target = pred[1].argmax(1) + else: + target = pred[1] + _, indices = torch.topk(pred[0], k, dim=1) + correct = torch.sum(indices == target.view(-1, 1)) + accuracy = correct.item() / target.size(0) + + return torch.Tensor([accuracy]).to(pred[0].device) + + return acc + + +if __name__ == "__main__": + assert len(sys.argv) > 1, "Please pass the configuration file to the script." + hparams = parse_configuration(sys.argv[1]) + + mind = ImageClassification(hparams=hparams) + if hparams.ckpt_pretrained != "": + mind.load_modules(hparams.ckpt_pretrained) + mind.eval() + + # read, resize, and normalize image + img = torchvision.io.read_image(sys.argv[2]) + preprocess = torchvision.transforms.Compose( + [ + torchvision.transforms.Resize(size=hparams.input_shape[1:]), + torchvision.transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + ] + ) + + img = preprocess(img.float() / 255) + now = time.time() + logits = mind((img[None],)) + print("Inference took %.5f ms." % ((time.time() - now) * 1e-3)) + + print( + "Model prediction: %d with probability: %.2f." + % (logits.argmax(1).item(), logits.softmax(1)[0, logits.argmax(1)].item()) + ) + + print("Saving exported model to model.onnx...") + mind.export("model.onnx", "onnx", (3, 32, 32)) diff --git a/recipes/image_classification/prepare_data.py b/recipes/image_classification/prepare_data.py new file mode 100644 index 00000000..f31649f4 --- /dev/null +++ b/recipes/image_classification/prepare_data.py @@ -0,0 +1,169 @@ +""" +This code prepares the DataLoader compatible with HF accelerate and exploiting +timm data augmentation. +For compatibility, the prefetcher, JSDLoss and re_split options where disabled. + +Authors: + - Francesco Paissan, 2023 + +""" +import torch + +from timm.data import ( + AugMixDataset, + Mixup, + create_dataset, + create_transform, +) +from argparse import Namespace + + +def setup_mixup(args: Namespace): + """Setup of Mixup data augmentation based on input configuration. + + Arguments + --------- + args : Namespace + Input configuration for the experiment. + + Returns + ------- + Mixup function and respective collate_fn. : Union[Callable, Callable]""" + collate_fn = None + mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0.0 or args.cutmix_minmax is not None + if mixup_active: + mixup_args = dict( + mixup_alpha=args.mixup, + cutmix_alpha=args.cutmix, + cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, + switch_prob=args.mixup_switch_prob, + mode=args.mixup_mode, + label_smoothing=args.smoothing, + num_classes=args.num_classes, + ) + mixup_fn = Mixup(**mixup_args) + + return mixup_fn, collate_fn + + +def create_loaders(args: Namespace): + """Creates DataLoaders for dataset specified in the configuration file. + Refer to ... for how to select the proper configuration. + + Arguments + --------- + args : Namespace + Input configuration for the experiment. 
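+
+    Returns
+    -------
+    Training and validation DataLoaders. : Tuple[DataLoader, DataLoader]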
+ """ + # args.prefetcher = not args.no_prefetcher + args.prefetcher = False + args.distributed = False + + num_aug_splits = 0 + if args.aug_splits > 0: + assert args.aug_splits > 1, "A split of 1 makes no sense" + num_aug_splits = args.aug_splits + + # create the train and eval datasets + dataset_train = create_dataset( + args.dataset, + root=args.data_dir, + # split=args.train_split, + is_training=True, + class_map=args.class_map, + download=args.dataset_download, + batch_size=args.batch_size, + repeats=args.epoch_repeats, + ) + dataset_eval = create_dataset( + args.dataset, + root=args.data_dir, + split=args.val_split, + is_training=False, + class_map=args.class_map, + download=args.dataset_download, + batch_size=args.batch_size, + ) + + mixup_fn, collate_fn = setup_mixup(args) + + # wrap dataset in AugMix helper + if num_aug_splits > 1: + dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) + + # create data loaders w/ augmentation pipeiine + train_interpolation = args.train_interpolation + if args.no_aug or not train_interpolation: + train_interpolation = args.interpolation + re_num_splits = 0 + dataset_train.transform = create_transform( + input_size=args.input_shape, + is_training=True, + use_prefetcher=args.prefetcher, + no_aug=args.no_aug, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + scale=args.scale, + ratio=args.ratio, + hflip=args.hflip, + vflip=args.vflip, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=train_interpolation, + mean=args.mean, + std=args.std, + tf_preprocessing=False, + re_num_splits=re_num_splits, + separate=num_aug_splits > 0, + ) + + dataset_eval.transform = create_transform( + input_size=args.input_shape, + is_training=False, + use_prefetcher=args.prefetcher, + no_aug=args.no_aug, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + scale=args.scale, + ratio=args.ratio, + hflip=args.hflip, + vflip=args.vflip, + color_jitter=args.color_jitter, + auto_augment=args.aa, + interpolation=train_interpolation, + mean=args.mean, + std=args.std, + tf_preprocessing=False, + re_num_splits=re_num_splits, + separate=num_aug_splits > 0, + ) + + if collate_fn is None: + collate_fn = torch.utils.data.dataloader.default_collate + + loader_class = torch.utils.data.DataLoader + + loader_args = dict( + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + collate_fn=collate_fn, + pin_memory=args.pin_memory, + drop_last=True, + persistent_workers=args.persistent_workers, + ) + try: + loader_train = loader_class(dataset_train, **loader_args) + loader_args["drop_last"] = False + loader_eval = loader_class(dataset_eval, **loader_args) + except TypeError: + loader_args.pop("persistent_workers") # only in Pytorch 1.7+ + loader_train = loader_class(dataset_train, **loader_args) + loader_args["drop_last"] = False + loader_eval = loader_class(dataset_eval, **loader_args) + + return loader_train, loader_eval diff --git a/recipes/image_classification/train.py b/recipes/image_classification/train.py new file mode 100644 index 00000000..f5c8e8e7 --- /dev/null +++ b/recipes/image_classification/train.py @@ -0,0 +1,207 @@ +""" +This code runs the image classification training loop. It tries to support as much +as timm's functionalities as possible. + +For compatibility the prefetcher, re_split and JSDLoss are disabled. + +To run the training script, use this command: + python train.py cfg/phinet.py + +You can change the configuration or override the parameters as you see fit. 
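+For example, assuming the same `--key value` override syntax used by the
+inference script, a run with a different learning rate and batch size might
+look like this:
+    python train.py cfg/phinet.py --lr 0.0005 --batch_size 128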
+ +Authors: + - Francesco Paissan, 2023 +""" + +import torch +import torch.nn as nn +from prepare_data import create_loaders, setup_mixup +from torchinfo import summary +from timm.loss import ( + BinaryCrossEntropy, + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) + +import micromind as mm +from micromind.networks import PhiNet, XiNet +from micromind.utils import parse_configuration +import sys + + +class ImageClassification(mm.MicroMind): + """Implements an image classification class. Provides support + for timm augmentation and loss functions.""" + + def __init__(self, hparams, *args, **kwargs): + super().__init__(hparams, *args, **kwargs) + + if hparams.model == "phinet": + self.modules["classifier"] = PhiNet( + input_shape=hparams.input_shape, + alpha=hparams.alpha, + num_layers=hparams.num_layers, + beta=hparams.beta, + t_zero=hparams.t_zero, + compatibility=False, + divisor=hparams.divisor, + downsampling_layers=hparams.downsampling_layers, + return_layers=hparams.return_layers, + # classification-specific + include_top=True, + num_classes=hparams.num_classes, + ) + elif hparams.model == "xinet": + self.modules["classifier"] = XiNet( + input_shape=hparams.input_shape, + alpha=hparams.alpha, + gamma=hparams.gamma, + num_layers=hparams.num_layers, + return_layers=hparams.return_layers, + # classification-specific + include_top=True, + num_classes=hparams.num_classes, + ) + + tot_params = 0 + for m in self.modules.values(): + temp = summary(m, verbose=0) + tot_params += temp.total_params + + self.mixup_fn, _ = setup_mixup(hparams) + + print(f"Total parameters of model: {tot_params * 1e-6:.2f} M") + + def setup_criterion(self): + """Setup of the loss function based on augmentation strategy.""" + # setup loss function + if ( + self.hparams.mixup > 0 + or self.hparams.cutmix > 0.0 + or self.hparams.cutmix_minmax is not None + ): + # smoothing is handled with mixup target transform which outputs sparse, + # soft targets + if self.hparams.bce_loss: + train_loss_fn = BinaryCrossEntropy( + target_threshold=self.hparams.bce_target_thresh + ) + else: + train_loss_fn = SoftTargetCrossEntropy() + elif self.hparams.smoothing: + if self.hparams.bce_loss: + train_loss_fn = BinaryCrossEntropy( + smoothing=self.hparams.smoothing, + target_threshold=self.hparams.bce_target_thresh, + ) + else: + train_loss_fn = LabelSmoothingCrossEntropy( + smoothing=self.hparams.smoothing + ) + else: + train_loss_fn = nn.CrossEntropyLoss() + + return train_loss_fn + + def forward(self, batch): + """Computes forward step for image classifier. + + Arguments + --------- + batch : List[torch.Tensor, torch.Tensor] + Batch containing the images and labels. + + Returns + ------- + Predicted class and augmented class. : Tuple[torch.Tensor, torch.Tensor] + """ + img, target = batch + if not self.hparams.prefetcher: + img, target = img.to(self.device), target.to(self.device) + if self.mixup_fn is not None: + img, target = self.mixup_fn(img, target) + + return (self.modules["classifier"](img), target) + + def compute_loss(self, pred, batch): + """Sets up the loss function and computes the criterion. + + Arguments + --------- + pred : Tuple[torch.Tensor, torch.Tensor] + Predicted class and augmented class. + batch : List[torch.Tensor, torch.Tensor] + Same batch as input to the forward step. + + Returns + ------- + Cost function. 
: torch.Tensor + """ + self.criterion = self.setup_criterion() + + # taking it from pred because it might be augmented + return self.criterion(pred[0], pred[1]) + + def configure_optimizers(self): + """Configures the optimizes and, eventually the learning rate scheduler.""" + opt = torch.optim.Adam(self.modules.parameters(), lr=3e-4, weight_decay=0.0005) + return opt + + +def top_k_accuracy(k=1): + """ + Computes the top-K accuracy. + + Arguments + --------- + k : int + Number of top elements to consider for accuracy. + + Returns + ------- + accuracy : Callable + Top-K accuracy. + """ + + def acc(pred, batch): + if pred[1].ndim == 2: + target = pred[1].argmax(1) + else: + target = pred[1] + _, indices = torch.topk(pred[0], k, dim=1) + correct = torch.sum(indices == target.view(-1, 1)) + accuracy = correct.item() / target.size(0) + + return torch.Tensor([accuracy]).to(pred[0].device) + + return acc + + +if __name__ == "__main__": + assert len(sys.argv) > 1, "Please pass the configuration file to the script." + hparams = parse_configuration(sys.argv[1]) + + train_loader, val_loader = create_loaders(hparams) + + exp_folder = mm.utils.checkpointer.create_experiment_folder( + hparams.output_folder, hparams.experiment_name + ) + + checkpointer = mm.utils.checkpointer.Checkpointer( + exp_folder, hparams=hparams, key="loss" + ) + + mind = ImageClassification(hparams=hparams) + + top1 = mm.Metric("top1_acc", top_k_accuracy(k=1), eval_only=True) + top5 = mm.Metric("top5_acc", top_k_accuracy(k=5), eval_only=True) + + mind.train( + epochs=hparams.epochs, + datasets={"train": train_loader, "val": val_loader}, + metrics=[top5, top1], + checkpointer=checkpointer, + debug=hparams.debug, + ) + + mind.test(datasets={"test": val_loader}, metrics=[top1, top5]) diff --git a/recipes/object_detection/README.md b/recipes/object_detection/README.md new file mode 100644 index 00000000..b7d8bd2f --- /dev/null +++ b/recipes/object_detection/README.md @@ -0,0 +1,58 @@ +## Object Detection using YOLO + +**Disclaimer**: we will shortly releease HuggingFace checkpoints for COCO and VOC for both PhiNet and XiNet. + +In an attempt to showcase the simplicity of the YOLO object detection pipeline, we propose our implementation +free of the many abstraction layers of current state-of-the-art implementations. In fact, our implementation targets having not more than two abstraction layers, so that changes and improvements are transparent and reproducibile. + +This recipe uses some components from state-of-the-art object detection pipelines (via ultralytics), and supports distributed training. + +To reproduce our results, you can follow these steps: + +1. install PhiNets with `pip install git+https://github.com/fpaissan/micromind` +2. install the additional dependencies for this recipe with `pip install -r extra_requirements.txt` +3. start a training! + +### Training + +The experiment's configuration is stored inside the files in the `cfg` folder. They can be overridden simply from the command line by providing a new value. To start a training on COCO using YOLOPhiNet, you can use: +``` +python train.py cfg/yolo_phinet.py +``` + +### Inference +In order to export the model and/or run an inference using PyTorch, you can pass an image and the path to a pretrained model to the inference script. +For this, you can use this command: +``` +python inference.py cfg/yolo_phinet.py IMG_PATH --ckpt_pretrained MODEL_PATH +``` + +This will print the predicted output, and save an ONNX model in `model.onnx`. 
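+
+#### Checking the exported model
+As a quick sanity check of `model.onnx`, you can run it with `onnxruntime` (install it separately if it is not already in your environment). The snippet below is only a sketch and assumes the default `(3, 672, 672)` input shape from `cfg/yolo_phinet.py`:
+```
+import numpy as np
+import onnxruntime as ort
+
+# Load the exported model and look up its input name.
+session = ort.InferenceSession("model.onnx")
+input_name = session.get_inputs()[0].name
+
+# Dummy batch with the shape used at export time.
+dummy = np.random.rand(1, 3, 672, 672).astype(np.float32)
+outputs = session.run(None, {input_name: dummy})
+print([o.shape for o in outputs])
+```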
+ +#### Referencing PhiNet +If you use PhiNet or `micromind`, please cite our work: +``` +@article{Paissan_2022_TECS, + author = {Paissan, Francesco and Ancilotto, Alberto and Farella, Elisabetta}, + title = {PhiNets: A Scalable Backbone for Low-Power AI at the Edge}, + year = {2022}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3510832}, + doi = {10.1145/3510832}, + journal = {ACM Trans. Embed. Comput. Syst.}, +} +``` + +#### Referencing XiNet +If you use XiNet or `micromind`, please cite our work: +``` +@InProceedings{Ancilotto_2023_ICCV, + author = {Ancilotto, Alberto and Paissan, Francesco and Farella, Elisabetta}, + title = {XiNet: Efficient Neural Networks for tinyML}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2023}, + pages = {16968-16977} +} +``` diff --git a/recipes/object_detection/cfg/data/coco.names b/recipes/object_detection/cfg/data/coco.names new file mode 100644 index 00000000..ca76c80b --- /dev/null +++ b/recipes/object_detection/cfg/data/coco.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/recipes/object_detection/cfg/data/coco.yaml b/recipes/object_detection/cfg/data/coco.yaml new file mode 100644 index 00000000..55786935 --- /dev/null +++ b/recipes/object_detection/cfg/data/coco.yaml @@ -0,0 +1,151 @@ +######## +# Data configuration file for COCO8 trainings. +# Based on the ultralytics data conf. +# +# Adapted by: +# - Matteo Beltrami, 2023 +# - Francesco Paissan, 2023 +######## +task: detect # (str) YOLO task, i.e. detect, segment, classify, pose +mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark + +# Train settings ------------------------------------------------------------------------------------------------------- +imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes +rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val' +cache: False # (bool) True/ram, disk or False. Use cache for data loading +single_cls: False # (bool) train multi-class data as single-class +fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set) + +# Segmentation +overlap_mask: True # (bool) masks should overlap during training (segment train only) +mask_ratio: 4 # (int) mask downsample ratio (segment train only) + +# Prediction settings -------------------------------------------------------------------------------------------------- +classes: # (int | list[int], optional) filter results by class, i.e. 
classes=0, or classes=[0,2,3] + +# Hyperparameters ------------------------------------------------------------------------------------------------------ +box: 7.5 # (float) box loss gain +cls: 0.5 # (float) cls loss gain (scale with pixels) +dfl: 1.5 # (float) dfl loss gain + +hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction) +degrees: 0.0 # (float) image rotation (+/- deg) +translate: 0.1 # (float) image translation (+/- fraction) +scale: 0.5 # (float) image scale (+/- gain) +shear: 0.0 # (float) image shear (+/- deg) +perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # (float) image flip up-down (probability) +fliplr: 0.5 # (float) image flip left-right (probability) +mosaic: 1.0 # (float) image mosaic (probability) +mixup: 0.0 # (float) image mixup (probability) +copy_paste: 0.0 # (float) segment copy-paste (probability) + + +# Dataset location +path: /mnt/data/coco # dataset root dir +train: train2017.txt # train images (relative to 'path') 118287 images +val: val2017.txt # val images (relative to 'path') 5000 images +test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush + + +# Download script/URL (optional) +download: | + from ultralytics.utils.downloads import download + from pathlib import Path + + # Download labels + segments = True # segment or box labels + dir = Path(data_cfg['path']) # dataset root dir + url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/' + urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')] # labels + download(urls, dir=dir.parent) + # Download data + urls = ['http://images.cocodataset.org/zips/train2017.zip', # 19G, 118k images + 'http://images.cocodataset.org/zips/val2017.zip', # 1G, 5k images + 'http://images.cocodataset.org/zips/test2017.zip'] # 7G, 41k images (optional) + download(urls, dir=dir / 'images', threads=3) diff --git a/recipes/object_detection/cfg/data/coco8.yaml b/recipes/object_detection/cfg/data/coco8.yaml new file mode 100644 index 00000000..64927885 --- /dev/null +++ b/recipes/object_detection/cfg/data/coco8.yaml @@ -0,0 +1,144 @@ +######## +# Data configuration file for COCO8 trainings. +# Based on the ultralytics data conf. 
+# +# Adapted by: +# - Matteo Beltrami, 2023 +# - Francesco Paissan, 2023 +######## +task: detect # (str) YOLO task, i.e. detect, segment, classify, pose +mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark + +# Train settings ------------------------------------------------------------------------------------------------------- +imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes +rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val' +cache: False # (bool) True/ram, disk or False. Use cache for data loading +single_cls: False # (bool) train multi-class data as single-class +fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set) + +# Segmentation +overlap_mask: True # (bool) masks should overlap during training (segment train only) +mask_ratio: 4 # (int) mask downsample ratio (segment train only) + +# Prediction settings -------------------------------------------------------------------------------------------------- +classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3] + +# Hyperparameters ------------------------------------------------------------------------------------------------------ +box: 7.5 # (float) box loss gain +cls: 0.5 # (float) cls loss gain (scale with pixels) +dfl: 1.5 # (float) dfl loss gain + +hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction) +hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction) +hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction) +degrees: 0.0 # (float) image rotation (+/- deg) +translate: 0.1 # (float) image translation (+/- fraction) +scale: 0.5 # (float) image scale (+/- gain) +shear: 0.0 # (float) image shear (+/- deg) +perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001 +flipud: 0.0 # (float) image flip up-down (probability) +fliplr: 0.5 # (float) image flip left-right (probability) +mosaic: 1.0 # (float) image mosaic (probability) +mixup: 0.0 # (float) image mixup (probability) +copy_paste: 0.0 # (float) segment copy-paste (probability) + + +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+path: /mnt/data/coco8 # dataset root dir +train: images/train # train images (relative to 'path') 4 images +val: images/val # val images (relative to 'path') 4 images +test: # test images (optional) + +# Classes +names: + 0: person + 1: bicycle + 2: car + 3: motorcycle + 4: airplane + 5: bus + 6: train + 7: truck + 8: boat + 9: traffic light + 10: fire hydrant + 11: stop sign + 12: parking meter + 13: bench + 14: bird + 15: cat + 16: dog + 17: horse + 18: sheep + 19: cow + 20: elephant + 21: bear + 22: zebra + 23: giraffe + 24: backpack + 25: umbrella + 26: handbag + 27: tie + 28: suitcase + 29: frisbee + 30: skis + 31: snowboard + 32: sports ball + 33: kite + 34: baseball bat + 35: baseball glove + 36: skateboard + 37: surfboard + 38: tennis racket + 39: bottle + 40: wine glass + 41: cup + 42: fork + 43: knife + 44: spoon + 45: bowl + 46: banana + 47: apple + 48: sandwich + 49: orange + 50: broccoli + 51: carrot + 52: hot dog + 53: pizza + 54: donut + 55: cake + 56: chair + 57: couch + 58: potted plant + 59: bed + 60: dining table + 61: toilet + 62: tv + 63: laptop + 64: mouse + 65: remote + 66: keyboard + 67: cell phone + 68: microwave + 69: oven + 70: toaster + 71: sink + 72: refrigerator + 73: book + 74: clock + 75: vase + 76: scissors + 77: teddy bear + 78: hair drier + 79: toothbrush + +# Download script/URL (optional) +download: | + from pathlib import Path + import zipfile + import os + data_cfg['path'] = Path(data_cfg['path']) + os.makedirs(data_cfg["path"], exist_ok=True) + os.system(f"wget https://ultralytics.com/assets/coco8.zip -O {os.path.join(data_cfg['path'], 'coco8.zip')}") + with zipfile.ZipFile(os.path.join(data_cfg['path'], 'coco8.zip'), 'r') as zip_ref: + zip_ref.extractall(data_cfg['path'].parent) diff --git a/recipes/object_detection/cfg/yolo_phinet.py b/recipes/object_detection/cfg/yolo_phinet.py new file mode 100644 index 00000000..01940ff5 --- /dev/null +++ b/recipes/object_detection/cfg/yolo_phinet.py @@ -0,0 +1,26 @@ +""" +YOLOPhiNet training configuration. + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 +""" +# Data configuration +batch_size = 8 +data_cfg = "cfg/data/coco.yaml" +data_dir = "data/coco8" + +# Model configuration +input_shape = (3, 672, 672) +alpha = 2.3 +num_layers = 7 +beta = 0.75 +t_zero = 5 +divisor = 8 +downsampling_layers = [5, 7] +return_layers = [4, 6, 7] + +# Placeholder for inference +ckpt_pretrained = "" +output_dir = "detection_output" +coco_names = "cfg/data/coco.names" diff --git a/recipes/object_detection/inference.py b/recipes/object_detection/inference.py new file mode 100644 index 00000000..b971424b --- /dev/null +++ b/recipes/object_detection/inference.py @@ -0,0 +1,108 @@ +""" +YOLOv8 inference. + +This code allows you to launch an object detection inference using a YOLO MicroMind. + +To run this script, you should pass the checkpoint with the weights and the path to +an image, following this example: + python inference.py cfg/yolo_phinet.py IMG_PATH --ckpt_pretrained CHECKPOINT_PATH + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 + +""" + +import sys +import time +from pathlib import Path + +import torch +import torchvision + +from micromind.utils import parse_configuration +from micromind.utils.yolo import ( + draw_bounding_boxes_and_save, + postprocess, + preprocess, +) +from train import YOLO + + +class Inference(YOLO): + def __init__(self, hparams): + super().__init__(hparams=hparams, m_cfg={}) + + def forward(self, batch): + """Executes the detection network. 
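+
+        The image tensor is encoded by the PhiNet backbone; the SPPF block
+        refines its deepest feature map, and the YOLOv8 neck and detection
+        head produce the final predictions.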
+ + Arguments + --------- + bacth : List[torch.Tensor] + Input to the detection network. + + Returns + ------- + Output of the detection network : torch.Tensor + """ + backbone = self.modules["phinet"](batch[0])[1] + backbone[-1] = self.modules["sppf"](backbone[-1]) + neck = self.modules["neck"](*backbone) + head = self.modules["head"](neck) + return head + + +if __name__ == "__main__": + assert len(sys.argv) > 2, " ".join( + "Something went wrong when launching the script. \ + Please check the arguments.".split( + " " + ) + ) + + hparams = parse_configuration(sys.argv[1]) + + output_folder_path = Path(hparams.output_dir) + output_folder_path.mkdir(parents=True, exist_ok=True) + img_paths = [sys.argv[2]] + for img_path in img_paths: + image = torchvision.io.read_image(img_path) + out_paths = [ + ( + output_folder_path + / f"{Path(img_path).stem}_output{Path(img_path).suffix}" + ).as_posix() + ] + if not isinstance(image, torch.Tensor): + print("Error in image loading. Check your image file.") + sys.exit(1) + + pre_processed_image = preprocess(image) + + model = Inference(hparams) + # Load pretrained if passed. + if hparams.ckpt_pretrained != "": + model.load_modules(hparams.ckpt_pretrained) + print(f"Pretrained model loaded from {hparams.ckpt_pretrained}.") + else: + print("Running inference with no weights.") + + model.eval() + + with torch.no_grad(): + st = time.time() + predictions = model((pre_processed_image, None)) + print(f"Inference took {int(round(((time.time() - st) * 1000)))}ms") + post_predictions = postprocess( + preds=predictions[0], img=pre_processed_image, orig_imgs=image + ) + + class_labels = [s.strip() for s in open(hparams.coco_names, "r").readlines()] + draw_bounding_boxes_and_save( + orig_img_paths=img_paths, + output_img_paths=out_paths, + all_predictions=post_predictions, + class_labels=class_labels, + ) + # Exporting onnx model. + model.export("model.onnx", "onnx", hparams.input_shape) diff --git a/recipes/object_detection/prepare_data.py b/recipes/object_detection/prepare_data.py new file mode 100644 index 00000000..bb3b570f --- /dev/null +++ b/recipes/object_detection/prepare_data.py @@ -0,0 +1,88 @@ +""" +Data preparation script for YOLO training. Parses ultralytics yaml files +and, if needed, downloads them on disk. + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 +""" +from typing import Dict +import os + +from torch.utils.data import DataLoader, ConcatDataset +from ultralytics.data import build_yolo_dataset + + +def create_loaders(m_cfg: Dict, data_cfg: Dict, batch_size: int): + """Creates DataLoaders for dataset specified in the configuration file. + Refer to ... for how to select the proper configuration. + + Arguments + --------- + m_cfg : Dict + Contains information about the training process (e.g., data augmentation). + data_cfg : Dict + Contains details about the data configurations (e.g., image size, etc.). + batch_size : int + Batch size for the training process. 
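+
+    Returns
+    -------
+    Training and validation DataLoaders. : Tuple[DataLoader, DataLoader]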
+ + """ + if "download" in data_cfg and not os.path.exists(data_cfg["path"]): + # download data if it's not there + exec(data_cfg["download"]) + + mode = "train" + if isinstance(data_cfg["train"], list): + train_set = [] + for p in data_cfg["train"]: + train_set.append( + build_yolo_dataset( + m_cfg, + p, + batch_size, + data_cfg, + mode=mode, + rect=mode == "val", + ) + ) + train_set = ConcatDataset(train_set) + train_set = build_yolo_dataset( + m_cfg, + data_cfg["train"], + batch_size, + data_cfg, + mode=mode, + rect=mode == "val", + ) + + train_loader = DataLoader( + train_set, + batch_size, + shuffle=True, + num_workers=16, + persistent_workers=True, + pin_memory=True, + collate_fn=getattr(train_set, "collate_fn", None), + ) + + mode = "val" + val_set = build_yolo_dataset( + m_cfg, + data_cfg["val"], + batch_size, + data_cfg, + mode=mode, + rect=mode == "val", + ) + + val_loader = DataLoader( + val_set, + batch_size, + shuffle=False, + num_workers=16, + persistent_workers=True, + pin_memory=True, + collate_fn=getattr(val_set, "collate_fn", None), + ) + + return train_loader, val_loader diff --git a/recipes/object_detection/train.py b/recipes/object_detection/train.py new file mode 100644 index 00000000..bbbc1af9 --- /dev/null +++ b/recipes/object_detection/train.py @@ -0,0 +1,234 @@ +""" +YOLO training. + +This code allows you to train an object detection model with the YOLOv8 neck and loss. + +To run this script, you can start it with: + python train.py cfg/yolo_phinet.py + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 +""" + +import torch +from prepare_data import create_loaders +from torchinfo import summary +from ultralytics.utils.ops import scale_boxes, xywh2xyxy +from yolo_loss import Loss + +import micromind as mm +from micromind.networks import PhiNet +from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck +from micromind.utils import parse_configuration +from micromind.utils.yolo import ( + load_config, + mean_average_precision, + postprocess, +) +import sys +import os + + +class YOLO(mm.MicroMind): + def __init__(self, m_cfg, hparams, *args, **kwargs): + """Initializes the YOLO model.""" + super().__init__(*args, **kwargs) + + self.modules["phinet"] = PhiNet( + input_shape=hparams.input_shape, + alpha=hparams.alpha, + num_layers=hparams.num_layers, + beta=hparams.beta, + t_zero=hparams.t_zero, + include_top=False, + compatibility=False, + divisor=hparams.divisor, + downsampling_layers=hparams.downsampling_layers, + return_layers=hparams.return_layers, + ) + + sppf_ch, neck_filters, up, head_filters = self.get_parameters() + + self.modules["sppf"] = SPPF(*sppf_ch) + self.modules["neck"] = Yolov8Neck(filters=neck_filters, up=up) + self.modules["head"] = DetectionHead(filters=head_filters) + + tot_params = 0 + for m in self.modules.values(): + temp = summary(m, verbose=0) + tot_params += temp.total_params + + print(f"Total parameters of model: {tot_params * 1e-6:.2f} M") + + self.m_cfg = m_cfg + + def get_parameters(self): + """ + Gets the parameters with which to initialize the network detection part + (SPPF block, Yolov8Neck, DetectionHead). 
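+
+        Returns
+        -------
+        SPPF channels, neck filters, upsampling factors and head filters,
+        inferred from a dummy forward pass of the backbone. : Tuple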
+ """ + in_shape = self.modules["phinet"].input_shape + x = torch.randn(1, *in_shape) + y = self.modules["phinet"](x) + + c1 = c2 = y[1][2].shape[1] + sppf = SPPF(c1, c2) + out_sppf = sppf(y[1][2]) + + neck_filters = [y[1][0].shape[1], y[1][1].shape[1], out_sppf.shape[1]] + up = [2, 2] + up[0] = y[1][1].shape[2] / out_sppf.shape[2] + up[1] = y[1][0].shape[2] / (up[0] * out_sppf.shape[2]) + temp = """The layers you selected are not valid. \ + Please choose only layers between which the spatial resolution \ + doubles every time. Eventually, you can achieve this by \ + changing the downsampling layers.""" + + assert up == [2, 2], " ".join(temp.split()) + + neck = Yolov8Neck(filters=neck_filters, up=up) + out_neck = neck(y[1][0], y[1][1], out_sppf) + + head_filters = ( + out_neck[0].shape[1], + out_neck[1].shape[1], + out_neck[2].shape[1], + ) + + return (c1, c2), neck_filters, up, head_filters + + def preprocess_batch(self, batch): + """Preprocesses a batch of images by scaling and converting to float.""" + preprocessed_batch = {} + preprocessed_batch["img"] = ( + batch["img"].to(self.device, non_blocking=True).float() / 255 + ) + for k in batch: + if isinstance(batch[k], torch.Tensor) and k != "img": + preprocessed_batch[k] = batch[k].to(self.device) + + return preprocessed_batch + + def forward(self, batch): + """Runs the forward method by calling every module.""" + preprocessed_batch = self.preprocess_batch(batch) + backbone = self.modules["phinet"](preprocessed_batch["img"].to(self.device))[1] + backbone[-1] = self.modules["sppf"](backbone[-1]) + neck = self.modules["neck"](*backbone) + head = self.modules["head"](neck) + + return head + + def compute_loss(self, pred, batch): + """Computes the loss.""" + self.criterion = Loss(self.m_cfg, self.modules["head"], self.device) + preprocessed_batch = self.preprocess_batch(batch) + + lossi_sum, lossi = self.criterion( + pred[1], + preprocessed_batch, + ) + + return lossi_sum + + def configure_optimizers(self): + """Configures the optimizer and the scheduler.""" + opt = torch.optim.SGD(self.modules.parameters(), lr=1e-2, weight_decay=0.0005) + sched = torch.optim.lr_scheduler.CosineAnnealingLR( + opt, T_max=14000, eta_min=1e-3 + ) + return opt, sched + + @torch.no_grad() + def mAP(self, pred, batch): + """Compute the mean average precision (mAP) for a batch of predictions. + + Arguments + --------- + pred : torch.Tensor + Model predictions for the batch. + batch : dict + A dictionary containing batch information, including bounding boxes, + classes and shapes. + + Returns + ------- + torch.Tensor + A tensor containing the computed mean average precision (mAP) for the batch. 
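+
+        Notes
+        -----
+        Ground-truth boxes are converted from normalized xywh to xyxy,
+        rescaled to the original image shapes, and compared against the
+        post-processed predictions.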
+ """ + preprocessed_batch = self.preprocess_batch(batch) + post_predictions = postprocess( + preds=pred[0], img=preprocessed_batch, orig_imgs=batch + ) + + batch_bboxes_xyxy = xywh2xyxy(batch["bboxes"]) + dim = batch["resized_shape"][0][0] + batch_bboxes_xyxy[:, :4] *= dim + + batch_bboxes = [] + for i in range(len(batch["batch_idx"])): + for b in range(len(batch_bboxes_xyxy[batch["batch_idx"] == i, :])): + batch_bboxes.append( + scale_boxes( + batch["resized_shape"][i], + batch_bboxes_xyxy[batch["batch_idx"] == i, :][b], + batch["ori_shape"][i], + ) + ) + batch_bboxes = torch.stack(batch_bboxes) + mmAP = mean_average_precision(post_predictions, batch, batch_bboxes) + + return torch.Tensor([mmAP]) + + +def replace_datafolder(hparams, data_cfg): + """Replaces the data root folder, if told to do so from the configuration.""" + data_cfg["path"] = str(data_cfg["path"]) + data_cfg["path"] = ( + data_cfg["path"][:-1] if data_cfg["path"][-1] == "/" else data_cfg["path"] + ) + for key in ["train", "val"]: + if hasattr(hparams, "data_dir"): + if hparams.data_dir != data_cfg["path"]: + data_cfg[key] = str(data_cfg[key]).replace(data_cfg["path"], "") + data_cfg[key] = ( + data_cfg[key][1:] if data_cfg[key][0] == "/" else data_cfg[key] + ) + data_cfg[key] = os.path.join(hparams.data_dir, data_cfg[key]) + + data_cfg["path"] = hparams.data_dir + + return data_cfg + + +if __name__ == "__main__": + assert len(sys.argv) > 1, "Please pass the configuration file to the script." + hparams = parse_configuration(sys.argv[1]) + + m_cfg, data_cfg = load_config(hparams.data_cfg) + + # check if specified path for images is different, correct it in case + data_cfg = replace_datafolder(hparams, data_cfg) + + train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size) + + exp_folder = mm.utils.checkpointer.create_experiment_folder( + hparams.output_folder, hparams.experiment_name + ) + + checkpointer = mm.utils.checkpointer.Checkpointer( + exp_folder, hparams=hparams, key="loss" + ) + + yolo_mind = YOLO(m_cfg, hparams=hparams) + + mAP = mm.Metric("mAP", yolo_mind.mAP, eval_only=True, eval_period=1) + + yolo_mind.train( + epochs=200, + datasets={"train": train_loader, "val": val_loader}, + metrics=[mAP], + checkpointer=checkpointer, + debug=hparams.debug, + ) diff --git a/recipes/object_detection/yolo_loss.py b/recipes/object_detection/yolo_loss.py new file mode 100644 index 00000000..29650a60 --- /dev/null +++ b/recipes/object_detection/yolo_loss.py @@ -0,0 +1,137 @@ +""" +Wrapper for the YOLO loss, from the ultralytics implementation. 
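+It combines a classification term (binary cross-entropy) with box regression
+terms (IoU and distribution focal loss), following the YOLOv8 formulation.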
+For a reference on the parameters, please refer to https://shorturl.at/gkrAO + + +Authors: + - Matteo Beltrami, 2023 + - Francesco Paissan, 2023 +""" +import torch +import torch.nn as nn +from ultralytics.utils.loss import BboxLoss, v8DetectionLoss +from ultralytics.utils.ops import xywh2xyxy +from ultralytics.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors + + +class Loss(v8DetectionLoss): + def __init__(self, h, m, device): # model must be de-paralleled + self.bce = nn.BCEWithLogitsLoss(reduction="none") + self.hyp = h + self.stride = m.stride + self.nc = m.nc + self.no = m.no + self.reg_max = m.reg_max + self.device = device + + self.use_dfl = m.reg_max > 1 + + self.assigner = TaskAlignedAssigner( + topk=10, num_classes=self.nc, alpha=0.5, beta=6.0 + ) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device) + self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device) + + def preprocess(self, targets, batch_size, scale_tensor): + """ + Preprocesses the target counts and matches with the input batch size + to output a tensor. + """ + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + counts = counts.to(dtype=torch.int32) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + """ + Decode predicted object bounding box coordinates from anchor points and + distribution. + """ + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = ( + pred_dist.view(b, a, 4, c // 4) + .softmax(3) + .matmul(self.proj.type(pred_dist.dtype)) + ) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, preds, batch): + """ + Calculate the sum of the loss for box, cls and dfl multiplied by batch size. 
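+
+        Returns
+        -------
+        Total loss scaled by the batch size and the detached per-component
+        losses (box, cls, dfl). : Tuple[torch.Tensor, torch.Tensor]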
+ """ + loss = torch.zeros(3, device=self.device) # box, cls, dfl + feats = preds[1] if isinstance(preds, tuple) else preds + pred_distri, pred_scores = torch.cat( + [xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2 + ).split((self.reg_max * 4, self.nc), 1) + + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size = pred_scores.shape[0] + imgsz = ( + torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) + * self.stride[0] + ) # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # Targets + targets = torch.cat( + (batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), + 1, + ) + targets = self.preprocess( + targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]] + ) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # Pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + _, target_bboxes, target_scores, fg_mask, _ = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt, + ) + + target_scores_sum = max(target_scores.sum(), 1) + + # Cls loss + loss[1] = ( + self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum + ) # BCE + + # Bbox loss + if fg_mask.sum(): + target_bboxes /= stride_tensor + loss[0], loss[2] = self.bbox_loss( + pred_distri, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask, + ) + + loss[0] *= self.hyp.box # box gain + loss[1] *= self.hyp.cls # cls gain + loss[2] *= self.hyp.dfl # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) diff --git a/recipes/tinyCLAP/README.md b/recipes/tinyCLAP/README.md new file mode 100644 index 00000000..330971ee --- /dev/null +++ b/recipes/tinyCLAP/README.md @@ -0,0 +1,12 @@ +This folder will contatin the code for the paper of [tinyCLAP](https://arxiv.org/abs/2311.14517). 
+ +``` +@misc{paissan2023tinyclap, + title={tinyCLAP: Distilling Constrastive Language-Audio Pretrained Models}, + author={Francesco Paissan and Elisabetta Farella}, + year={2023}, + eprint={2311.14517}, + archivePrefix={arXiv}, + primaryClass={cs.SD} +} +``` diff --git a/tests/test_networks.py b/tests/test_networks.py index ed489bdf..9d75f8b7 100644 --- a/tests/test_networks.py +++ b/tests/test_networks.py @@ -7,9 +7,9 @@ import torch -def test_onnx(): - from micromind.networks import PhiNet +def test_onnx_phinet(): from micromind.convert import convert_to_onnx + from micromind.networks import PhiNet save_path = "temp.onnx" @@ -28,9 +28,30 @@ def test_onnx(): os.remove(save_path) -def test_openvino(): - from micromind.networks import PhiNet +def test_onnx_xinet(): + from micromind.convert import convert_to_onnx + from micromind.networks import XiNet + + save_path = "temp.onnx" + + in_shape = (3, 224, 224) + net = XiNet(in_shape, include_top=True) + + convert_to_onnx(net, save_path, simplify=True) + import os + + os.remove(save_path) + + convert_to_onnx(net, save_path, simplify=False) + + import os + + os.remove(save_path) + + +def test_openvino_phinet(): from micromind.convert import convert_to_openvino + from micromind.networks import PhiNet save_dir = "vino" @@ -44,9 +65,25 @@ def test_openvino(): shutil.rmtree(save_dir) -def test_tflite(): - from micromind.networks import PhiNet +def test_openvino_xinet(): + from micromind.convert import convert_to_openvino + from micromind.networks import XiNet + + save_dir = "vino" + + in_shape = (3, 224, 224) + net = XiNet(in_shape) + + convert_to_openvino(net, save_dir) + + import shutil + + shutil.rmtree(save_dir) + + +def test_tflite_phinet(): from micromind.convert import convert_to_tflite + from micromind.networks import PhiNet save_path = "tflite" @@ -65,3 +102,26 @@ def test_tflite(): import shutil shutil.rmtree(save_path) + + +def test_tflite_xinet(): + from micromind.convert import convert_to_tflite + from micromind.networks import XiNet + + save_path = "tflite" + + in_shape = (3, 224, 224) + net = XiNet(in_shape) + + convert_to_tflite(net, save_path) + + import shutil + + shutil.rmtree(save_path) + + temp = torch.Tensor(100, in_shape[1], in_shape[2], in_shape[0]) + convert_to_tflite(net, save_path, temp) + + import shutil + + shutil.rmtree(save_path) From a131b77be6e1c869af0e2ccf66fe976d52f96496 Mon Sep 17 00:00:00 2001 From: Francesco Paissan <46992226+fpaissan@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:11:11 +0100 Subject: [PATCH 2/2] fix linters --- recipes/image_classification/train.py | 2 +- recipes/object_detection/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/image_classification/train.py b/recipes/image_classification/train.py index 296ecf2b..9680fd21 100644 --- a/recipes/image_classification/train.py +++ b/recipes/image_classification/train.py @@ -61,7 +61,7 @@ def __init__(self, hparams, *args, **kwargs): include_top=True, num_classes=hparams.num_classes, ) - + self.mixup_fn, _ = setup_mixup(hparams) print("Number of parameters for each module:") diff --git a/recipes/object_detection/train.py b/recipes/object_detection/train.py index 79f85df9..bdc213c2 100644 --- a/recipes/object_detection/train.py +++ b/recipes/object_detection/train.py @@ -172,7 +172,7 @@ def mAP(self, pred, batch): batch["ori_shape"][i], ) ) - + batch_bboxes = torch.stack(batch_bboxes).to(self.device) mmAP = mean_average_precision(post_predictions, batch, batch_bboxes)