micromind-toolkit · fpaissan · Nov 16, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 23, 2023
diff --git a/micromind/core.py b/micromind/core.py
@@ -18,7 +18,7 @@
 import torch
 import os
 
-from .utils.helpers import select_and_load_checkpoint, get_random_string
+from .utils.helpers import get_random_string
 from .utils.checkpointer import Checkpointer
 
 # This is used ONLY if you are not using argparse to get the hparams
@@ -114,24 +114,13 @@ def reduce(self, stage, clear=False):
         """
 
         if self.reduction == "mean":
-            if clear or (
-                self.history[stage][-1].shape[0] != self.history[stage][0].shape[0]
-            ):
-                # tmp = torch.stack(self.history[stage][:-1]).mean()
-                tmp = torch.stack(self.history[stage]).mean()
-            else:
-                tmp = torch.stack(self.history[stage]).mean()
+            tmp = torch.cat(self.history[stage], dim=0).mean()
         elif self.reduction == "sum":
-            if (
-                clear
-                or self.history[stage][-1].shape[0] != self.history[stage][0].shape[0]
-            ):
-                tmp = torch.stack(self.history[stage][:-1]).sum()
-            else:
-                tmp = torch.stack(self.history[stage]).sum()
+            tmp = torch.cat(self.history[stage], dim=0).sum()
 
         if clear:
             self.history[stage] = []
+
         return tmp.item()
 
 
@@ -314,42 +303,23 @@ def on_train_start(self):
             self.experiment_folder = "tmp_" + get_random_string()
             logger.info(f"Created temporary folder for debug {self.experiment_folder}.")
 
-        save_dir = os.path.join(self.experiment_folder, "save")
-        if os.path.exists(save_dir):
-            if len(os.listdir(save_dir)) != 0:
-                # select which checkpoint and load it.
-                checkpoint, path = select_and_load_checkpoint(save_dir)
-                self.opt, self.lr_sched = self.configure_optimizers()
-                self.opt.load_state_dict(checkpoint["optimizer"])
-                self.lr_sched.load_state_dict(checkpoint["lr_scheduler"])
-                self.start_epoch = checkpoint["epoch"] + 1
-
-                self.load_modules(path)
-
-                if self.accelerator.is_local_main_process:
-                    self.checkpointer = Checkpointer(
-                        checkpoint["key"],
-                        mode=checkpoint["mode"],
-                        checkpoint_path=self.experiment_folder,
-                    )
-
-                    logger.info(f"Loaded existing checkpoint from {path}.")
-            else:
-                self.opt, self.lr_sched = self.configure_optimizers()
-                self.start_epoch = 0
+        accelerate_dir = os.path.join(self.experiment_folder, "save")
+        if os.path.exists(accelerate_dir):
+            self.opt, self.lr_sched = self.configure_optimizers()
 
-                self.checkpointer = Checkpointer(
-                    "val_loss", checkpoint_path=self.experiment_folder
-                )
         else:
             os.makedirs(self.experiment_folder, exist_ok=True)
 
             self.opt, self.lr_sched = self.configure_optimizers()
             self.start_epoch = 0
 
-            self.checkpointer = Checkpointer(
-                "val_loss", checkpoint_path=self.experiment_folder
-            )
+        # handle start_epoch better
+        self.start_epoch = 0
+        self.checkpointer = Checkpointer(
+            "val_loss",
+            checkpoint_path=self.experiment_folder,
+            accelerator=self.accelerator,
+        )
 
         self.accelerator = Accelerator()
         self.device = self.accelerator.device
@@ -362,6 +332,9 @@ def on_train_start(self):
         for i, key in enumerate(list(self.datasets.keys())[::-1]):
             self.datasets[key] = accelerated[-(i + 1)]
 
+        if os.path.exists(accelerate_dir):
+            self.accelerator.load_state(accelerate_dir)
+
     def on_train_end(self):
         """Runs at the end of each training. Cleans up before exiting."""
         if self.hparams.debug:
@@ -430,6 +403,7 @@ def train(
 
                     model_out = self(batch)
                     loss = self.compute_loss(model_out, batch)
+                    loss_epoch += loss.item()
 
                     self.accelerator.backward(loss)
                     self.opt.step()
@@ -443,7 +417,6 @@ def train(
 
                     running_train.update({"train_loss": loss_epoch / (idx + 1)})
 
-                    loss_epoch += loss.item()
                     pbar.set_postfix(**running_train)
 
                     if self.debug and idx > 10:

diff --git a/micromind/networks/phinet.py b/micromind/networks/phinet.py
@@ -480,7 +480,8 @@ def __init__(
         h_swish: bool = True,  # S1
         squeeze_excite: bool = True,  # S1
         divisor: int = 1,
-    ) -> None:
+        return_layers=None
+        ) -> None:
         """This class implements the PhiNet architecture.
 
         Arguments
@@ -509,6 +510,7 @@ def __init__(
         self.t_zero = t_zero
         self.num_layers = num_layers
         self.num_classes = num_classes
+        self.return_layers = return_layers
 
         if compatibility:  # disables operations hard for some platforms
             h_swish = False
@@ -686,6 +688,12 @@ def __init__(
                 ),
             )
 
+        if self.return_layers is not None:
+            print(f"PhiNet configured to return layers {self.return_layers}:")
+            for i in self.return_layers:
+                print(f"Layer {i} - {self._layers[i].__class__}")
+
+
     def forward(self, x):
         """Executes PhiNet network
 
@@ -698,10 +706,15 @@ def forward(self, x):
         ------
             Logits if `include_top=True`, otherwise embeddings : torch.Tensor
         """
-        for layers in self._layers:
+        ret = []
+        for i, layers in enumerate(self._layers):
             x = layers(x)
+            if i in self.return_layers: ret.append(x)
 
         if self.classify:
             x = self.classifier(x)
 
+        if self.return_layers is not None:
+            return x, ret
         return x
+
diff --git a/micromind/utils/checkpointer.py b/micromind/utils/checkpointer.py
@@ -20,12 +20,14 @@ def __init__(
         mode: str = "min",
         top_k: int = 1,
         checkpoint_path: Union[str, Path] = ".",
+        accelerator=None,
     ) -> None:
         assert mode in ["max", "min"], "Checkpointer mode can be only max or min."
         self.key = key
         self.mode = mode
         self.top_k = top_k
 
+        self.accelerator = accelerator
         self.bests = [torch.inf] * self.top_k
         self.check_paths = [""] * self.top_k
         self.root_dir = checkpoint_path
@@ -96,7 +98,12 @@ def __call__(
 
         self.fstream.close()
 
+        self.accelerator.save_state(
+            output_dir=os.path.join(self.save_dir, "accelerate")
+        )
+
         if self.mode == "max":
             return self.check_paths[self.bests.index(max(self.bests))]
         elif self.mode == "min":
             return self.check_paths[self.bests.index(min(self.bests))]
+
diff --git a/micromind/utils/load_params.py b/micromind/utils/load_params.py
@@ -1,8 +1,4 @@
 """
-Authors:
-    - Matteo Beltrami, 2023
-    - Francesco Paissan, 2023
-
 This code is used to obtain a version of the yolov8 model with pre-trained
 weights that can be used with the implementation of the network present in
 this repository.
@@ -12,14 +8,18 @@
 The desired model is downloaded and each model parameter is copied into the
 model state dictionary initialized via our yolov8 implementation.
 The model state dictionary is finally saved in the pytorch .pt format.
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
 """
 
 import torch
 import sys
 import requests
 import os
 
-from micromind.networks.modules import YOLOv8
+from micromind.networks.yolov8 import YOLOv8
 from yolo_helpers import get_variant_multiples
 
 

diff --git a/micromind/utils/yolo_helpers.py b/micromind/utils/yolo_helpers.py
@@ -268,30 +268,30 @@ def compute_transform(
 def preprocess(im, imgsz=640, model_stride=32, model_pt=True):
     """Preprocess a batch of images for inference.
 
-    This function preprocesses a batch of images for inference by
-    resizing, transforming, and normalizing them.
-
-    Arguments
-    ---------
-    im : torch.Tensor or list of torch.Tensor
-        An input image or a batch of images to be preprocessed.
-    imgsz : int, optional
-        The target size of the images after preprocessing.
-        Default is 640.
-    model_stride : int, optional
-        The stride value used for padding calculation when `auto` is True
-        in `compute_transform`. Default is 32.
-    model_pt : bool, optional
-        If True, the function automatically calculates the padding to
-        maintain the same shapes for all input images in the batch.
-        Default is True.
-
-    Returns
-    -------
-    torch.Tensor
-        The preprocessed batch of images as a torch.Tensor with shape
-        (n, 3, h, w), where n is the number of images, 3 represents the
-        RGB channels, and h and w are the height and width of the images.
+        This function preprocesses a batch of images for inference by
+        resizing, transforming, and normalizing them.
+
+        Arguments
+        ---------
+        im : torch.Tensor or list of torch.Tensor
+            An input image or a batch of images to be preprocessed.
+        imgsz : int, optional
+            The target size of the images after preprocessing.
+            Default is 640.
+        model_stride : int, optional
+            The stride value used for padding calculation when `auto` is True
+            in `compute_transform`. Default is 32.
+        model_pt : bool, optional
+            If True, the function automatically calculates the padding to
+            maintain the same shapes for all input images in the batch.
+            Default is True.
+
+        Returns
+        -------
+        torch.Tensor
+            The preprocessed batch of images as a torch.Tensor with shape
+            (n, 3, h, w), where n is the number of images, 3 represents the
+            RGB channels, and h and w are the height and width of the images.
     """
     auto = model_pt
     im = compute_transform(im, new_shape=imgsz, auto=auto, stride=model_stride)
@@ -528,7 +534,6 @@ def postprocess(preds, img, orig_imgs):
         multi_label=True,
     )
     all_preds = []
-
     for i, pred in enumerate(preds):
         orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
         if isinstance(orig_img, dict):
@@ -749,23 +754,23 @@ def xywh2xyxy(x):
 
 def bbox_format(box):
     """
-    Convert a tensor of coordinates [x1, y1, x2, y2] representing two points
-    defining a rectangle to the format [x_min, y_min, x_max, y_max], where
-    x_min, y_min represent the top-left corner, and x_max, y_max represent the
-    bottom-right corner of the rectangle.
-
-    Arguments
-    ---------
-    box : torch.Tensor
-        A tensor of coordinates in the format [x1, y1, x2, y2] where x1, y1, x2, y2
-        represent the coordinates of two points defining a rectangle.
-
-    Returns
-    -------
-    torch.Tensor
-        The coordinates in the format [x_min, y_min, x_max, y_max] where x_min, y_min
-        represent the top-left vertex, and x_max, y_max represent the bottom-right
-        vertex of the rectangle.
+        Convert a tensor of coordinates [x1, y1, x2, y2] representing two points
+        defining a rectangle to the format [x_min, y_min, x_max, y_max], where
+        x_min, y_min represent the top-left corner, and x_max, y_max represent the
+        bottom-right corner of the rectangle.
+
+        Arguments
+        ---------
+        box : torch.Tensor
+            A tensor of coordinates in the format [x1, y1, x2, y2] where x1, y1, x2, y2
+            represent the coordinates of two points defining a rectangle.
+
+        Returns
+        -------
+        torch.Tensor
+            The coordinates in the format [x_min, y_min, x_max, y_max] where x_min, y_min
+            represent the top-left vertex, and x_max, y_max represent the bottom-right
+            vertex of the rectangle.
     """
     x1, y1, x2, y2 = box[0], box[1], box[2], box[3]