Optimize YOLO neck using XiConv (#78)

* started training w/ B0 recipe
* Revert "started training w/ B0 recipe"

  This reverts commit 66cafca.
* yolov8opt class - using xinet conv
* res 320
* add epochs hparams, default res 640, imgsz from cli
* fix inference modules
* minor details

---------

Co-authored-by: Matteo Beltrami <71525176+matteobeltrami@users.noreply.github.com>
micromind-toolkit · Jan 16, 2024 · e9af6c5 · e9af6c5
1 parent db3854e
commit e9af6c5
Show file tree

Hide file tree

Showing 5 changed files with 87 additions and 5 deletions.
diff --git a/micromind/networks/yolo.py b/micromind/networks/yolo.py
@@ -14,6 +14,7 @@
 import torch.nn.functional as F
 
 from micromind.utils.yolo import autopad, dist2bbox, make_anchors
+from .xinet import XiConv
 
 
 class Upsample:
@@ -455,6 +456,69 @@ def forward(self, p3, p4, p5):
         return return_heads
 
 
+class Yolov8NeckOpt(Yolov8Neck):
+    """YOLOv8 neck variant whose convolutional blocks are ``XiConv`` layers.
+
+    The constructor attaches two ``Upsample`` stages and up to six ``XiConv``
+    blocks (``n1`` .. ``n6``) to the instance. ``n3``/``n4`` are created only
+    when the second or third detection head is enabled, ``n5``/``n6`` only
+    when the third one is. No ``forward`` is defined here, so the one
+    inherited from ``Yolov8Neck`` is used.
+
+    Arguments
+    ---------
+    filters : list, optional
+        Three channel counts used to size the blocks.
+        Default: [256, 512, 768].
+    up : list, optional
+        Two values handed to the ``Upsample`` stages. Default: [2, 2].
+    heads : list, optional
+        Three booleans selecting which detection heads are in use.
+        Default: [True, True, True].
+    d : int, optional
+        Not read by this constructor. Default: 1.
+    """
+
+    def __init__(self, filters=None, up=None, heads=None, d=1):
+        # None sentinels replace the former mutable list defaults; the
+        # effective default values are unchanged.
+        if filters is None:
+            filters = [256, 512, 768]
+        if up is None:
+            up = [2, 2]
+        if heads is None:
+            heads = [True, True, True]
+
+        # NOTE(review): the parent constructor runs with its own defaults, not
+        # with the arguments received here. What it builds is not visible from
+        # this class -- confirm it leaves no blocks behind for disabled heads.
+        super().__init__()
+        self.heads = heads
+        self.up1 = Upsample(up[0], mode="nearest")
+        self.up2 = Upsample(up[1], mode="nearest")
+
+        # Settings shared by every XiConv block of the neck.
+        xi_common = dict(kernel_size=3, gamma=3, skip_tensor_in=False)
+        # Extra settings for the stride-2 blocks (n3 and n5).
+        xi_strided = dict(stride=2, padding=1, **xi_common)
+
+        self.n1 = XiConv(
+            c_in=int(filters[1] + filters[2]),
+            c_out=int(filters[1]),
+            **xi_common,
+        )
+        self.n2 = XiConv(
+            c_in=int(filters[0] + filters[1]),
+            c_out=int(filters[0]),
+            **xi_common,
+        )
+
+        # The blocks below are defined only when the 2nd and/or 3rd detection
+        # head is requested. Otherwise they would be initialized (and thus
+        # occupy space) without ever being used.
+        if self.heads[1] or self.heads[2]:
+            self.n3 = XiConv(
+                c_in=int(filters[0]),
+                c_out=int(filters[0]),
+                **xi_strided,
+            )
+            self.n4 = XiConv(
+                c_in=int(filters[0] + filters[1]),
+                c_out=int(filters[1]),
+                **xi_common,
+            )
+        if self.heads[2]:
+            self.n5 = XiConv(
+                c_in=int(filters[1]),
+                c_out=int(filters[1]),
+                **xi_strided,
+            )
+            self.n6 = XiConv(
+                c_in=int(filters[1] + filters[2]),
+                c_out=int(filters[2]),
+                **xi_common,
+            )
+
+
 class DetectionHead(nn.Module):
     """Implements YOLOv8's detection head.
 

diff --git a/recipes/object_detection/README.md b/recipes/object_detection/README.md
@@ -1,5 +1,6 @@
 ## Object Detection using YOLO
 
+**[16 Jan 2024]** Add optimized YOLO neck using XiConv. Fix compatibility with ultralytics weights.<br />
 **[17 Dec 2023]** Add VOC dataset, selective head option, and instructions for dataset download.<br />
 **[1 Dec 2023]** Fix DDP handling and computational graph.
 
@@ -24,6 +25,11 @@ The experiment's configuration is stored inside the files in the `cfg` folder. T
 python train.py cfg/yolo_phinet.py
 ```
 
+To change the input resolution, override the `input_shape` argument from the CLI, for example:
+```
+python train.py cfg/yolo_phinet.py --input_shape 3,96,96
+```
+
 ### Inference
 In order to export the model and/or run an inference using PyTorch, you can pass an image and the path to a pretrained model to the inference script.
 For this, you can use this command:

diff --git a/recipes/object_detection/cfg/yolo_phinet.py b/recipes/object_detection/cfg/yolo_phinet.py
@@ -9,9 +9,10 @@
 batch_size = 8
 data_cfg = "cfg/data/coco.yaml"
 data_dir = "data/coco"
+epochs = 200
 
 # Model configuration
-input_shape = (3, 672, 672)
+input_shape = [3, 640, 640]
 alpha = 2.3
 num_layers = 7
 beta = 0.75

diff --git a/recipes/object_detection/inference.py b/recipes/object_detection/inference.py
@@ -45,7 +45,7 @@ def forward(self, img):
         -------
             Output of the detection network : torch.Tensor
         """
-        backbone = self.modules["phinet"](img)
+        backbone = self.modules["backbone"](img)
         neck_input = backbone[1]
         neck_input.append(self.modules["sppf"](backbone[0]))
         neck = self.modules["neck"](*neck_input)
@@ -62,6 +62,11 @@ def forward(self, img):
     )
 
     hparams = parse_configuration(sys.argv[1])
+    if isinstance(hparams.input_shape, str):
+        hparams.input_shape = [
+            int(x) for x in "".join(hparams.input_shape).split(",")
+        ]  # temp solution
+        print(f"Setting input shape to {hparams.input_shape}.")
 
     output_folder_path = Path(hparams.output_dir)
     output_folder_path.mkdir(parents=True, exist_ok=True)

diff --git a/recipes/object_detection/train.py b/recipes/object_detection/train.py
@@ -18,7 +18,7 @@
 
 import micromind as mm
 from micromind.networks import PhiNet
-from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck
+from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck, Yolov8NeckOpt
 from micromind.utils import parse_configuration
 from micromind.utils.yolo import (
     load_config,
@@ -53,7 +53,7 @@ def __init__(self, m_cfg, hparams, *args, **kwargs):
         )
 
         self.modules["sppf"] = SPPF(*sppf_ch)
-        self.modules["neck"] = Yolov8Neck(
+        self.modules["neck"] = Yolov8NeckOpt(
             filters=neck_filters, up=up, heads=hparams.heads
         )
         self.modules["head"] = DetectionHead(filters=head_filters, heads=hparams.heads)
@@ -231,11 +231,17 @@ def replace_datafolder(hparams, data_cfg):
 if __name__ == "__main__":
     assert len(sys.argv) > 1, "Please pass the configuration file to the script."
     hparams = parse_configuration(sys.argv[1])
+    if isinstance(hparams.input_shape, str):
+        hparams.input_shape = [
+            int(x) for x in "".join(hparams.input_shape).split(",")
+        ]  # temp solution
+        print(f"Setting input shape to {hparams.input_shape}.")
 
     m_cfg, data_cfg = load_config(hparams.data_cfg)
 
     # check if specified path for images is different, correct it in case
     data_cfg = replace_datafolder(hparams, data_cfg)
+    m_cfg.imgsz = hparams.input_shape[-1]  # temp solution
 
     train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size)
 
@@ -252,7 +258,7 @@ def replace_datafolder(hparams, data_cfg):
     mAP = mm.Metric("mAP", yolo_mind.mAP, eval_only=True, eval_period=1)
 
     yolo_mind.train(
-        epochs=200,
+        epochs=hparams.epochs,
         datasets={"train": train_loader, "val": val_loader},
         metrics=[mAP],
         checkpointer=checkpointer,