Skip to content

Commit

Permalink
Optimize YOLO neck using XiConv (#78)
Browse files Browse the repository at this point in the history
* started training w/ B0 recipe

* Revert "started training w/ B0 recipe"

This reverts commit 66cafca.

* yolov8opt class - using xinet conv

* res 320

* add epochs hparams, default res 640, imgsz from cli

* fix inference modules

* minor details

---------

Co-authored-by: Matteo Beltrami <71525176+matteobeltrami@users.noreply.github.com>
  • Loading branch information
fpaissan and matteobeltrami authored Jan 16, 2024
1 parent db3854e commit e9af6c5
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 5 deletions.
64 changes: 64 additions & 0 deletions micromind/networks/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import torch.nn.functional as F

from micromind.utils.yolo import autopad, dist2bbox, make_anchors
from .xinet import XiConv


class Upsample:
Expand Down Expand Up @@ -455,6 +456,69 @@ def forward(self, p3, p4, p5):
return return_heads


class Yolov8NeckOpt(Yolov8Neck):
    """YOLOv8 neck whose convolutional blocks are built with XiConv.

    The constructor runs ``Yolov8Neck.__init__`` with no arguments and then
    assigns the two upsampling layers and the ``n1``..``n6`` blocks as
    ``XiConv`` modules. This class defines no ``forward`` of its own, so the
    one inherited from ``Yolov8Neck`` is used.

    Arguments
    ---------
    filters : list
        Three channel counts used to size the XiConv blocks.
        Defaults to ``[256, 512, 768]``.
    up : list
        Two scale factors, one per upsampling layer. Defaults to ``[2, 2]``.
    heads : list
        Three booleans. ``n3``/``n4`` are created when ``heads[1]`` or
        ``heads[2]`` is set; ``n5``/``n6`` only when ``heads[2]`` is set.
        Defaults to ``[True, True, True]``.
    d : int
        Not used by this class; kept so the signature stays unchanged for
        existing callers.
    """

    def __init__(self, filters=None, up=None, heads=None, d=1):
        # Build the defaults per call rather than sharing one mutable list
        # object between every instance created without arguments.
        if filters is None:
            filters = [256, 512, 768]
        if up is None:
            up = [2, 2]
        if heads is None:
            heads = [True, True, True]

        # NOTE(review): the parent constructor is invoked without arguments,
        # so anything it builds uses its own defaults and is not affected by
        # `filters`/`up`/`heads` passed here. Confirm that this does not leave
        # unused parent-created blocks registered when heads are disabled.
        super().__init__()
        self.heads = heads
        self.up1 = Upsample(up[0], mode="nearest")
        self.up2 = Upsample(up[1], mode="nearest")
        self.n1 = XiConv(
            c_in=int(filters[1] + filters[2]),
            c_out=int(filters[1]),
            kernel_size=3,
            gamma=3,
            skip_tensor_in=False,
        )
        self.n2 = XiConv(
            int(filters[0] + filters[1]),
            int(filters[0]),
            kernel_size=3,
            gamma=3,
            skip_tensor_in=False,
        )

        # Only if we decide to use the 2nd and 3rd detection head we define
        # the needed blocks. Otherwise the not needed blocks would be
        # initialized (and thus would occupy space) but will never be used.
        if self.heads[1] or self.heads[2]:
            self.n3 = XiConv(
                int(filters[0]),
                int(filters[0]),
                kernel_size=3,
                gamma=3,
                stride=2,
                padding=1,
                skip_tensor_in=False,
            )
            self.n4 = XiConv(
                int(filters[0] + filters[1]),
                int(filters[1]),
                kernel_size=3,
                gamma=3,
                skip_tensor_in=False,
            )
        if self.heads[2]:
            self.n5 = XiConv(
                int(filters[1]),
                int(filters[1]),
                gamma=3,
                kernel_size=3,
                stride=2,
                padding=1,
                skip_tensor_in=False,
            )
            self.n6 = XiConv(
                int(filters[1] + filters[2]),
                int(filters[2]),
                gamma=3,
                kernel_size=3,
                skip_tensor_in=False,
            )


class DetectionHead(nn.Module):
"""Implements YOLOv8's detection head.
Expand Down
6 changes: 6 additions & 0 deletions recipes/object_detection/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Object Detection using YOLO

**[16 Jan 2024]** Added optimized YOLO neck, using XiConv. Fixed compatibility with ultralytics weights.<br />
**[17 Dec 2023]** Added VOC dataset, selective head option, and instructions for dataset download.<br />
**[1 Dec 2023]** Fixed DDP handling and computational graph.

Expand All @@ -24,6 +25,11 @@ The experiment's configuration is stored inside the files in the `cfg` folder. T
python train.py cfg/yolo_phinet.py
```

If you want to scale the input resolution, you can override the `input_shape` argument from the CLI by passing it as a comma-separated list (channels, height, width), as in:
```
python train.py cfg/yolo_phinet.py --input_shape 3,96,96
```

### Inference
In order to export the model and/or run an inference using PyTorch, you can pass an image and the path to a pretrained model to the inference script.
For this, you can use this command:
Expand Down
3 changes: 2 additions & 1 deletion recipes/object_detection/cfg/yolo_phinet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
batch_size = 8
data_cfg = "cfg/data/coco.yaml"
data_dir = "data/coco"
epochs = 200

# Model configuration
input_shape = (3, 672, 672)
input_shape = [3, 640, 640]
alpha = 2.3
num_layers = 7
beta = 0.75
Expand Down
7 changes: 6 additions & 1 deletion recipes/object_detection/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def forward(self, img):
-------
Output of the detection network : torch.Tensor
"""
backbone = self.modules["phinet"](img)
backbone = self.modules["backbone"](img)
neck_input = backbone[1]
neck_input.append(self.modules["sppf"](backbone[0]))
neck = self.modules["neck"](*neck_input)
Expand All @@ -62,6 +62,11 @@ def forward(self, img):
)

hparams = parse_configuration(sys.argv[1])
if isinstance(hparams.input_shape, str):
    # The shape is a comma-separated string here (e.g. "3,96,96" when
    # overridden from the CLI); turn it into a list of ints. The value is
    # already a str, so it can be split directly.
    hparams.input_shape = [
        int(x) for x in hparams.input_shape.split(",")
    ]  # temp solution
print(f"Setting input shape to {hparams.input_shape}.")

output_folder_path = Path(hparams.output_dir)
output_folder_path.mkdir(parents=True, exist_ok=True)
Expand Down
12 changes: 9 additions & 3 deletions recipes/object_detection/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import micromind as mm
from micromind.networks import PhiNet
from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck
from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck, Yolov8NeckOpt
from micromind.utils import parse_configuration
from micromind.utils.yolo import (
load_config,
Expand Down Expand Up @@ -53,7 +53,7 @@ def __init__(self, m_cfg, hparams, *args, **kwargs):
)

self.modules["sppf"] = SPPF(*sppf_ch)
self.modules["neck"] = Yolov8Neck(
self.modules["neck"] = Yolov8NeckOpt(
filters=neck_filters, up=up, heads=hparams.heads
)
self.modules["head"] = DetectionHead(filters=head_filters, heads=hparams.heads)
Expand Down Expand Up @@ -231,11 +231,17 @@ def replace_datafolder(hparams, data_cfg):
if __name__ == "__main__":
assert len(sys.argv) > 1, "Please pass the configuration file to the script."
hparams = parse_configuration(sys.argv[1])
if isinstance(hparams.input_shape, str):
    # The shape is a comma-separated string here (e.g. "3,96,96" when
    # overridden from the CLI); turn it into a list of ints. The value is
    # already a str, so it can be split directly.
    hparams.input_shape = [
        int(x) for x in hparams.input_shape.split(",")
    ]  # temp solution
print(f"Setting input shape to {hparams.input_shape}.")

m_cfg, data_cfg = load_config(hparams.data_cfg)

# check if specified path for images is different, correct it in case
data_cfg = replace_datafolder(hparams, data_cfg)
m_cfg.imgsz = hparams.input_shape[-1] # temp solution

train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size)

Expand All @@ -252,7 +258,7 @@ def replace_datafolder(hparams, data_cfg):
mAP = mm.Metric("mAP", yolo_mind.mAP, eval_only=True, eval_period=1)

yolo_mind.train(
epochs=200,
epochs=hparams.epochs,
datasets={"train": train_loader, "val": val_loader},
metrics=[mAP],
checkpointer=checkpointer,
Expand Down

0 comments on commit e9af6c5

Please sign in to comment.