This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

Commit

Face mask detection (#9)
* config dependabot.yml

* face_mask_detection example

---------

Co-authored-by: KexinFeng <fenkexin@amazon.com>
KexinFeng and KexinFeng authored Mar 15, 2023
1 parent 8eaf7b3 commit 1fe4128
Showing 10 changed files with 171 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
@@ -5,7 +5,7 @@

version: 2
updates:
- package-ecosystem: "" # See documentation for possible values
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
100 changes: 87 additions & 13 deletions ATLearn/algorithms/TL_object_detection.py
@@ -17,8 +17,10 @@

ssl._create_default_https_context = ssl._create_unverified_context
import random
import onnxruntime as ort
from tqdm import tqdm
from datetime import datetime
from typing import List
from ATLearn.algorithms.helper import *
from ATLearn.utils.yolo_data_loader import create_dataloader
IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp' # include image suffixes
@@ -119,7 +121,7 @@ def __init__(self, data=None, val_data=None, user_network=None, network='yolov5s'
if self.val_data:
self.val_loader, _ = create_dataloader(self.val_data, imgsz, batch_size, gs)

def train_model(self):
def train_model(self, model_file):
t_start = time.time()
for epoch in tqdm(range(1, self.epochs+1)):
training_loss = 0.
@@ -152,7 +154,7 @@ def train_model(self):

# jit save mask.pt
self.model.eval()
torch.save(self.model, "./mask.pt")
torch.save(self.model, model_file)

def validation(self, iou_thres=0.2, conf_thres=0.001, save_txt=True):
self.model.eval()
@@ -203,17 +205,28 @@ def validation(self, iou_thres=0.2, conf_thres=0.001, save_txt=True):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

def predict(self, input_data, class_names, conf_thres=0.7, iou_thres=0.45, show_img=True, save_txt=True):
self.model = torch.load("./mask.pt")
self.model.names = class_names
self.model.eval()
def predict(self,
input_data: str,
class_names: List[str],
conf_thres=0.7,
iou_thres=0.45,
show_img=True,
save_txt=True,
model_file='./mask.pt'):
is_file = Path(input_data).suffix[1:] in (IMG_FORMATS + VID_FORMATS) or input_data == "camera"
assert is_file, f"Only image and video are supported now!"
model_file_suffix = model_file.split('.')[-1].lower()
if input_data.split('.')[-1].lower() in IMG_FORMATS:
print("Image")
img0 = cv2.imread(input_data) # BGR
assert img0 is not None, f'Image Not Found {input_data}'
img0 = self.show_results(img0, input_data, conf_thres, iou_thres, save_txt)
if 'pt' == model_file_suffix:
img0 = self.show_results(img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt)
elif 'onnx' == model_file_suffix:
img0 = self.show_results_onnx(img0, model_file, class_names, input_data, conf_thres, iou_thres,
save_txt)
else:
raise("model file suffix is not recoganized.")
if show_img:
cv2.imshow(str(Path(input_data)), img0)
cv2.waitKey(0)
@@ -226,15 +239,24 @@ def predict(self, input_data, class_names, conf_thres=0.7, iou_thres=0.45, show_
ret_val, img0 = cap.read()
while ret_val:
_, img0 = cap.read()
img0 = self.show_results(img0, input_data, conf_thres, iou_thres, save_txt)
if 'pt' == model_file_suffix:
img0 = self.show_results(img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt)
elif 'onnx' == model_file_suffix:
img0 = self.show_results_onnx(img0, model_file, class_names, input_data, conf_thres, iou_thres,
save_txt)
else:
raise ("model file suffix is not recoganized.")
if show_img:
cv2.imshow(str(Path(input_data)), img0)
cv2.waitKey(1)
ret_val, img0 = cap.read()
else:
print("Unknown input format! Only image and video are supported now!")
raise("Unknown input format! Only image and video are supported now!")

def show_results(self, img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt):
self.model = torch.load(model_file)
self.model.eval()

def show_results(self, img0, input_data, conf_thres, iou_thres, save_txt):
img = letterbox(img0, 640, stride=32, auto=False, scaleFill=True)[0]
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
@@ -247,16 +269,68 @@ def show_results(self, img0, input_data, conf_thres, iou_thres, save_txt):
agnostic=False, max_det=1000)[0]

colors = Colors()
annotator = Annotator(img0, line_width=3, example=str(self.model.names))
annotator = Annotator(img0, line_width=3, example=str(class_names))
pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round()
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s = f"{n} {class_names[int(c)]}{'s' * (n > 1)}, " # add to string
print(s)
lines = []
for *xyxy, conf, cls in reversed(pred):
c = int(cls) # integer class
label = f'{class_names[c]} {conf:.2f}'
annotator.box_label(xyxy, label, color=colors(c, True))

gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf)
lines.append(line)

if save_txt:
save_path = input_data.rsplit('.', 1)[0] + '.txt'
with open(save_path, 'a') as f:
for line in lines:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
img0 = annotator.result()

return img0

def show_results_onnx(self, img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt):
# Load the ONNX model
# model_file = "./onnx_models_100/mask.onnx"
session = ort.InferenceSession(model_file)

# Define input and output names
input_name = session.get_inputs()[0].name
output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]

img = letterbox(img0, 640, stride=32, auto=False, scaleFill=True)[0]
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
img = np.expand_dims(img, axis=0)
img = img.astype(np.float32) / 255.0

# Run inference
input_data_onnx = {input_name: img}
pred = session.run(output_names, input_data_onnx)[0]
pred = torch.from_numpy(pred).to(self.device)

# Post-process
pred = non_max_suppression(pred, conf_thres=conf_thres, iou_thres=iou_thres, classes=None,
agnostic=False, max_det=1000)[0]

# Visualization
colors = Colors()
annotator = Annotator(img0, line_width=3, example=str(class_names))
pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round()
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s = f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, " # add to string
s = f"{n} {class_names[int(c)]}{'s' * (n > 1)}, " # add to string
print(s)
lines = []
for *xyxy, conf, cls in reversed(pred):
c = int(cls) # integer class
label = f'{self.model.names[c]} {conf:.2f}'
label = f'{class_names[c]} {conf:.2f}'
annotator.box_label(xyxy, label, color=colors(c, True))

gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
102 changes: 0 additions & 102 deletions BaseTL_old.py

This file was deleted.

File renamed without changes.
61 changes: 61 additions & 0 deletions examples/docs/face_mask_detection.md
@@ -0,0 +1,61 @@
# Face Mask Detection

In this example, we take a pre-trained YOLOv5 model and fine-tune it on the [Face Mask Detection dataset](https://www.kaggle.com/datasets/andrewmvd/face-mask-detection?select=images).
The resulting model detects human faces and classifies each of them into two classes: with a mask or without a mask.
This example demonstrates how to apply transfer learning with YOLOv5. More specifically, the YOLOv5 layers near the input are frozen, while those near the output are fine-tuned on the custom data.

After training, the model is exported to an ONNX file, which can be loaded back for inference in Python.

The source code is in [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py).

## Data preparation

The dataset is downloaded from the Kaggle contest [Face Mask Detection](https://www.kaggle.com/code/nasreehsamaae/face-mask-detection-yolov5).
In this example, the data is stored in `examples/face_mask_detection/data/archive`. However, before the data is fed into YOLOv5, the PASCAL VOC XML annotations have to be converted to the YOLO txt label format; see
[Convert PASCAL VOC XML to YOLO for Object Detection](https://towardsdatascience.com/convert-pascal-voc-xml-to-yolo-for-object-detection-f969811ccba5); a minimal conversion sketch follows.
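
The sketch below is illustrative and not part of this repository: it assumes VOC-style `*.xml` annotation files, and the paths, class names, and helper name are placeholders.

```python
# Hypothetical helper: convert one PASCAL VOC XML annotation file to a YOLO txt label file.
import xml.etree.ElementTree as ET
from pathlib import Path

CLASSES = ["without_mask", "with_mask"]  # placeholder: adjust to the label names in the XML files

def voc_to_yolo(xml_path: str, out_dir: str) -> None:
    root = ET.parse(xml_path).getroot()
    img_w = float(root.find("size/width").text)
    img_h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        name = obj.find("name").text
        if name not in CLASSES:
            continue  # skip labels that are not trained on
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        # YOLO expects class id plus normalized center x/y and width/height
        xc, yc = (xmin + xmax) / 2 / img_w, (ymin + ymax) / 2 / img_h
        bw, bh = (xmax - xmin) / img_w, (ymax - ymin) / img_h
        lines.append(f"{CLASSES.index(name)} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")
    out_file = Path(out_dir) / (Path(xml_path).stem + ".txt")
    out_file.write_text("\n".join(lines))
```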

## Training

In [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py), the code
```python
model.train_model(model_file="../model_file/mask.pt")
```
will execute the model training. Here `model_file` specifies where to store the output model file.

In the directory `examples/face_mask_detection/model_file`, there are also two other files: `serving.properties` and
`synset.txt`. They are used when loading this model into DJL, to specify the inference configuration.

## Export YOLOv5 to ONNX
As described in the [official YOLOv5 documentation](https://github.com/ultralytics/yolov5/issues/251), the model conversion
from `*.pt` to `*.onnx` is done with the following steps.
1. Download `export.py` to the same directory as the model file `*.pt` (`mask.pt` in this example).
2. Go to that directory and run
```bash
python export.py --weights mask.pt --include onnx
```
Then `mask.onnx` will be exported. This file can be loaded into the Python inference code as shown below, or it can be loaded
into DJL.
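
As a quick sanity check, the exported file can be opened with `onnxruntime` (the same library `TL_object_detection.py` uses for inference). This is a sketch assuming `mask.onnx` sits in the current directory; real inputs are 640x640 letterboxed images scaled to [0, 1], and the raw output still needs non-max suppression.

```python
# Sketch: verify that the exported ONNX model loads and produces an output.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("mask.onnx")
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)  # NCHW, values in [0, 1]
outputs = session.run(None, {input_name: dummy})
print(outputs[0].shape)  # raw YOLOv5 predictions before non-max suppression
```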

## Inference

In [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py), the code
```python
model.predict(input_data="../../figs/face_mask.png", class_names=["w/o", "w/"],
conf_thres=0.415, model_file="../model_file/mask.onnx")
```
runs the inference task with the trained model in `model_file`. Here, `input_data` is the path to the picture file.
Another option is `input_data="camera"`, which triggers real-time inference on the video input. The
`model_file` can also be set to `../model_file/mask.pt`, which runs the inference with the saved PyTorch model.

The example input picture is
<p align="center">
<img src="../../figs/face_mask.png" width="400" title="The required number of training samples.">
</p>

The inference result is
<p align="center">
<img src="../../figs/face_mask_result.png" width="400" title="The required number of training samples.">
</p>


6 changes: 6 additions & 0 deletions examples/face_mask_detection/model_file/serving.properties
@@ -0,0 +1,6 @@
width=640
height=640
resize=true
toTensor=true
option.modelName=mask-onnx
nmsThreshold=0.42
2 changes: 2 additions & 0 deletions examples/face_mask_detection/model_file/synset.txt
@@ -0,0 +1,2 @@
w/o
w/
14 changes: 14 additions & 0 deletions examples/face_mask_detection/src/face_mask_detection.py
@@ -0,0 +1,14 @@
import ATLearn
from ATLearn import task, algorithm

#%%
model = ATLearn.get_model(task.OBJECT_DETECTION,
algorithm.OD_STANDARD_TRANSFER,
data="../data/archive/train",
val_data="../data/archive/val",
network="yolov5s")
#%% train
model.train_model(model_file="../model_file/mask.pt")
#%% predict
model.predict(input_data="../../figs/face_mask.png", class_names=["w/o", "w/"],
conf_thres=0.415, model_file="../model_file/mask.onnx")
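#%% optional: evaluate on the validation split (a sketch; assumes val_data above points to labeled data
#   and uses the defaults of validation() defined in TL_object_detection.py)
model.validation(iou_thres=0.2, conf_thres=0.001, save_txt=True)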
Binary file added figs/face_mask.png
Binary file added figs/face_mask_result.png
