This repository has been archived by the owner on Sep 12, 2024. It is now read-only.

Commit

Face mask detection (#9)
* config dependabot.yml

* face_mask_detection example

---------

Co-authored-by: KexinFeng <fenkexin@amazon.com>
KexinFeng and KexinFeng authored Mar 15, 2023
1 parent 8eaf7b3 commit 1fe4128
Showing 10 changed files with 171 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
@@ -5,7 +5,7 @@

version: 2
updates:
- package-ecosystem: "" # See documentation for possible values
- package-ecosystem: "pip" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
100 changes: 87 additions & 13 deletions ATLearn/algorithms/TL_object_detection.py
@@ -17,8 +17,10 @@

ssl._create_default_https_context = ssl._create_unverified_context
import random
import onnxruntime as ort
from tqdm import tqdm
from datetime import datetime
from typing import List
from ATLearn.algorithms.helper import *
from ATLearn.utils.yolo_data_loader import create_dataloader
IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp' # include image suffixes
@@ -119,7 +121,7 @@ def __init__(self, data=None, val_data=None, user_network=None, network='yolov5s'
if self.val_data:
self.val_loader, _ = create_dataloader(self.val_data, imgsz, batch_size, gs)

def train_model(self):
def train_model(self, model_file):
t_start = time.time()
for epoch in tqdm(range(1, self.epochs+1)):
training_loss = 0.
@@ -152,7 +154,7 @@ def train_model(self):

# jit save mask.pt
self.model.eval()
torch.save(self.model, "./mask.pt")
torch.save(self.model, model_file)

def validation(self, iou_thres=0.2, conf_thres=0.001, save_txt=True):
self.model.eval()
@@ -203,17 +205,28 @@ def validation(self, iou_thres=0.2, conf_thres=0.001, save_txt=True):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

def predict(self, input_data, class_names, conf_thres=0.7, iou_thres=0.45, show_img=True, save_txt=True):
self.model = torch.load("./mask.pt")
self.model.names = class_names
self.model.eval()
def predict(self,
input_data: str,
class_names: List[str],
conf_thres=0.7,
iou_thres=0.45,
show_img=True,
save_txt=True,
model_file='./mask.pt'):
is_file = Path(input_data).suffix[1:] in (IMG_FORMATS + VID_FORMATS) or input_data == "camera"
assert is_file, f"Only image and video are supported now!"
model_file_suffix = model_file.split('.')[-1].lower()
if input_data.split('.')[-1].lower() in IMG_FORMATS:
print("Image")
img0 = cv2.imread(input_data) # BGR
assert img0 is not None, f'Image Not Found {input_data}'
img0 = self.show_results(img0, input_data, conf_thres, iou_thres, save_txt)
if 'pt' == model_file_suffix:
img0 = self.show_results(img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt)
elif 'onnx' == model_file_suffix:
img0 = self.show_results_onnx(img0, model_file, class_names, input_data, conf_thres, iou_thres,
save_txt)
else:
raise("model file suffix is not recoganized.")
if show_img:
cv2.imshow(str(Path(input_data)), img0)
cv2.waitKey(0)
@@ -226,15 +239,24 @@ def predict(self, input_data, class_names, conf_thres=0.7, iou_thres=0.45, show_
ret_val, img0 = cap.read()
while ret_val:
_, img0 = cap.read()
img0 = self.show_results(img0, input_data, conf_thres, iou_thres, save_txt)
if 'pt' == model_file_suffix:
img0 = self.show_results(img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt)
elif 'onnx' == model_file_suffix:
img0 = self.show_results_onnx(img0, model_file, class_names, input_data, conf_thres, iou_thres,
save_txt)
else:
raise ("model file suffix is not recoganized.")
if show_img:
cv2.imshow(str(Path(input_data)), img0)
cv2.waitKey(1)
ret_val, img0 = cap.read()
else:
print("Unknown input format! Only image and video are supported now!")
raise("Unknown input format! Only image and video are supported now!")

def show_results(self, img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt):
self.model = torch.load(model_file)
self.model.eval()

def show_results(self, img0, input_data, conf_thres, iou_thres, save_txt):
img = letterbox(img0, 640, stride=32, auto=False, scaleFill=True)[0]
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
@@ -247,16 +269,68 @@ def show_results(self, img0, input_data, conf_thres, iou_thres, save_txt):
agnostic=False, max_det=1000)[0]

colors = Colors()
annotator = Annotator(img0, line_width=3, example=str(self.model.names))
annotator = Annotator(img0, line_width=3, example=str(class_names))
pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round()
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s = f"{n} {class_names[int(c)]}{'s' * (n > 1)}, " # add to string
print(s)
lines = []
for *xyxy, conf, cls in reversed(pred):
c = int(cls) # integer class
label = f'{class_names[c]} {conf:.2f}'
annotator.box_label(xyxy, label, color=colors(c, True))

gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf)
lines.append(line)

if save_txt:
save_path = input_data.rsplit('.', 1)[0] + '.txt'
with open(save_path, 'a') as f:
for line in lines:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
img0 = annotator.result()

return img0

def show_results_onnx(self, img0, model_file, class_names, input_data, conf_thres, iou_thres, save_txt):
# Load the ONNX model
# model_file = "./onnx_models_100/mask.onnx"
session = ort.InferenceSession(model_file)

# Define input and output names
input_name = session.get_inputs()[0].name
output_names = [session.get_outputs()[i].name for i in range(len(session.get_outputs()))]

img = letterbox(img0, 640, stride=32, auto=False, scaleFill=True)[0]
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
img = np.expand_dims(img, axis=0)
img = img.astype(np.float32) / 255.0

# Run inference
input_data_onnx = {input_name: img}
pred = session.run(output_names, input_data_onnx)[0]
pred = torch.from_numpy(pred).to(self.device)

# Post-process
pred = non_max_suppression(pred, conf_thres=conf_thres, iou_thres=iou_thres, classes=None,
agnostic=False, max_det=1000)[0]

# Visualization
colors = Colors()
annotator = Annotator(img0, line_width=3, example=str(class_names))
pred[:, :4] = scale_coords(img.shape[2:], pred[:, :4], img0.shape).round()
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
s = f"{n} {self.model.names[int(c)]}{'s' * (n > 1)}, " # add to string
s = f"{n} {class_names[int(c)]}{'s' * (n > 1)}, " # add to string
print(s)
lines = []
for *xyxy, conf, cls in reversed(pred):
c = int(cls) # integer class
label = f'{self.model.names[c]} {conf:.2f}'
label = f'{class_names[c]} {conf:.2f}'
annotator.box_label(xyxy, label, color=colors(c, True))

gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
102 changes: 0 additions & 102 deletions BaseTL_old.py

This file was deleted.

File renamed without changes.
61 changes: 61 additions & 0 deletions examples/docs/face_mask_detection.md
@@ -0,0 +1,61 @@
# Face Mask Detection

In this example, we take a pre-trained YOLOv5 model and fine-tune it on the [Face Mask Detection dataset](https://www.kaggle.com/datasets/andrewmvd/face-mask-detection?select=images).
The resulting model detects human faces and classifies each of them into two classes: with a mask or without a mask.
This example demonstrates how to apply transfer learning with YOLOv5. More specifically, the YOLOv5 layers near the input are frozen, while those near the output are fine-tuned on the custom data.

After training, the model is exported to an ONNX file, which can be loaded back for inference in Python.

The source code is in [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py).

## Data preparation

The dataset is downloaded from the Kaggle contest [Face Mask Detection](https://www.kaggle.com/code/nasreehsamaae/face-mask-detection-yolov5).
In this example, the data is stored in `examples/face_mask_detection/data/archive`. However, before the data is fed into YOLOv5, the PASCAL VOC XML annotations have to be converted to the YOLO txt label format; see
[Convert PASCAL VOC XML to YOLO for Object Detection](https://towardsdatascience.com/convert-pascal-voc-xml-to-yolo-for-object-detection-f969811ccba5); a minimal conversion sketch follows.
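
The sketch below is illustrative and not part of this repository: it assumes VOC-style `*.xml` annotation files, and the paths, class names, and helper name are placeholders.

```python
# Hypothetical helper: convert one PASCAL VOC XML annotation file to a YOLO txt label file.
import xml.etree.ElementTree as ET
from pathlib import Path

CLASSES = ["without_mask", "with_mask"]  # placeholder: adjust to the label names in the XML files

def voc_to_yolo(xml_path: str, out_dir: str) -> None:
    root = ET.parse(xml_path).getroot()
    img_w = float(root.find("size/width").text)
    img_h = float(root.find("size/height").text)
    lines = []
    for obj in root.iter("object"):
        name = obj.find("name").text
        if name not in CLASSES:
            continue  # skip labels that are not trained on
        box = obj.find("bndbox")
        xmin, ymin = float(box.find("xmin").text), float(box.find("ymin").text)
        xmax, ymax = float(box.find("xmax").text), float(box.find("ymax").text)
        # YOLO expects class id plus normalized center x/y and width/height
        xc, yc = (xmin + xmax) / 2 / img_w, (ymin + ymax) / 2 / img_h
        bw, bh = (xmax - xmin) / img_w, (ymax - ymin) / img_h
        lines.append(f"{CLASSES.index(name)} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")
    out_file = Path(out_dir) / (Path(xml_path).stem + ".txt")
    out_file.write_text("\n".join(lines))
```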

## Training

In [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py), the code
```python
model.train_model(model_file="../model_file/mask.pt")
```
will execute the model training. Here `model_file` specifies where to store the output model file.

In the directory `examples/face_mask_detection/model_file`, there are also two other files: `serving.properties` and
`synset.txt`. They are used when loading this model into DJL, to specify the inference configuration.

## Export YOLOv5 to ONNX
As described in the [official YOLOv5 documentation](https://github.com/ultralytics/yolov5/issues/251), the model conversion
from `*.pt` to `*.onnx` is done with the following steps.
1. Download `export.py` to the same directory as the model file `*.pt` (`mask.pt` in this example).
2. Go to that directory and run
```bash
python export.py --weights mask.pt --include onnx
```
Then `mask.onnx` will be exported. This file can be loaded into the Python inference code as shown below, or it can be loaded
into DJL.
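
As a quick sanity check, the exported file can be opened with `onnxruntime` (the same library `TL_object_detection.py` uses for inference). This is a sketch assuming `mask.onnx` sits in the current directory; real inputs are 640x640 letterboxed images scaled to [0, 1], and the raw output still needs non-max suppression.

```python
# Sketch: verify that the exported ONNX model loads and produces an output.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("mask.onnx")
input_name = session.get_inputs()[0].name
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)  # NCHW, values in [0, 1]
outputs = session.run(None, {input_name: dummy})
print(outputs[0].shape)  # raw YOLOv5 predictions before non-max suppression
```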

## Inference

In [face_mask_detection.py](../face_mask_detection/src/face_mask_detection.py), the code
```python
model.predict(input_data="../../figs/face_mask.png", class_names=["w/o", "w/"],
conf_thres=0.415, model_file="../model_file/mask.onnx")
```
runs the inference task with the trained model in `model_file`. Here, `input_data` is the path to the picture file.
Another option is `input_data="camera"`, which triggers real-time inference on the video input. The
`model_file` can also be set to `../model_file/mask.pt`, which runs the inference with the saved PyTorch model.

The example input picture is
<p align="center">
<img src="../../figs/face_mask.png" width="400" title="The required number of training samples.">
</p>

The inference result is
<p align="center">
<img src="../../figs/face_mask_result.png" width="400" title="The required number of training samples.">
</p>


6 changes: 6 additions & 0 deletions examples/face_mask_detection/model_file/serving.properties
@@ -0,0 +1,6 @@
width=640
height=640
resize=true
toTensor=true
option.modelName=mask-onnx
nmsThreshold=0.42
2 changes: 2 additions & 0 deletions examples/face_mask_detection/model_file/synset.txt
@@ -0,0 +1,2 @@
w/o
w/
14 changes: 14 additions & 0 deletions examples/face_mask_detection/src/face_mask_detection.py
@@ -0,0 +1,14 @@
import ATLearn
from ATLearn import task, algorithm

#%%
model = ATLearn.get_model(task.OBJECT_DETECTION,
algorithm.OD_STANDARD_TRANSFER,
data="../data/archive/train",
val_data="../data/archive/val",
network="yolov5s")
#%% train
model.train_model(model_file="../model_file/mask.pt")
#%% predict
model.predict(input_data="../../figs/face_mask.png", class_names=["w/o", "w/"],
conf_thres=0.415, model_file="../model_file/mask.onnx")
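#%% optional: evaluate on the validation split (a sketch; assumes val_data above points to labeled data
#   and uses the defaults of validation() defined in TL_object_detection.py)
model.validation(iou_thres=0.2, conf_thres=0.001, save_txt=True)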
Binary file added figs/face_mask.png
Binary file added figs/face_mask_result.png
