-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from 20toduc01/develop
Major refactoring
- Loading branch information
Showing 9 changed files with 207 additions and 14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
import torch, cv2, os, urllib, torchvision | ||
import numpy as np | ||
|
||
# Module-level defaults. NOTE(review): no definition visible in this part of
# the file reads these names (letterbox carries its own 640 / 32 defaults), so
# they are presumably consumed by importers of this module - confirm before
# changing or removing them.
img_size = 640  # presumably the square network input size in pixels
stride = 32  # presumably the model's maximum stride
|
||
class Yolov5Detector():
    """Run a TorchScript YOLOv5 model on single OpenCV-style images.

    On construction the TorchScript file at ``MODEL_PATH`` is loaded onto the
    requested device; if the file is missing it is first downloaded from
    ``MODEL_URL``.
    """

    # Location of the TorchScript weights, relative to the working directory.
    MODEL_PATH = './models/yolov5.torchscript.pt'
    # NOTE(review): this is the placeholder value found in the source; it is not
    # a valid URL and must be replaced before the download branch can succeed.
    MODEL_URL = "aaaaaaaaaaaaaaaaaaaaaaaaaaa"

    def __init__(self, device='auto'):
        """Load the model.

        Args:
            device: ``'auto'`` picks ``'cuda'`` when available and ``'cpu'``
                otherwise; any other value is passed to ``torch`` unchanged.
        """
        # Local import: a bare ``import urllib`` does not guarantee that the
        # ``urllib.request`` submodule is loaded.
        import urllib.request

        # Always set self.device, not only in the 'auto' case.
        self.device = self._resolve_device(device)

        if not os.path.isfile(self.MODEL_PATH):
            # Make sure the target directory exists before writing into it.
            os.makedirs(os.path.dirname(self.MODEL_PATH), exist_ok=True)
            urllib.request.urlretrieve(self.MODEL_URL, self.MODEL_PATH)
        with open(self.MODEL_PATH, 'rb') as f:
            self.model = torch.jit.load(f, map_location=self.device)

    @staticmethod
    def _resolve_device(device):
        """Translate the ``device`` constructor argument into a torch device."""
        if device == 'auto':
            return 'cuda' if torch.cuda.is_available() else 'cpu'
        return device

    def detect(self, cv2img, conf_thres=0.25, iou_thres=0.45, classes=None,
               agnostic_nms=False, max_det=10):
        """Detect objects in one image.

        Args:
            cv2img: image array in height x width x channel layout; the channel
                axis is reversed before inference (BGR to RGB for OpenCV input).
            conf_thres: confidence threshold forwarded to non_max_suppression.
            iou_thres: IoU threshold forwarded to non_max_suppression.
            classes: optional list of class ids to keep (``None`` keeps all).
            agnostic_nms: if true, NMS ignores class ids.
            max_det: maximum number of detections returned.

        Returns:
            ``(pred, resized)`` where ``pred`` is the (n, 6) detection tensor
            ``[x1, y1, x2, y2, conf, cls]`` produced by non_max_suppression
            (presumably in pixel coordinates of the letterboxed image - confirm
            against the exported model) and ``resized`` is a copy of the
            letterboxed image. Returns ``None`` when the image cannot be
            preprocessed (e.g. ``cv2img`` is ``None``).
        """
        try:
            # Padded resize
            img = letterbox(cv2img, auto=False)[0]
            resized = np.array(img)
            # Convert: reverse the channel axis (BGR to RGB) and go from
            # height x width x channel to channel x height x width.
            img = img[:, :, ::-1].transpose(2, 0, 1)
            img = np.ascontiguousarray(img)
        except Exception:
            # Deliberate best-effort: an unusable image yields None instead of
            # raising. Unlike a bare ``except:`` this no longer swallows
            # KeyboardInterrupt / SystemExit.
            return None

        img = torch.from_numpy(img).to(self.device)
        img = img.float()  # uint8 to fp32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            pred = self.model(img)[0]

        # Apply NMS; a single image was submitted, so take the first result.
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
        return pred, resized
|
||
|
||
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in box1 and box2. Pairs whose
            union has zero area (both boxes degenerate) yield 0 instead
            of NaN.
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # bottom-right of the overlap
    lt = torch.max(box1[:, None, :2], box2[:, :2])  # top-left of the overlap
    inter = (rb - lt).clamp(0).prod(2)
    union = area1[:, None] + area2 - inter

    # iou = inter / (area1 + area2 - inter); mask the 0 / 0 case so that
    # zero-area boxes do not produce NaN entries.
    iou = inter / union
    return torch.where(union > 0, iou, torch.zeros_like(iou))
|
||
|
||
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Arguments:
        prediction: tensor of shape (batch, boxes, 5 + nc); per box the columns
            are [cx, cy, w, h, obj_conf, cls_conf...].
        conf_thres: minimum objectness, and minimum obj_conf * cls_conf, to keep.
        iou_thres: IoU threshold handed to torchvision.ops.nms.
        classes: optional list of class ids to keep (None keeps every class).
        agnostic: if true, boxes of different classes suppress each other.
        multi_label: allow several labels per box (only when nc > 1).
        labels: optional per-image apriori labels, rows of [cls, cx, cy, w, h],
            appended to the candidates with confidence 1.0.
        max_det: maximum number of detections kept per image.
    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # per-class box offset (pixels) used to keep classes apart in NMS
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    # One distinct empty tensor per image: ``[t] * batch`` would make every
    # image without detections share (alias) the same tensor object.
    output = [torch.zeros((0, 6), device=prediction.device) for _ in range(prediction.shape[0])]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # keep rows above the objectness threshold (this is a copy)

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS: shifting every box by class_id * max_wh keeps boxes of
        # different classes from overlapping, so one NMS call handles all classes.
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]

    return output
|
||
|
||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    Arguments:
        img: image array whose first two dimensions are (height, width).
        new_shape: target (height, width); a single int means a square.
        color: border fill value passed to cv2.copyMakeBorder.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: when ``auto`` is false, stretch to ``new_shape`` without padding.
        scaleup: if false, the image is only ever scaled down.
        stride: modulus used for the ``auto`` padding.
    Returns:
        (img, ratio, (dw, dh)): the padded image, the (width, height) scale
        ratios and the per-side horizontal / vertical padding.
    """
    src_shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the largest scale that still fits both sides
    scale = min(new_shape[0] / src_shape[0], new_shape[1] / src_shape[1])
    if not scaleup:  # only scale down, do not scale up
        scale = min(scale, 1.0)

    # Size after resizing (width, height) and the total padding still missing
    ratio = (scale, scale)  # width, height ratios
    resized_wh = (int(round(src_shape[1] * scale)), int(round(src_shape[0] * scale)))
    pad_w = new_shape[1] - resized_wh[0]
    pad_h = new_shape[0] - resized_wh[1]
    if auto:  # minimum rectangle
        pad_w = np.mod(pad_w, stride)
        pad_h = np.mod(pad_h, stride)
    elif scaleFill:  # stretch
        pad_w, pad_h = 0.0, 0.0
        resized_wh = (new_shape[1], new_shape[0])
        ratio = (new_shape[1] / src_shape[1], new_shape[0] / src_shape[0])  # width, height ratios

    # Split the padding between the two opposite sides
    pad_w = pad_w / 2
    pad_h = pad_h / 2

    if src_shape[::-1] != resized_wh:  # resize only when the size changes
        img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges send an odd pixel of padding to the bottom / right.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (pad_w, pad_h)
|
||
|
||
def xywh2xyxy(x):
    """Convert boxes from [x, y, w, h] to [x1, y1, x2, y2].

    xy is the box centre; xy1 is the top-left and xy2 the bottom-right corner.

    Arguments:
        x: torch.Tensor or numpy array whose last dimension holds
            [x, y, w, h]; any number of leading dimensions is accepted
            (a single box of shape (4,), (n, 4), (batch, n, 4), ...).
    Returns:
        A new tensor / array of the same shape; ``x`` is not modified.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    # Indexing with ``...`` addresses the last axis, so leading dimensions are free.
    y[..., 0] = x[..., 0] - half_w  # top left x
    y[..., 1] = x[..., 1] - half_h  # top left y
    y[..., 2] = x[..., 0] + half_w  # bottom right x
    y[..., 3] = x[..., 1] + half_h  # bottom right y
    return y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.