From ac409f29467fd772d1de7e6983d19de762852c4a Mon Sep 17 00:00:00 2001
From: DavidBaldsiefen <david.baldsiefen@gmx.de>
Date: Sun, 27 Feb 2022 12:29:39 +0700
Subject: [PATCH 01/15] Assert engine precision #6777

---
 detect.py        | 2 ++
 models/common.py | 3 +++
 val.py           | 1 +
 3 files changed, 6 insertions(+)

diff --git a/detect.py b/detect.py
index 76f67bea1b90..d913921060d5 100644
--- a/detect.py
+++ b/detect.py
@@ -95,6 +95,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
     # Half
     half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
+    if engine:
+        assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half'
     if pt or jit:
         model.model.half() if half else model.model.float()
 
diff --git a/models/common.py b/models/common.py
index 0dae0244e932..9e265144435f 100644
--- a/models/common.py
+++ b/models/common.py
@@ -296,6 +296,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
         w = str(weights[0] if isinstance(weights, list) else weights)
         pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w)  # get backend
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+        trt_fp16_input = False
         w = attempt_download(w)  # download if not local
         if data:  # data.yaml path (optional)
             with open(data, errors='ignore') as f:
@@ -348,6 +349,8 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
                 shape = tuple(model.get_binding_shape(index))
                 data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                 bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+                if model.binding_is_input(index) and dtype == np.float16:
+                    trt_fp16_input = dtype == np.float16
             binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
             context = model.create_execution_context()
             batch_size = bindings['images'].shape[0]
diff --git a/val.py b/val.py
index 78abbda8231a..68e5d9ff0159 100644
--- a/val.py
+++ b/val.py
@@ -143,6 +143,7 @@ def run(data,
         if pt or jit:
             model.model.half() if half else model.model.float()
         elif engine:
+            assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half'
             batch_size = model.batch_size
         else:
             half = False

From c891005d3660c6baf5edb4162b1cf861925cd2e6 Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Fri, 4 Mar 2022 17:28:07 +0700
Subject: [PATCH 02/15] Default to FP32 inputs for TensorRT engines

---
 export.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/export.py b/export.py
index 286df623d252..e786bc9b3392 100644
--- a/export.py
+++ b/export.py
@@ -227,8 +227,12 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
 
         inputs = [network.get_input(i) for i in range(network.num_inputs)]
         outputs = [network.get_output(i) for i in range(network.num_outputs)]
+        network.get_input(0).dtype = trt.DataType.FLOAT
         LOGGER.info(f'{prefix} Network Description:')
         for inp in inputs:
+            # default to FP32 input precision unless specified otherwise; then let TensorRT decide
+            if not half:
+                inp.dtype = trt.DataType.FLOAT
             LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
         for out in outputs:
             LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

From fd186fcdc0ffc6f8822f10cd577d02bfa6a221ad Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Fri, 4 Mar 2022 17:30:38 +0700
Subject: [PATCH 03/15] Default to FP16 TensorRT exports #6777

---
 export.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/export.py b/export.py
index e786bc9b3392..e0162eb19312 100644
--- a/export.py
+++ b/export.py
@@ -237,9 +237,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
         for out in outputs:
             LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')
 
-        half &= builder.platform_has_fast_fp16
-        LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}')
-        if half:
+        LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}')
+        if builder.platform_has_fast_fp16:
             config.set_flag(trt.BuilderFlag.FP16)
         with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
             t.write(engine.serialize())

From 5be4bf3404a97f154b37159c6915121113bdf27c Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Fri, 4 Mar 2022 17:32:21 +0700
Subject: [PATCH 04/15] Remove wrong line #6777

---
 export.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/export.py b/export.py
index e0162eb19312..3f0bccaa9abe 100644
--- a/export.py
+++ b/export.py
@@ -227,7 +227,6 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
 
         inputs = [network.get_input(i) for i in range(network.num_inputs)]
         outputs = [network.get_output(i) for i in range(network.num_outputs)]
-        network.get_input(0).dtype = trt.DataType.FLOAT
         LOGGER.info(f'{prefix} Network Description:')
         for inp in inputs:
             # default to FP32 input precision unless specified otherwise; then let TensorRT decide

From a2c7b671481989d2c1a88fc4fbb38fc6059bc8f0 Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Sat, 5 Mar 2022 10:05:21 +0700
Subject: [PATCH 05/15] Automatically adjust detect.py input precision #6777

---
 detect.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index d913921060d5..211a0049ae4b 100644
--- a/detect.py
+++ b/detect.py
@@ -94,9 +94,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Half
+    if engine and model.trt_fp16_input != half:
+        LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
+        half = model.trt_fp16_input
     half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
-    if engine:
-        assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half'
     if pt or jit:
         model.model.half() if half else model.model.float()
 

From 468b0d53477adbe353d83b15ecde2f47553197f5 Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Sat, 5 Mar 2022 10:11:05 +0700
Subject: [PATCH 06/15] Automatically adjust val.py input precision #6777

---
 val.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/val.py b/val.py
index 68e5d9ff0159..6e45fac9e599 100644
--- a/val.py
+++ b/val.py
@@ -143,7 +143,9 @@ def run(data,
         if pt or jit:
             model.model.half() if half else model.model.float()
         elif engine:
-            assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half'
+            if model.trt_fp16_input != half
+                LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
+                half = model.trt_fp16_input
             batch_size = model.batch_size
         else:
             half = False

From fb64186200e818eb394c3bf40201a11cd115ba5e Mon Sep 17 00:00:00 2001
From: DavidB <david.baldsiefen@gmx.de>
Date: Sat, 5 Mar 2022 10:14:32 +0700
Subject: [PATCH 07/15] Add missing colon

---
 val.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/val.py b/val.py
index 6e45fac9e599..4b214ccbae56 100644
--- a/val.py
+++ b/val.py
@@ -143,7 +143,7 @@ def run(data,
         if pt or jit:
             model.model.half() if half else model.model.float()
         elif engine:
-            if model.trt_fp16_input != half
+            if model.trt_fp16_input != half:
                 LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
                 half = model.trt_fp16_input
             batch_size = model.batch_size

From 60fc01bc1e8555dcfce9361026a40cbe20ebb213 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 20:41:49 +0100
Subject: [PATCH 08/15] Cleanup: wrap long LOGGER.info line in val.py

---
 val.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/val.py b/val.py
index 850d1e907c86..e015efbc99f7 100644
--- a/val.py
+++ b/val.py
@@ -144,7 +144,8 @@ def run(data,
             model.model.half() if half else model.model.float()
         elif engine:
             if model.trt_fp16_input != half:
-                LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
+                LOGGER.info('model ' + (
+                    'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
                 half = model.trt_fp16_input
             batch_size = model.batch_size
         else:

From a4ee73788d5157ef95c1d50f2cd56a1776ecf255 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 20:43:13 +0100
Subject: [PATCH 09/15] Cleanup: wrap long LOGGER.info line in detect.py

---
 detect.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index 211a0049ae4b..c3e6848a0032 100644
--- a/detect.py
+++ b/detect.py
@@ -95,7 +95,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
     # Half
     if engine and model.trt_fp16_input != half:
-        LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
+        LOGGER.info('model ' + (
+            'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
         half = model.trt_fp16_input
     half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
     if pt or jit:

From d2b27ffa6e368666ac8c39b842689c3913216184 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 20:46:19 +0100
Subject: [PATCH 10/15] Remove default trt_fp16_input definition

---
 models/common.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/models/common.py b/models/common.py
index 9e265144435f..933ddc038bf4 100644
--- a/models/common.py
+++ b/models/common.py
@@ -296,7 +296,6 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
         w = str(weights[0] if isinstance(weights, list) else weights)
         pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w)  # get backend
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        trt_fp16_input = False
         w = attempt_download(w)  # download if not local
         if data:  # data.yaml path (optional)
             with open(data, errors='ignore') as f:

From 321e00d4991c696bc6d42a675f9824d532241deb Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 20:57:22 +0100
Subject: [PATCH 11/15] Experiment: comment out FP32 input dtype override in export.py

---
 export.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/export.py b/export.py
index 3f0bccaa9abe..831e3c1bcb59 100644
--- a/export.py
+++ b/export.py
@@ -229,9 +229,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
         outputs = [network.get_output(i) for i in range(network.num_outputs)]
         LOGGER.info(f'{prefix} Network Description:')
         for inp in inputs:
-            # default to FP32 input precision unless specified otherwise; then let TensorRT decide
-            if not half:
-                inp.dtype = trt.DataType.FLOAT
+            # if not half:  # default to FP32 input unless specified otherwise; then let TensorRT decide
+            #     inp.dtype = trt.DataType.FLOAT
             LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
         for out in outputs:
             LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

From 06dbd2b78b463fa91a6b1282db2b8d6cb47e841a Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 21:01:37 +0100
Subject: [PATCH 12/15] Reorder detect.py if statement to after half checks

---
 detect.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/detect.py b/detect.py
index c3e6848a0032..ba43ed9e1eed 100644
--- a/detect.py
+++ b/detect.py
@@ -94,13 +94,13 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Half
-    if engine and model.trt_fp16_input != half:
-        LOGGER.info('model ' + (
-            'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
-        half = model.trt_fp16_input
     half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
     if pt or jit:
         model.model.half() if half else model.model.float()
+    elif engine and model.trt_fp16_input != half:
+        LOGGER.info('model ' + (
+            'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
+        half = model.trt_fp16_input
 
     # Dataloader
     if webcam:

From 9e6b78541dfb0b1fff7931cadc215827e1c44cac Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 21:16:11 +0100
Subject: [PATCH 13/15] Update common.py: initialize trt_fp16_input in TensorRT branch

---
 models/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/models/common.py b/models/common.py
index 933ddc038bf4..70ee7105abfc 100644
--- a/models/common.py
+++ b/models/common.py
@@ -338,6 +338,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
             import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
             check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
             Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
+            trt_fp16_input = False
             logger = trt.Logger(trt.Logger.INFO)
             with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                 model = runtime.deserialize_cuda_engine(f.read())
@@ -349,7 +350,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None):
                 data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
                 bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
                 if model.binding_is_input(index) and dtype == np.float16:
-                    trt_fp16_input = dtype == np.float16
+                    trt_fp16_input = True
             binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
             context = model.create_execution_context()
             batch_size = bindings['images'].shape[0]

From 9f4c3744f565c1ec8c8af35481931d617b1282c9 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 21:17:32 +0100
Subject: [PATCH 14/15] Update export.py: remove commented-out FP32 input override

---
 export.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/export.py b/export.py
index 831e3c1bcb59..7a5205d55ee6 100644
--- a/export.py
+++ b/export.py
@@ -229,8 +229,6 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F
         outputs = [network.get_output(i) for i in range(network.num_outputs)]
         LOGGER.info(f'{prefix} Network Description:')
         for inp in inputs:
-            # if not half:  # default to FP32 input unless specified otherwise; then let TensorRT decide
-            #     inp.dtype = trt.DataType.FLOAT
             LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}')
         for out in outputs:
             LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}')

From 48c4debe68808bdd7f24f3c786f83b9e4471fc92 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Sun, 6 Mar 2022 21:20:03 +0100
Subject: [PATCH 15/15] Cleanup: assign batch_size before half check in val.py

---
 val.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/val.py b/val.py
index e015efbc99f7..dfbfa3935210 100644
--- a/val.py
+++ b/val.py
@@ -143,11 +143,11 @@ def run(data,
         if pt or jit:
             model.model.half() if half else model.model.float()
         elif engine:
+            batch_size = model.batch_size
             if model.trt_fp16_input != half:
                 LOGGER.info('model ' + (
                     'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.')
                 half = model.trt_fp16_input
-            batch_size = model.batch_size
         else:
             half = False
             batch_size = 1  # export.py models default to batch-size 1