From ac409f29467fd772d1de7e6983d19de762852c4a Mon Sep 17 00:00:00 2001 From: DavidBaldsiefen Date: Sun, 27 Feb 2022 12:29:39 +0700 Subject: [PATCH 01/15] Assert engine precision #6777 --- detect.py | 2 ++ models/common.py | 3 +++ val.py | 1 + 3 files changed, 6 insertions(+) diff --git a/detect.py b/detect.py index 76f67bea1b90..d913921060d5 100644 --- a/detect.py +++ b/detect.py @@ -95,6 +95,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Half half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA + if engine: + assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half' if pt or jit: model.model.half() if half else model.model.float() diff --git a/models/common.py b/models/common.py index 0dae0244e932..9e265144435f 100644 --- a/models/common.py +++ b/models/common.py @@ -296,6 +296,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): w = str(weights[0] if isinstance(weights, list) else weights) pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults + trt_fp16_input = False w = attempt_download(w) # download if not local if data: # data.yaml path (optional) with open(data, errors='ignore') as f: @@ -348,6 +349,8 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): shape = tuple(model.get_binding_shape(index)) data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) + if model.binding_is_input(index) and dtype == np.float16: + trt_fp16_input = dtype == np.float16 binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) context = model.create_execution_context() batch_size = bindings['images'].shape[0] diff --git a/val.py b/val.py index 78abbda8231a..68e5d9ff0159 100644 --- a/val.py +++ b/val.py @@ -143,6 +143,7 @@ def run(data, if pt or jit: model.model.half() if half else model.model.float() elif engine: + assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half' batch_size = model.batch_size else: half = False From c891005d3660c6baf5edb4162b1cf861925cd2e6 Mon Sep 17 00:00:00 2001 From: DavidB Date: Fri, 4 Mar 2022 17:28:07 +0700 Subject: [PATCH 02/15] Default to FP32 inputs for TensorRT engines --- export.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/export.py b/export.py index 286df623d252..e786bc9b3392 100644 --- a/export.py +++ b/export.py @@ -227,8 +227,12 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] + network.get_input(0).dtype = trt.DataType.FLOAT LOGGER.info(f'{prefix} Network Description:') for inp in inputs: + # default to FP32 input precision unless specified otherwise; then let TensorRT decide + if not half: + inp.dtype = trt.DataType.FLOAT LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}') for out in outputs: LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}') From fd186fcdc0ffc6f8822f10cd577d02bfa6a221ad Mon Sep 17 00:00:00 2001 From: DavidB Date: Fri, 4 Mar 2022 17:30:38 +0700 Subject: [PATCH 03/15] Default to FP16 TensorRT exports #6777 --- export.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/export.py b/export.py index e786bc9b3392..e0162eb19312 100644 --- a/export.py +++ b/export.py @@ -237,9 +237,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F for out in outputs: LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}') - half &= builder.platform_has_fast_fp16 - LOGGER.info(f'{prefix} building FP{16 if half else 32} engine in {f}') - if half: + LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 else 32} engine in {f}') + if builder.platform_has_fast_fp16: config.set_flag(trt.BuilderFlag.FP16) with builder.build_engine(network, config) as engine, open(f, 'wb') as t: t.write(engine.serialize()) From 5be4bf3404a97f154b37159c6915121113bdf27c Mon Sep 17 00:00:00 2001 From: DavidB Date: Fri, 4 Mar 2022 17:32:21 +0700 Subject: [PATCH 04/15] Remove wrong line #6777 --- export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/export.py b/export.py index e0162eb19312..3f0bccaa9abe 100644 --- a/export.py +++ b/export.py @@ -227,7 +227,6 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] - network.get_input(0).dtype = trt.DataType.FLOAT LOGGER.info(f'{prefix} Network Description:') for inp in inputs: # default to FP32 input precision unless specified otherwise; then let TensorRT decide From a2c7b671481989d2c1a88fc4fbb38fc6059bc8f0 Mon Sep 17 00:00:00 2001 From: DavidB Date: Sat, 5 Mar 2022 10:05:21 +0700 Subject: [PATCH 05/15] Automatically adjust detect.py input precision #6777 --- detect.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/detect.py b/detect.py index d913921060d5..211a0049ae4b 100644 --- a/detect.py +++ b/detect.py @@ -94,9 +94,10 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) imgsz = check_img_size(imgsz, s=stride) # check image size # Half + if engine and model.trt_fp16_input != half: + LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') + half = model.trt_fp16_input half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA - if engine: - assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half' if pt or jit: model.model.half() if half else model.model.float() From 468b0d53477adbe353d83b15ecde2f47553197f5 Mon Sep 17 00:00:00 2001 From: DavidB Date: Sat, 5 Mar 2022 10:11:05 +0700 Subject: [PATCH 06/15] Automatically adjust val.py input precision #6777 --- val.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/val.py b/val.py index 68e5d9ff0159..6e45fac9e599 100644 --- a/val.py +++ b/val.py @@ -143,7 +143,9 @@ def run(data, if pt or jit: model.model.half() if half else model.model.float() elif engine: - assert (model.trt_fp16_input == half), 'model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half' + if model.trt_fp16_input != half + LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') + half = model.trt_fp16_input batch_size = model.batch_size else: half = False From fb64186200e818eb394c3bf40201a11cd115ba5e Mon Sep 17 00:00:00 2001 From: DavidB Date: Sat, 5 Mar 2022 10:14:32 +0700 Subject: [PATCH 07/15] Add missing colon --- val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/val.py b/val.py index 6e45fac9e599..4b214ccbae56 100644 --- a/val.py +++ b/val.py @@ -143,7 +143,7 @@ def run(data, if pt or jit: model.model.half() if half else model.model.float() elif engine: - if model.trt_fp16_input != half + if model.trt_fp16_input != half: LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') half = model.trt_fp16_input batch_size = model.batch_size From 60fc01bc1e8555dcfce9361026a40cbe20ebb213 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 20:41:49 +0100 Subject: [PATCH 08/15] Cleanup --- val.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/val.py b/val.py index 850d1e907c86..e015efbc99f7 100644 --- a/val.py +++ b/val.py @@ -144,7 +144,8 @@ def run(data, model.model.half() if half else model.model.float() elif engine: if model.trt_fp16_input != half: - LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') + LOGGER.info('model ' + ( + 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') half = model.trt_fp16_input batch_size = model.batch_size else: From a4ee73788d5157ef95c1d50f2cd56a1776ecf255 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 20:43:13 +0100 Subject: [PATCH 09/15] Cleanup --- detect.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detect.py b/detect.py index 211a0049ae4b..c3e6848a0032 100644 --- a/detect.py +++ b/detect.py @@ -95,7 +95,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Half if engine and model.trt_fp16_input != half: - LOGGER.info('model ' + ('requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') + LOGGER.info('model ' + ( + 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') half = model.trt_fp16_input half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA if pt or jit: From d2b27ffa6e368666ac8c39b842689c3913216184 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 20:46:19 +0100 Subject: [PATCH 10/15] Remove default trt_fp16_input definition --- models/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/models/common.py b/models/common.py index 9e265144435f..933ddc038bf4 100644 --- a/models/common.py +++ b/models/common.py @@ -296,7 +296,6 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): w = str(weights[0] if isinstance(weights, list) else weights) pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults - trt_fp16_input = False w = attempt_download(w) # download if not local if data: # data.yaml path (optional) with open(data, errors='ignore') as f: From 321e00d4991c696bc6d42a675f9824d532241deb Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 20:57:22 +0100 Subject: [PATCH 11/15] Experiment --- export.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/export.py b/export.py index 3f0bccaa9abe..831e3c1bcb59 100644 --- a/export.py +++ b/export.py @@ -229,9 +229,8 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F outputs = [network.get_output(i) for i in range(network.num_outputs)] LOGGER.info(f'{prefix} Network Description:') for inp in inputs: - # default to FP32 input precision unless specified otherwise; then let TensorRT decide - if not half: - inp.dtype = trt.DataType.FLOAT + # if not half: # default to FP32 input unless specified otherwise; then let TensorRT decide + # inp.dtype = trt.DataType.FLOAT LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}') for out in outputs: LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}') From 06dbd2b78b463fa91a6b1282db2b8d6cb47e841a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 21:01:37 +0100 Subject: [PATCH 12/15] Reorder detect.py if statement to after half checks --- detect.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/detect.py b/detect.py index c3e6848a0032..ba43ed9e1eed 100644 --- a/detect.py +++ b/detect.py @@ -94,13 +94,13 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) imgsz = check_img_size(imgsz, s=stride) # check image size # Half - if engine and model.trt_fp16_input != half: - LOGGER.info('model ' + ( - 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') - half = model.trt_fp16_input half &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16 supported on limited backends with CUDA if pt or jit: model.model.half() if half else model.model.float() + elif engine and model.trt_fp16_input != half: + LOGGER.info('model ' + ( + 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') + half = model.trt_fp16_input # Dataloader if webcam: From 9e6b78541dfb0b1fff7931cadc215827e1c44cac Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 21:16:11 +0100 Subject: [PATCH 13/15] Update common.py --- models/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/common.py b/models/common.py index 933ddc038bf4..70ee7105abfc 100644 --- a/models/common.py +++ b/models/common.py @@ -338,6 +338,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) + trt_fp16_input = False logger = trt.Logger(trt.Logger.INFO) with open(w, 'rb') as f, trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(f.read()) @@ -349,7 +350,7 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=False, data=None): data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device) bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr())) if model.binding_is_input(index) and dtype == np.float16: - trt_fp16_input = dtype == np.float16 + trt_fp16_input = True binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) context = model.create_execution_context() batch_size = bindings['images'].shape[0] From 9f4c3744f565c1ec8c8af35481931d617b1282c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 21:17:32 +0100 Subject: [PATCH 14/15] Update export.py --- export.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/export.py b/export.py index 831e3c1bcb59..7a5205d55ee6 100644 --- a/export.py +++ b/export.py @@ -229,8 +229,6 @@ def export_engine(model, im, file, train, half, simplify, workspace=4, verbose=F outputs = [network.get_output(i) for i in range(network.num_outputs)] LOGGER.info(f'{prefix} Network Description:') for inp in inputs: - # if not half: # default to FP32 input unless specified otherwise; then let TensorRT decide - # inp.dtype = trt.DataType.FLOAT LOGGER.info(f'{prefix}\tinput "{inp.name}" with shape {inp.shape} and dtype {inp.dtype}') for out in outputs: LOGGER.info(f'{prefix}\toutput "{out.name}" with shape {out.shape} and dtype {out.dtype}') From 48c4debe68808bdd7f24f3c786f83b9e4471fc92 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 6 Mar 2022 21:20:03 +0100 Subject: [PATCH 15/15] Cleanup --- val.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/val.py b/val.py index e015efbc99f7..dfbfa3935210 100644 --- a/val.py +++ b/val.py @@ -143,11 +143,11 @@ def run(data, if pt or jit: model.model.half() if half else model.model.float() elif engine: + batch_size = model.batch_size if model.trt_fp16_input != half: LOGGER.info('model ' + ( 'requires' if model.trt_fp16_input else 'incompatible with') + ' --half. Adjusting automatically.') half = model.trt_fp16_input - batch_size = model.batch_size else: half = False batch_size = 1 # export.py models default to batch-size 1