From 7798401dae5f4b75c00492f2a1b7b025f0fb6a0e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:39:58 -0700 Subject: [PATCH 01/47] Autobatch --- utils/autobatch.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 utils/autobatch.py diff --git a/utils/autobatch.py b/utils/autobatch.py new file mode 100644 index 000000000000..993e731980fb --- /dev/null +++ b/utils/autobatch.py @@ -0,0 +1,37 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Auto-batch utils +""" + +import random + +import numpy as np +import torch +import yaml +from tqdm import tqdm + +from utils.general import colorstr + + +def autobatch(model, imgsz=640, fraction=0.8): + # Automatically compute optimal batch size to use `fraction` of available CUDA memory + prefix = colorstr('autobatch: ') + print(f'\n{prefix} Computing optimal batch size') + + t = torch.cuda.get_device_properties(0).total_memory / 1E9 # (GB) + r = torch.cuda.memory_reserved(0) / 1E9 # (GB) + a = torch.cuda.memory_allocated(0) / 1E9 # (GB) + f = r - a # free inside reserved + + try: + batch_sizes = [1, 2, 4, 8] + print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + except Exception as e: + print() + + + #x, y = zip(*x) + #p = np.polyfit(x, y) + + + return None From 38ee33c6f93aa11681677a07c3861d561d8f9138 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:46:08 -0700 Subject: [PATCH 02/47] fix mem --- utils/autobatch.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 993e731980fb..39b8d37c6bae 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -13,14 +13,14 @@ from utils.general import colorstr -def autobatch(model, imgsz=640, fraction=0.8): +def autobatch(model, imgsz=640, fraction=0.8, device=0): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') - t = torch.cuda.get_device_properties(0).total_memory / 1E9 # (GB) - r = torch.cuda.memory_reserved(0) / 1E9 # (GB) - a = torch.cuda.memory_allocated(0) / 1E9 # (GB) + t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) + r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) + a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = r - a # free inside reserved try: From 9bab7579c660e5a49b55e7cbe311e9f5b304dffa Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 19:48:26 -0700 Subject: [PATCH 03/47] fix mem2 --- utils/autobatch.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 39b8d37c6bae..4ad204caa1fd 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -21,17 +21,16 @@ def autobatch(model, imgsz=640, fraction=0.8, device=0): t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) - f = r - a # free inside reserved + f = t - (r + a) # free inside reserved + batch_sizes = [1, 2, 4, 8] + x = [] try: - batch_sizes = [1, 2, 4, 8] print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') except Exception as e: print() - - #x, y = zip(*x) - #p = np.polyfit(x, y) - + # x, y = zip(*x) + # p = np.polyfit(x, y) return None From 8a0ee56f213e52f8dac5a3593ed1e50d3bb748d2 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:03:45 -0700 Subject: [PATCH 04/47] Update --- utils/autobatch.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 4ad204caa1fd..6892b8332a42 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -3,17 +3,16 @@ Auto-batch utils """ -import random +from copy import deepcopy import numpy as np import torch -import yaml -from tqdm import tqdm from utils.general import colorstr +from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=640, fraction=0.8, device=0): +def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') @@ -22,15 +21,23 @@ def autobatch(model, imgsz=640, fraction=0.8, device=0): r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved + # f = 15.8 + print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8] - x = [] + model = deepcopy(de_parallel(model)).train() try: - print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] + y = profile(img, model, n=3, device=device) + y = [x[2] for x in y] # memory [2] except Exception as e: print() - # x, y = zip(*x) - # p = np.polyfit(x, y) + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + f_intercept = int((f - p[0]) / p[1]) # optimal batch size + return f_intercept - return None + +model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) + +autobatch(model) From 8a68891d9fa6d846bba6f15517bef1d90e5aa795 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:05:19 -0700 Subject: [PATCH 05/47] Update --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 6892b8332a42..66e07f133578 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,11 +12,12 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): +def autobatch(model, imgsz=64, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'\n{prefix} Computing optimal batch size') + device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) @@ -34,7 +35,7 @@ def autobatch(model, imgsz=64, fraction=0.8, device='cpu'): print() p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f - p[0]) / p[1]) # optimal batch size + f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 6f67028dda0242b5ca2fac4f586b1cf8cd92cfa5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:07:24 -0700 Subject: [PATCH 06/47] Update --- utils/autobatch.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 66e07f133578..a10d71a87c88 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,10 +12,10 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=64, fraction=0.9): +def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') - print(f'\n{prefix} Computing optimal batch size') + print(f'\n{prefix} Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) @@ -38,7 +38,4 @@ def autobatch(model, imgsz=64, fraction=0.9): f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept - -model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) - -autobatch(model) +# autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From ccd47a06a75c3ab33e98ee1f848c27814515798c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:14:22 -0700 Subject: [PATCH 07/47] Update --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index a10d71a87c88..9e013602d26e 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -15,7 +15,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') - print(f'\n{prefix} Computing optimal batch size for --imgsz {imgsz}') + print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) @@ -23,7 +23,7 @@ def autobatch(model, imgsz=640, fraction=0.9): a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved # f = 15.8 - print(f'\n{prefix} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8] model = deepcopy(de_parallel(model)).train() @@ -35,6 +35,7 @@ def autobatch(model, imgsz=640, fraction=0.9): print() p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + print(batch_sizes, y, p) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 05d7860754395f072e73f033d576847029487a62 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:16:26 -0700 Subject: [PATCH 08/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 9e013602d26e..603d2930d203 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,7 +34,7 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit print(batch_sizes, y, p) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 78cbd2a58aa03c7597f2e3ea6d3ba320ec6edfb4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:18:17 -0700 Subject: [PATCH 09/47] Update --- utils/autobatch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index 603d2930d203..b91000c951da 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,6 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.9): p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit print(batch_sizes, y, p) + print(np.polyval(p, batch_sizes)) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size return f_intercept From 3b34bd45c3af75694d3175337bdc7809d1b3dc4d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:19:45 -0700 Subject: [PATCH 10/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index b91000c951da..3dc08616b13d 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8] + batch_sizes = [1, 2, 4, 8, 16] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From cc09ecf3ef1149b6666287753baa2a3abc912f38 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:21:34 -0700 Subject: [PATCH 11/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 3dc08616b13d..e209a7a1cb67 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16, 32, 64] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From 6d0b3e9e3334ee8b116ab03f564dffc2a4b75b00 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:22:41 -0700 Subject: [PATCH 12/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index e209a7a1cb67..2db2781b0a58 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32, 64] + batch_sizes = [1, 2, 4, 8, 16, 32] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] From 9282c21310604cf29e246649c9bcb371b8c972f7 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:25:33 -0700 Subject: [PATCH 13/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2db2781b0a58..35296542219a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,7 +34,7 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, w=batch_sizes, deg=1) # first degree polynomial fit + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit print(batch_sizes, y, p) print(np.polyval(p, batch_sizes)) f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size From 45ddb57b0da1f40c4f3e3d14a0caaefc95ff8ff6 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:28:35 -0700 Subject: [PATCH 14/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 35296542219a..df8e4abc6cff 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -37,7 +37,7 @@ def autobatch(model, imgsz=640, fraction=0.9): p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit print(batch_sizes, y, p) print(np.polyval(p, batch_sizes)) - f_intercept = int((f * fraction - p[0]) / p[1]) # optimal batch size + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From b1a57d167a12c294553148112e89717d6c8c6134 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:29:55 -0700 Subject: [PATCH 15/47] Update --- utils/autobatch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index df8e4abc6cff..e95f6e75802a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,10 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - print(batch_sizes, y, p) - print(np.polyval(p, batch_sizes)) - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + for i in range(2, 7): + p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + print(f_intercept) + + return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 122733dcd1aa362c54e6e493781d319810eb41af Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:32:14 -0700 Subject: [PATCH 16/47] Update --- utils/autobatch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index e95f6e75802a..dbe6a64d273a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -25,7 +25,7 @@ def autobatch(model, imgsz=640, fraction=0.9): # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32] + batch_sizes = [1, 2, 4, 8, 16, 64] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] @@ -34,12 +34,13 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print() - for i in range(2, 7): + + print(y) + for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size print(f_intercept) - return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 13c4996143107cea4da61e0f8eb029b4cb4d4a7a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:34:55 -0700 Subject: [PATCH 17/47] Update --- utils/autobatch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index dbe6a64d273a..7845bd44cf18 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -30,11 +30,11 @@ def autobatch(model, imgsz=640, fraction=0.9): try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) - y = [x[2] for x in y] # memory [2] + y = [x[2] for x in y if y] # memory [2] except Exception as e: - print() - + print((f'{prefix}{e}) + batch_sizes = batch_sizes[:len(y)] print(y) for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit From bd34ab884182dd7aa754e3097f13ba1decefb6d5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:35:53 -0700 Subject: [PATCH 18/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 7845bd44cf18..1dcdccfdb65e 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -32,7 +32,7 @@ def autobatch(model, imgsz=640, fraction=0.9): y = profile(img, model, n=3, device=device) y = [x[2] for x in y if y] # memory [2] except Exception as e: - print((f'{prefix}{e}) + print(f'{prefix}{e}') batch_sizes = batch_sizes[:len(y)] print(y) From bbe56b8ef63f46b1440498d8be58285e12ae2452 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:37:33 -0700 Subject: [PATCH 19/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 1dcdccfdb65e..fa8df008ab37 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -30,10 +30,10 @@ def autobatch(model, imgsz=640, fraction=0.9): try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) - y = [x[2] for x in y if y] # memory [2] except Exception as e: print(f'{prefix}{e}') + y = [x[2] for x in y if y] # memory [2] batch_sizes = batch_sizes[:len(y)] print(y) for i in range(2, len(batch_sizes)): From aef68c99b78fbd64ff6e349b7e0517f5ba451dea Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:38:43 -0700 Subject: [PATCH 20/47] Update --- utils/autobatch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index fa8df008ab37..2b8152e47ed6 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -33,9 +33,8 @@ def autobatch(model, imgsz=640, fraction=0.9): except Exception as e: print(f'{prefix}{e}') - y = [x[2] for x in y if y] # memory [2] + y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] - print(y) for i in range(2, len(batch_sizes)): p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size From 3faf0558b5d65800486641fb518e9a7435510c04 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:44:58 -0700 Subject: [PATCH 21/47] Update --- utils/autobatch.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2b8152e47ed6..bcf6332b7547 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,7 +12,7 @@ from utils.torch_utils import de_parallel, profile -def autobatch(model, imgsz=640, fraction=0.9): +def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') @@ -22,10 +22,9 @@ def autobatch(model, imgsz=640, fraction=0.9): r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved - # f = 15.8 print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 64] + batch_sizes = [1, 2, 4, 8, 16] model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] @@ -35,11 +34,8 @@ def autobatch(model, imgsz=640, fraction=0.9): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] - for i in range(2, len(batch_sizes)): - p = np.polyfit(batch_sizes[:i], y[:i], deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f_intercept) - + p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 831593b63104e71d3368f4bb93fa358c731aa7a3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:45:22 -0700 Subject: [PATCH 22/47] Update --- utils/autobatch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index bcf6332b7547..b9f722869158 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -16,7 +16,7 @@ def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') - + model = deepcopy(de_parallel(model)).train() device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) @@ -25,7 +25,6 @@ def autobatch(model, imgsz=640, fraction=0.95): print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8, 16] - model = deepcopy(de_parallel(model)).train() try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From 65e3bf6d19a51085c85dd858ed5feae13351dd7a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 20:46:31 -0700 Subject: [PATCH 23/47] Update --- utils/autobatch.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index b9f722869158..b0c89311ffc1 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -14,6 +14,12 @@ def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory + # Usage: + # import torch + # from utils.autobatch import autobatch + # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) + # print(autobatch(model)) + prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') model = deepcopy(de_parallel(model)).train() From 6a0c4d21e08438f7cc8168679df760de5353360c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 6 Oct 2021 22:33:18 -0700 Subject: [PATCH 24/47] Update --- utils/autobatch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index b0c89311ffc1..f344156409bb 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -12,6 +12,14 @@ from utils.torch_utils import de_parallel, profile +def check_batch_size(model, imgsz=640, b=16): + # Check YOLOv5 batch size + assert isinstance(b, int), f'batch-size {b} must be integer' + if b < 1: + b = autobatch(model, imgsz) # compute optimal batch size + return b + + def autobatch(model, imgsz=640, fraction=0.95): # Automatically compute optimal batch size to use `fraction` of available CUDA memory # Usage: From 6fa983403504835eba3570dbd700e2a08f3aea95 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:01:15 -0700 Subject: [PATCH 25/47] Update train.py --- train.py | 15 ++++++++++----- utils/autobatch.py | 3 +-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/train.py b/train.py index da7346be77ab..ed33f506c17d 100644 --- a/train.py +++ b/train.py @@ -36,6 +36,7 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors +from utils.autobatch import check_batch_size from utils.datasets import create_dataloader from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \ @@ -131,6 +132,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary print(f'freezing {k}') v.requires_grad = False + # Image size + gs = max(int(model.stride.max()), 32) # grid size (max stride) + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple + + # Batch size + if cuda and RANK == -1: # single-GPU only + batch_size = check_batch_size(model, batch_size, imgsz) + # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing @@ -190,11 +199,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary del ckpt, csd - # Image sizes - gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple - # DP mode if cuda and RANK == -1 and torch.cuda.device_count() > 1: logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n' @@ -242,6 +246,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) # Model parameters + nl = model.model[-1].nl # number of detection layers (to scale hyps) hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers diff --git a/utils/autobatch.py b/utils/autobatch.py index f344156409bb..89cd5b53c40c 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -14,8 +14,7 @@ def check_batch_size(model, imgsz=640, b=16): # Check YOLOv5 batch size - assert isinstance(b, int), f'batch-size {b} must be integer' - if b < 1: + if b < 1 or b == 'auto': b = autobatch(model, imgsz) # compute optimal batch size return b From 888f55c700126fa2e7e4d25a63f417f6e507a3bd Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:15:34 -0700 Subject: [PATCH 26/47] print result --- utils/autobatch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/autobatch.py b/utils/autobatch.py index 89cd5b53c40c..2834fb035020 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,6 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size + print(f'{prefix} batch-size {f_intercept} estimated to use f{fraction * 100}%% of CUDA device {device} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From d2f47bcba8888ead4715682f7edebcf9aa9ce29c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:17:55 -0700 Subject: [PATCH 27/47] Cleanup print result --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2834fb035020..8e5ce7ddd238 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix} batch-size {f_intercept} estimated to use f{fraction * 100}%% of CUDA device {device} memory') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of CUDA:{device} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From c94026a7c082ed01bf8e3088e14829a83da49608 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:34:05 -0700 Subject: [PATCH 28/47] swap fix in call --- train.py | 2 +- utils/autobatch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index ed33f506c17d..b7b542a38b46 100644 --- a/train.py +++ b/train.py @@ -138,7 +138,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - batch_size = check_batch_size(model, batch_size, imgsz) + batch_size = check_batch_size(model, imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size diff --git a/utils/autobatch.py b/utils/autobatch.py index 8e5ce7ddd238..5ea1978fd999 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of CUDA:{device} memory') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of {str(device).upper()} memory') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From afdfcfba49fe7ce832b025e4876f203d1b59f709 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:36:06 -0700 Subject: [PATCH 29/47] to 64 --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 5ea1978fd999..03484969f988 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -37,7 +37,7 @@ def autobatch(model, imgsz=640, fraction=0.95): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16] + batch_sizes = [1, 2, 4, 8, 16, 32, 64] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From ab7cc125b68b2018f61935a39cf824e219495381 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:41:40 -0700 Subject: [PATCH 30/47] use total --- utils/autobatch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 03484969f988..404f81af3dee 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -47,8 +47,9 @@ def autobatch(model, imgsz=640, fraction=0.95): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {fraction * 100}% of {str(device).upper()} memory') + f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size + print(f'{prefix}batch-size {f_intercept} estimated to utilize {f_intercept:3}G of ' + f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0g})') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From a036dd4291d85a288a501aca4ef3a5807463337e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:44:34 -0700 Subject: [PATCH 31/47] fix --- utils/autobatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 404f81af3dee..6838eeea45ee 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,8 +48,8 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {f_intercept:3}G of ' - f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0g})') + print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:3}G of ' + f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From f6f80eda42eb198561b0a8d1299a6a71b2cae311 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:50:28 -0700 Subject: [PATCH 32/47] fix --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 6838eeea45ee..5f3f2a781287 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,7 +48,7 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:3}G of ' + print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:.3g}G of ' f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') return f_intercept From e601f42d30cd200542cc79852cb8f6b571c1dd49 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:53:55 -0700 Subject: [PATCH 33/47] fix --- utils/autobatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 5f3f2a781287..b8676a0544a6 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -48,8 +48,8 @@ def autobatch(model, imgsz=640, fraction=0.95): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize {t * fraction:.3g}G of ' - f'{str(device).upper()} {t:.3g}G ({fraction * 100:.0f}%)') + print(f'{prefix}batch-size {f_intercept} estimated to utilize ' + f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return f_intercept # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From f55ad0b8ad12ee22f3b0ed897fddecf61fb442e0 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:55:41 -0700 Subject: [PATCH 34/47] fix --- train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index b7b542a38b46..f2465366f62d 100644 --- a/train.py +++ b/train.py @@ -138,7 +138,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - batch_size = check_batch_size(model, imgsz, batch_size) + with amp.autocast(): + batch_size = check_batch_size(model, imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size From 58ed6afb5eaaddef6cea89971a9900040fc97efd Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 19:57:34 -0700 Subject: [PATCH 35/47] fix --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index b8676a0544a6..03b4f1f62d1a 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -47,7 +47,7 @@ def autobatch(model, imgsz=640, fraction=0.95): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((t * fraction - p[1]) / p[0]) # optimal batch size + f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size print(f'{prefix}batch-size {f_intercept} estimated to utilize ' f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return f_intercept From 18f5dd322cbd3f08a5e132e0b00f74d9664a7b68 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:10:49 -0700 Subject: [PATCH 36/47] Update --- train.py | 3 ++- utils/autobatch.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index f2465366f62d..f4f54cf7ec7b 100644 --- a/train.py +++ b/train.py @@ -139,7 +139,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only with amp.autocast(): - batch_size = check_batch_size(model, imgsz, batch_size) + batch_size = check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer nbs = 64 # nominal batch size diff --git a/utils/autobatch.py b/utils/autobatch.py index 03b4f1f62d1a..33c0d2d9a68c 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -29,7 +29,6 @@ def autobatch(model, imgsz=640, fraction=0.95): prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') - model = deepcopy(de_parallel(model)).train() device = next(model.parameters()).device # get model device t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) From 4b39534b39ccf0d17b47e22f6efc01be1a96e381 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:16:36 -0700 Subject: [PATCH 37/47] Update --- train.py | 2 +- utils/torch_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index f4f54cf7ec7b..01b6aacb16ee 100644 --- a/train.py +++ b/train.py @@ -139,7 +139,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only with amp.autocast(): - batch_size = check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + check_batch_size(deepcopy(model).eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer diff --git a/utils/torch_utils.py b/utils/torch_utils.py index d1c48f73ea72..6f52f9a3728d 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -126,7 +126,7 @@ def profile(input, ops, n=10, device=None): _ = (sum([yi.sum() for yi in y]) if isinstance(y, list) else y).sum().backward() t[2] = time_sync() except Exception as e: # no backward method - print(e) + # print(e) # for debug t[2] = float('nan') tf += (t[1] - t[0]) * 1000 / n # ms per op forward tb += (t[2] - t[1]) * 1000 / n # ms per op backward From 1c9b42ad030365fab111f2c75825523dfd0773c3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:17:46 -0700 Subject: [PATCH 38/47] Update --- train.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 01b6aacb16ee..7760c826cf7b 100644 --- a/train.py +++ b/train.py @@ -138,8 +138,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only + + model2 = deepcopy(model).eval() + for k, v in model2.named_parameters(): + v.requires_grad = True # train all layers + check_batch_size(model2.eval(), imgsz, batch_size) + with amp.autocast(): - check_batch_size(deepcopy(model).eval(), imgsz, batch_size) + check_batch_size(model2.eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer From 5c2e2356d29690aa1c79a42fd2109ca84e0d09eb Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:23:36 -0700 Subject: [PATCH 39/47] Update --- train.py | 7 ------- utils/autobatch.py | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/train.py b/train.py index 7760c826cf7b..dfe997b1bde2 100644 --- a/train.py +++ b/train.py @@ -138,14 +138,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary # Batch size if cuda and RANK == -1: # single-GPU only - - model2 = deepcopy(model).eval() - for k, v in model2.named_parameters(): - v.requires_grad = True # train all layers - check_batch_size(model2.eval(), imgsz, batch_size) - with amp.autocast(): - check_batch_size(model2.eval(), imgsz, batch_size) batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) # Optimizer diff --git a/utils/autobatch.py b/utils/autobatch.py index 33c0d2d9a68c..d94dabc47981 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,7 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.95): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32, 64] + batch_sizes = [1, 2, 4, 8, 16, 32] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From 08f8e1740f4b671ff6a64aea6e1c7792a3fa5f5b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:25:24 -0700 Subject: [PATCH 40/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index d94dabc47981..73ffe68b628f 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -19,7 +19,7 @@ def check_batch_size(model, imgsz=640, b=16): return b -def autobatch(model, imgsz=640, fraction=0.95): +def autobatch(model, imgsz=640, fraction=0.9): # Automatically compute optimal batch size to use `fraction` of available CUDA memory # Usage: # import torch From a9c00fae43f0be6351f9caf0668827344df6be3e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:35:13 -0700 Subject: [PATCH 41/47] Update --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 73ffe68b628f..3f97140c1a29 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -36,7 +36,7 @@ def autobatch(model, imgsz=640, fraction=0.9): f = t - (r + a) # free inside reserved print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') - batch_sizes = [1, 2, 4, 8, 16, 32] + batch_sizes = [1, 2, 4, 8, 16] try: img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes] y = profile(img, model, n=3, device=device) From ccabcd36d1c3f182f2bee7193f9a7653a95f4058 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 20:54:23 -0700 Subject: [PATCH 42/47] Update --- train.py | 9 ++++----- utils/autobatch.py | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/train.py b/train.py index dfe997b1bde2..d83f3cd1863c 100644 --- a/train.py +++ b/train.py @@ -36,7 +36,7 @@ from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors -from utils.autobatch import check_batch_size +from utils.autobatch import check_train_batch_size from utils.datasets import create_dataloader from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \ @@ -137,9 +137,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple # Batch size - if cuda and RANK == -1: # single-GPU only - with amp.autocast(): - batch_size = check_batch_size(deepcopy(model).train(), imgsz, batch_size) + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size + batch_size = check_train_batch_size(model, imgsz) # Optimizer nbs = 64 # nominal batch size @@ -446,7 +445,7 @@ def parse_opt(known=False): parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--rect', action='store_true', help='rectangular training') parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') diff --git a/utils/autobatch.py b/utils/autobatch.py index 3f97140c1a29..22a8c59040c8 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -7,20 +7,20 @@ import numpy as np import torch +from torch.cuda import amp from utils.general import colorstr -from utils.torch_utils import de_parallel, profile +from utils.torch_utils import profile -def check_batch_size(model, imgsz=640, b=16): - # Check YOLOv5 batch size - if b < 1 or b == 'auto': - b = autobatch(model, imgsz) # compute optimal batch size - return b +def check_train_batch_size(model, imgsz=640): + # Check YOLOv5 training batch size + with amp.autocast(): + return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size -def autobatch(model, imgsz=640, fraction=0.9): - # Automatically compute optimal batch size to use `fraction` of available CUDA memory +def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): + # Automatically estimate best batch size to use `fraction` of available CUDA memory # Usage: # import torch # from utils.autobatch import autobatch @@ -30,6 +30,10 @@ def autobatch(model, imgsz=640, fraction=0.9): prefix = colorstr('autobatch: ') print(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') device = next(model.parameters()).device # get model device + if device.type == 'cpu': + print(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') + return batch_size + t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) From af25dbcd135070d169507c3b141b70fcf3e45ac6 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 7 Oct 2021 21:10:04 -0700 Subject: [PATCH 43/47] Cleanup printing --- utils/autobatch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 22a8c59040c8..cf65502d5608 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -34,11 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): print(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') return batch_size + d = str(device).upper() # 'CUDA:0' t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3 # (GB) r = torch.cuda.memory_reserved(device) / 1024 ** 3 # (GB) a = torch.cuda.memory_allocated(device) / 1024 ** 3 # (GB) f = t - (r + a) # free inside reserved - print(f'{prefix}{t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') + print(f'{prefix}{d} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free') batch_sizes = [1, 2, 4, 8, 16] try: @@ -50,9 +51,8 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): y = [x[2] for x in y if x] # memory [2] batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit - f_intercept = int((f * fraction - p[1]) / p[0]) # optimal batch size - print(f'{prefix}batch-size {f_intercept} estimated to utilize ' - f'{str(device).upper()} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') - return f_intercept + b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) + print(f'{prefix}batch-size {b} estimated to utilize {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') + return b # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 3e1c74fc424454b4abf3c6b2cc2b58bb212a4e7b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 25 Oct 2021 13:42:06 +0200 Subject: [PATCH 44/47] Update final printout --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index cf65502d5608..d46596e25935 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -52,7 +52,7 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) - print(f'{prefix}batch-size {b} estimated to utilize {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') + print(f'Using {prefix}batch-size {b} for {fraction * 100:.0f}% estimated {d} utilization {t * fraction:.3g}G/{t:.3g}G') return b # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 602cf9ae4148057d699e36b3fc909972c3a28f56 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 25 Oct 2021 13:43:38 +0200 Subject: [PATCH 45/47] Update autobatch.py --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index d46596e25935..8ed25afaa2fe 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -52,7 +52,7 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) - print(f'Using {prefix}batch-size {b} for {fraction * 100:.0f}% estimated {d} utilization {t * fraction:.3g}G/{t:.3g}G') + print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.3g}G/{t:.3g}G, {fraction * 100:.0f}% utilization') return b # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 126c13abef951b506b0337af768c768ebe51082b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 25 Oct 2021 13:48:14 +0200 Subject: [PATCH 46/47] Update autobatch.py --- utils/autobatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 8ed25afaa2fe..2584e93394b0 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -52,7 +52,7 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) - print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.3g}G/{t:.3g}G, {fraction * 100:.0f}% utilization') + print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return b # autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)) From 327954fa3597efd0e080b1f83508fa382ef4b5bf Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 25 Oct 2021 13:50:39 +0200 Subject: [PATCH 47/47] Update autobatch.py --- utils/autobatch.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/utils/autobatch.py b/utils/autobatch.py index 2584e93394b0..168b16f691ab 100644 --- a/utils/autobatch.py +++ b/utils/autobatch.py @@ -52,7 +52,5 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16): batch_sizes = batch_sizes[:len(y)] p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) - print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') + print(f'{prefix}Using colorstr(batch-size {b}) for {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)') return b - -# autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False))