From 7001eb69f94f26d725d864380dd721272d113c85 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 9 Jun 2021 10:56:11 +0200 Subject: [PATCH] Update `dataset_stats()` for HUB (#3536) * Update `dataset_stats()` for HUB Cleanup of 03b286e * autodownload flag * Update general.py * cleanup --- utils/datasets.py | 11 +++++------ utils/general.py | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 7c74d2c01322..108005c8de65 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -1086,18 +1086,17 @@ def verify_image_label(params): return [None] * 4 + [nm, nf, ne, nc] -def dataset_stats(path='data/coco128.yaml', verbose=False): +def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False): """ Return dataset statistics dictionary with images and instances counts per split per class - Usage: from utils.datasets import *; dataset_stats('data/coco128.yaml') + Usage: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True) Arguments path: Path to data.yaml + autodownload: Attempt to download dataset if not found locally verbose: Print stats dictionary """ - path = check_file(Path(path)) - with open(path) as f: + with open(check_file(Path(path))) as f: data = yaml.safe_load(f) # data dict - check_dataset(data) # download dataset if missing - + check_dataset(data, autodownload) # download dataset if missing nc = data['nc'] # number of classes stats = {'nc': nc, 'names': data['names']} # statistics dictionary for split in 'train', 'val', 'test': diff --git a/utils/general.py b/utils/general.py index a12b0aafba0e..367f30b925f4 100755 --- a/utils/general.py +++ b/utils/general.py @@ -220,14 +220,14 @@ def check_file(file): return files[0] # return file -def check_dataset(dict): +def check_dataset(data, autodownload=True): # Download dataset if not found locally - val, s = dict.get('val'), dict.get('download') + val, s = data.get('val'), data.get('download') if val and len(val): val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path if not all(x.exists() for x in val): print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) - if s and len(s): # download script + if s and len(s) and autodownload: # download script if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename print(f'Downloading {s} ...')