diff --git a/CHANGELOG.md b/CHANGELOG.md index 05076576..b87313e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed + - [issues/96](https://github.com/podaac/concise/issues/96): + - implemented sorting after [multi_core_download](https://github.com/podaac/concise/blob/23b44803f4829c1eb7e9d39b311a0373092daab3/podaac/merger/harmony/download_worker.py#L15) to preserve the input file order in the returned path list ### Deprecated ### Removed ### Fixed diff --git a/podaac/merger/harmony/download_worker.py b/podaac/merger/harmony/download_worker.py index 465b46ed..f4f3e2c9 100644 --- a/podaac/merger/harmony/download_worker.py +++ b/podaac/merger/harmony/download_worker.py @@ -44,8 +44,8 @@ def multi_core_download(urls, destination_dir, access_token, cfg, process_count= url_queue = manager.Queue(len(urls)) path_list = manager.list() - for url in urls: - url_queue.put(url) + for iurl, url in enumerate(urls): + url_queue.put((iurl, url)) # Spawn worker processes processes = [] @@ -64,7 +64,7 @@ def multi_core_download(urls, destination_dir, access_token, cfg, process_count= path_list = deepcopy(path_list) # ensure GC can cleanup multiprocessing - return [Path(path) for path in path_list] + return [Path(path) for ipath, path in sorted(path_list)] def _download_worker(url_queue, path_list, destination_dir, access_token, cfg): @@ -91,7 +91,7 @@ def _download_worker(url_queue, path_list, destination_dir, access_token, cfg): while not url_queue.empty(): try: - url = url_queue.get_nowait() + iurl, url = url_queue.get_nowait() except queue.Empty: break @@ -105,4 +105,4 @@ def _download_worker(url_queue, path_list, destination_dir, access_token, cfg): else: logger.warning('Origin filename could not be assertained - %s', url) - path_list.append(str(path)) + path_list.append((iurl, str(path)))