From 69a284de472a249781b8a86c2d68b1bb0267f1b9 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 16 Jan 2023 11:46:55 +0100 Subject: [PATCH] Fix issues with searching for files on ESGF (#1863) --- esmvalcore/esgf/_download.py | 10 +++++++--- tests/unit/esgf/test_search.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/esmvalcore/esgf/_download.py b/esmvalcore/esgf/_download.py index ce4a0e8fc3..9dd10fb397 100644 --- a/esmvalcore/esgf/_download.py +++ b/esmvalcore/esgf/_download.py @@ -224,7 +224,7 @@ def same_file(result): # cmip5.output1.ICHEC.EC-EARTH.historical # .mon.atmos.Amon.r1i1p1.v20121115 variable = file.name.split('_')[0] - if 'variable' in facets and facets['variable'] == variable: + if 'variable' not in facets or facets['variable'] == variable: files.append(file) else: logger.debug( @@ -279,8 +279,12 @@ def _get_facets(results): facets = { 'project': project, } - for idx, key in enumerate(keys): - facets[key] = values[idx] + if len(keys) == len(values): + for idx, key in enumerate(keys): + facets[key] = values[idx] + else: + logger.debug("Wrong dataset_id_template_ %s for dataset %s", + template, dataset_id) # The dataset_id does not contain the short_name for all projects, # so get it from the filename if needed: if 'short_name' not in facets: diff --git a/tests/unit/esgf/test_search.py b/tests/unit/esgf/test_search.py index ef38dd952d..52959f2b8a 100644 --- a/tests/unit/esgf/test_search.py +++ b/tests/unit/esgf/test_search.py @@ -372,6 +372,31 @@ def test_select_by_time_nodate(): assert result == files +def test_invalid_dataset_id_template(): + dataset_id = ( + 'obs4MIPs.IUP.XCH4_CRDP3.xch4.mon.v100') + dataset_id_template = ( + '%(project)s.%(institute)s.%(source_id)s.%(time_frequency)s' + ) + filenames = ['xch4_ghgcci_l3_v100_200301_201412.nc'] + results = [ + FileResult( + json={ + 'title': filename, + 'dataset_id': dataset_id + '|esgf.ceda.ac.uk', + 'dataset_id_template_': [dataset_id_template], + 'project': ['obs4MIPs'], + 'size': 100, + 'source_id': 'XCH4_CRDP3', + }, + context=None, + ) for filename in filenames + ] + file = ESGFFile(results) + + assert file.name == filenames[0] + + def test_search_unknown_project(): project = 'Unknown' msg = (f"Unable to download from ESGF, because project {project} is not on"