Skip to content

Commit

Permalink
Bugfix #1939 develop - failure reading obs when zipped file also exis…
Browse files Browse the repository at this point in the history
…ts (#1941)
  • Loading branch information
georgemccabe authored Nov 15, 2022
1 parent 8d1a28f commit 90d7c35
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 17 deletions.
Binary file added internal/tests/data/obs/20180201_0045.gz
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,19 @@ def get_data_dir(config):


@pytest.mark.parametrize(
'data_type', [
("FCST_"),
("OBS_"),
(""),
("MASK_"),
]
'data_type,allow_multiple', [
("FCST_", False),
("OBS_", False),
("", False),
("MASK_", False),
("FCST_", True),
("OBS_", True),
("", True),
("MASK_", True),
]
)
@pytest.mark.wrapper
def test_find_data_no_dated(metplus_config, data_type):
def test_find_data_no_dated(metplus_config, data_type, allow_multiple):
config = metplus_config

pcw = CommandBuilder(config)
Expand All @@ -39,8 +43,10 @@ def test_find_data_no_dated(metplus_config, data_type):
pcw.c_dict[f'{data_type}FILE_WINDOW_END'] = 3600
pcw.c_dict[f'{data_type}INPUT_DIR'] = get_data_dir(pcw.config)
pcw.c_dict[f'{data_type}INPUT_TEMPLATE'] = "{valid?fmt=%Y%m%d}_{valid?fmt=%H%M}"
pcw.c_dict[f'ALLOW_MULTIPLE_FILES'] = allow_multiple
add_field_info_to_time_info(time_info, var_info)
obs_file = pcw.find_data(time_info, data_type)
assert not isinstance(obs_file, list)
assert obs_file == pcw.c_dict[f'{data_type}INPUT_DIR']+'/20180201_0045'


Expand Down
24 changes: 18 additions & 6 deletions metplus/wrappers/command_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from .command_runner import CommandRunner

from ..util.constants import PYTHON_EMBEDDING_TYPES
from ..util.constants import PYTHON_EMBEDDING_TYPES, COMPRESSION_EXTENSIONS
from ..util import getlist, preprocess_file, loop_over_times_and_call
from ..util import do_string_sub, ti_calculate, get_seconds_from_string
from ..util import get_time_from_file, shift_time_seconds
Expand Down Expand Up @@ -736,25 +736,37 @@ def find_file_in_window(self, level, data_type, time_info, mandatory=True,
closest_files.append(fullpath)

if not closest_files:
msg = f"Could not find {data_type}INPUT files under {data_dir} within range " +\
f"[{valid_range_lower},{valid_range_upper}] using template {template}"
msg = (f"Could not find {data_type}INPUT files under {data_dir} within range "
f"[{valid_range_lower},{valid_range_upper}] using template {template}")
if not mandatory:
self.logger.warning(msg)
else:
self.log_error(msg)

return None

# remove any files that are the same as another but zipped
closest_files_fixed = []
for filepath in closest_files:
duplicate_found = False
for ext in COMPRESSION_EXTENSIONS:
if filepath.endswith(ext) and filepath[0:-len(ext)] in closest_files:
duplicate_found = True
continue

if not duplicate_found:
closest_files_fixed.append(filepath)

# check if file(s) needs to be preprocessed before returning the path
# if one file was found and return_list is False, return single file
if len(closest_files) == 1 and not return_list:
return preprocess_file(closest_files[0],
if len(closest_files_fixed) == 1 and not return_list:
return preprocess_file(closest_files_fixed[0],
self.c_dict.get(data_type + 'INPUT_DATATYPE', ''),
self.config)

# return list if multiple files are found
out = []
for close_file in closest_files:
for close_file in closest_files_fixed:
outfile = preprocess_file(close_file,
self.c_dict.get(data_type + 'INPUT_DATATYPE', ''),
self.config)
Expand Down
10 changes: 6 additions & 4 deletions metplus/wrappers/ensemble_stat_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,19 +424,21 @@ def run_at_time_all_fields(self, time_info):

# get point observation file if requested
if self.c_dict['OBS_POINT_INPUT_TEMPLATE']:
point_obs_path = self.find_data(time_info, data_type='OBS_POINT')
point_obs_path = self.find_data(time_info, data_type='OBS_POINT',
return_list=True)
if point_obs_path is None:
return

self.point_obs_files.append(point_obs_path)
self.point_obs_files.extend(point_obs_path)

# get grid observation file if requested
if self.c_dict['OBS_GRID_INPUT_TEMPLATE']:
grid_obs_path = self.find_data(time_info, data_type='OBS_GRID')
grid_obs_path = self.find_data(time_info, data_type='OBS_GRID',
return_list=True)
if grid_obs_path is None:
return

self.grid_obs_files.append(grid_obs_path)
self.grid_obs_files.extend(grid_obs_path)

# parse optional var list for FCST and/or OBS fields
var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info)
Expand Down

0 comments on commit 90d7c35

Please sign in to comment.