diff --git a/metplus/util/system_util.py b/metplus/util/system_util.py index 29f4e8bc1..55f2f964a 100644 --- a/metplus/util/system_util.py +++ b/metplus/util/system_util.py @@ -121,26 +121,19 @@ def prune_empty(output_dir, logger): """ # Check for empty files. - for root, dirs, files in os.walk(output_dir): - # Create a full file path by joining the path - # and filename. - for a_file in files: - a_file = os.path.join(root, a_file) - if os.stat(a_file).st_size == 0: - logger.debug("Empty file: " + a_file + - "...removing") - os.remove(a_file) + for a_file in traverse_dir(output_dir): + if os.stat(a_file).st_size == 0: + logger.debug("Empty file: " + a_file + + "...removing") + os.remove(a_file) # Now check for any empty directories, some # may have been created when removing # empty files. - for root, dirs, files in os.walk(output_dir): - for direc in dirs: - full_dir = os.path.join(root, direc) - if not os.listdir(full_dir): - logger.debug("Empty directory: " + full_dir + - "...removing") - os.rmdir(full_dir) + for full_dir in traverse_dir(output_dir, get_dirs=True): + if not os.listdir(full_dir): + logger.debug("Empty directory: " + full_dir + "...removing") + os.rmdir(full_dir) def get_files(filedir, filename_regex): @@ -353,3 +346,21 @@ def preprocess_file(filename, data_type, config, allow_dir=False): return filename return None + + +def traverse_dir(data_dir, get_dirs=False): + """!Generator used to navigate through and yield full path to all files or + directories under data_dir. + + @param data_dir directory to traverse + @param get_dirs If True, get all directories under data_dir. If False, get + all files under data_dir. Defaults to False (files). + """ + for dir_path, dirs, all_files in os.walk(data_dir, followlinks=True): + if get_dirs: + items = sorted(dirs) + else: + items = sorted(all_files) + + for dir_name in items: + yield os.path.join(dir_path, dir_name) diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py index e198e4798..5a18ff9be 100755 --- a/metplus/wrappers/command_builder.py +++ b/metplus/wrappers/command_builder.py @@ -29,7 +29,7 @@ from ..util import get_wrapper_name, is_python_script from ..util.met_config import add_met_config_dict, handle_climo_dict from ..util import mkdir_p, get_skip_times -from ..util import get_log_path, RunArgs, run_cmd +from ..util import get_log_path, RunArgs, run_cmd, traverse_dir # pylint:disable=pointless-string-statement @@ -595,9 +595,9 @@ def _find_exact_file(self, data_type, time_info, mandatory=True, return found_files def _is_optional_input(self, mandatory): - return (not mandatory - or not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)) + return (not self.c_dict.get('MANDATORY', True) + or self.c_dict.get('ALLOW_MISSING_INPUTS', False) + or not mandatory) def _log_message_dynamic_level(self, msg, mandatory): """!Log message based on rules. If mandatory input and missing inputs @@ -741,18 +741,7 @@ def _find_file_in_window(self, data_type, time_info, mandatory=True, if not closest_files: msg = (f"Could not find {data_type}INPUT files under {data_dir} within range " f"[{valid_range_lower},{valid_range_upper}] using template {template}") - if (not mandatory - or not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - - if self.c_dict.get('SUPPRESS_WARNINGS', False): - self.logger.debug(msg) - else: - self.logger.warning(msg) - - else: - self.log_error(msg) - + self._log_message_dynamic_level(msg, mandatory) return None # remove any files that are the same as another but zipped @@ -800,41 +789,38 @@ def _get_closest_files(self, data_dir, template, valid_time, "%Y%m%d%H%M%S").strftime("%s")) # step through all files under input directory in sorted order - for dirpath, _, all_files in os.walk(data_dir, followlinks=True): - for filename in sorted(all_files): - fullpath = os.path.join(dirpath, filename) - - # remove input data directory to get relative path - rel_path = fullpath.replace(f'{data_dir}/', "") - # extract time information from relative path using template - file_time_info = get_time_from_file(rel_path, template, - self.logger) - if file_time_info is None: - continue + for fullpath in traverse_dir(data_dir): + # remove input data directory to get relative path + rel_path = fullpath.replace(f'{data_dir}/', "") + # extract time information from relative path using template + file_time_info = get_time_from_file(rel_path, template, + self.logger) + if file_time_info is None: + continue - # get valid time and check if it is within the time range - file_valid_time = file_time_info['valid'].strftime("%Y%m%d%H%M%S") - # skip if could not extract valid time - if not file_valid_time: - continue - file_valid_dt = datetime.strptime(file_valid_time, "%Y%m%d%H%M%S") - file_valid_seconds = int(file_valid_dt.strftime("%s")) - # skip if outside time range - if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit: - continue + # get valid time and check if it is within the time range + file_valid_time = file_time_info['valid'].strftime("%Y%m%d%H%M%S") + # skip if could not extract valid time + if not file_valid_time: + continue + file_valid_dt = datetime.strptime(file_valid_time, "%Y%m%d%H%M%S") + file_valid_seconds = int(file_valid_dt.strftime("%s")) + # skip if outside time range + if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit: + continue - # if multiple files are allowed, get all files within range - if self.c_dict.get('ALLOW_MULTIPLE_FILES', False): - closest_files.append(fullpath) - continue + # if multiple files are allowed, get all files within range + if self.c_dict.get('ALLOW_MULTIPLE_FILES', False): + closest_files.append(fullpath) + continue - # if only 1 file is allowed, check if file is - # closer to desired valid time than previous match - diff = abs(valid_seconds - file_valid_seconds) - if diff < closest_time: - closest_time = diff - del closest_files[:] - closest_files.append(fullpath) + # if only 1 file is allowed, check if file is + # closer to desired valid time than previous match + diff = abs(valid_seconds - file_valid_seconds) + if diff < closest_time: + closest_time = diff + del closest_files[:] + closest_files.append(fullpath) return closest_files @@ -878,11 +864,7 @@ def find_input_files_ensemble(self, time_info, fill_missing=True): input_files = self.find_model(time_info, return_list=True, mandatory=False) if not input_files: msg = "Could not find any input files" - if (not self.c_dict.get('MANDATORY', True) - or self.c_dict.get('ALLOW_MISSING_INPUTS', False)): - self.logger.warning(msg) - else: - self.log_error(msg) + self._log_message_dynamic_level(msg, True) return False # if control file is requested, remove it from input list @@ -1033,15 +1015,7 @@ def find_and_check_output_file(self, time_info=None, # get directory that the output file will exist if is_directory: parent_dir = output_path - valid = '*' - lead = '*' - if time_info: - if time_info['valid'] != '*': - valid = time_info['valid'].strftime('%Y%m%d_%H%M%S') - if time_info['lead'] != '*': - lead = seconds_to_met_time(time_info['lead_seconds'], - force_hms=True) - + valid, lead = self._get_valid_and_lead_from_time_info(time_info) prefix = self.get_output_prefix(time_info, set_env_vars=False) prefix = f'{self.app_name}_{prefix}' if prefix else self.app_name search_string = f'{prefix}_{lead}L_{valid}V*' @@ -1081,6 +1055,19 @@ def find_and_check_output_file(self, time_info=None, 'to process') return False + @staticmethod + def _get_valid_and_lead_from_time_info(time_info): + valid = '*' + lead = '*' + if not time_info: + return valid, lead + + if time_info['valid'] != '*': + valid = time_info['valid'].strftime('%Y%m%d_%H%M%S') + if time_info['lead'] != '*': + lead = seconds_to_met_time(time_info['lead_seconds'], force_hms=True) + return valid, lead + def check_for_externals(self): self.check_for_gempak()