diff --git a/.github/jobs/get_use_case_commands.py b/.github/jobs/get_use_case_commands.py index 04920faa9c..b4de2094b0 100755 --- a/.github/jobs/get_use_case_commands.py +++ b/.github/jobs/get_use_case_commands.py @@ -14,7 +14,7 @@ sys.path.insert(0, METPLUS_TOP_DIR) from internal.tests.use_cases.metplus_use_case_suite import METplusUseCaseSuite -from metplus.util.met_util import expand_int_string_to_list +from metplus.util.string_manip import expand_int_string_to_list from docker_utils import VERSION_EXT diff --git a/docs/Contributors_Guide/deprecation.rst b/docs/Contributors_Guide/deprecation.rst index 491baef9e6..6c6d63e2f2 100644 --- a/docs/Contributors_Guide/deprecation.rst +++ b/docs/Contributors_Guide/deprecation.rst @@ -26,7 +26,7 @@ wrong variable and it is using WGRIB2 = wgrib2. check_for_deprecated_config() ----------------------------- -In **met_util.py** there is a function called +In **metplus/util/config_metplus.py** there is a function called check_for_deprecated_config. It contains a dictionary of dictionaries called deprecated_dict that specifies the old config name, the section it was found in, and a suggested alternative (None if no alternative diff --git a/docs/Users_Guide/getting_started.rst b/docs/Users_Guide/getting_started.rst index 3aada6c852..7941a97f68 100644 --- a/docs/Users_Guide/getting_started.rst +++ b/docs/Users_Guide/getting_started.rst @@ -327,7 +327,7 @@ user configuration file and The last line of the screen output should match this format:: - 05/04 09:42:52.277 metplus (met_util.py:212) INFO: METplus has successfully finished running. + 05/04 09:42:52.277 metplus INFO: METplus has successfully finished running. If this log message is not shown, there is likely an issue with one or more of the default configuration variable overrides in the @@ -339,7 +339,7 @@ how the :ref:`common_config_variables` control a use case run. If the run was successful, the line above the success message should contain the path to the METplus log file that was generated:: - 05/04 09:44:21.534 metplus (met_util.py:211) INFO: Check the log file for more information: /path/to/output/logs/metplus.log.20210504094421 + 05/04 09:44:21.534 metplus INFO: Check the log file for more information: /path/to/output/logs/metplus.log.20210504094421 * Review the log file and compare it to the Example.conf use case configuration file to see how the settings correspond to the result. diff --git a/docs/Users_Guide/systemconfiguration.rst b/docs/Users_Guide/systemconfiguration.rst index a52b997d3c..17a164b051 100644 --- a/docs/Users_Guide/systemconfiguration.rst +++ b/docs/Users_Guide/systemconfiguration.rst @@ -426,7 +426,7 @@ This defines the format of the ERROR log messages. Setting the value to:: Produces a log file with ERROR lines that match this format:: - 04/29 16:03:34.858 metplus (met_util.py:218) ERROR: METplus has finished running but had 1 error. + 04/29 16:03:34.858 metplus (run_util.py:192) ERROR: METplus has finished running but had 1 error. The format of the timestamp is set by :ref:`LOG_LINE_DATE_FORMAT`. @@ -442,7 +442,7 @@ This defines the format of the DEBUG log messages. Setting the value to:: Produces a log file with DEBUG lines that match this format:: - 04/29 15:54:22.851 metplus (met_util.py:207) DEBUG: METplus took 0:00:00.850983 to run. + 04/29 15:54:22.851 metplus (run_util.py:177) DEBUG: METplus took 0:00:00.850983 to run. The format of the timestamp is set by :ref:`LOG_LINE_DATE_FORMAT`. 
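The LOG_*_LINE_FORMAT settings referenced above configure Python's logging module. As a hedged illustration only (not part of this patch), a minimal sketch of how a format string in that style produces lines like the examples; the exact fmt string here is an assumption for illustration, not a value copied from METplus::

    # Illustrative sketch, not part of the patch. The fmt string below is an
    # assumption in the style of LOG_ERR_LINE_FORMAT, not the METplus default.
    import logging

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        fmt='%(asctime)s.%(msecs)03d %(name)s (%(filename)s:%(lineno)d) '
            '%(levelname)s: %(message)s',
        datefmt='%m/%d %H:%M:%S'))  # MM/DD HH:MM:SS.mmm, as in the examples

    logger = logging.getLogger('metplus')
    logger.addHandler(handler)
    logger.error('METplus has finished running but had 1 error.')
    # emits: 04/29 16:03:34.858 metplus (<caller file>:<caller line>) ERROR: ...
    # %(filename)s and %(lineno)d resolve to the module that logged the message,
    # which is why these docs now show run_util.py where met_util.py was removed.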
@@ -2648,9 +2648,9 @@ In most cases, there is a simple one-to-one relationship between a deprecated co Example:: - (met_util.py) ERROR: DEPRECATED CONFIG ITEMS WERE FOUND. PLEASE REMOVE/REPLACE THEM FROM CONFIG FILES - (met_util.py) ERROR: [dir] MODEL_DATA_DIR should be replaced with EXTRACT_TILES_GRID_INPUT_DIR - (met_util.py) ERROR: [config] STAT_LIST should be replaced with SERIES_ANALYSIS_STAT_LIST + ERROR: DEPRECATED CONFIG ITEMS WERE FOUND. PLEASE REMOVE/REPLACE THEM FROM CONFIG FILES + ERROR: [dir] MODEL_DATA_DIR should be replaced with EXTRACT_TILES_GRID_INPUT_DIR + ERROR: [config] STAT_LIST should be replaced with SERIES_ANALYSIS_STAT_LIST These cases can be handled automatically by using the :ref:`validate_config`. @@ -2666,10 +2666,10 @@ Starting in METplus 3.0, users are required to either explicitly set both FCST_* Example:: - (met_util.py) ERROR: If FCST_VAR1_NAME is set, the user must either set OBS_VAR1_NAME or change FCST_VAR1_NAME to BOTH_VAR1_NAME - (met_util.py) ERROR: If FCST_VAR2_NAME is set, the user must either set OBS_VAR2_NAME or change FCST_VAR2_NAME to BOTH_VAR2_NAME - (met_util.py) ERROR: If FCST_VAR1_LEVELS is set, the user must either set OBS_VAR1_LEVELS or change FCST_VAR1_LEVELS to BOTH_VAR1_LEVELS - (met_util.py) ERROR: If FCST_VAR2_LEVELS is set, the user must either set OBS_VAR2_LEVELS or change FCST_VAR2_LEVELS to BOTH_VAR2_LEVELS + ERROR: If FCST_VAR1_NAME is set, the user must either set OBS_VAR1_NAME or change FCST_VAR1_NAME to BOTH_VAR1_NAME + ERROR: If FCST_VAR2_NAME is set, the user must either set OBS_VAR2_NAME or change FCST_VAR2_NAME to BOTH_VAR2_NAME + ERROR: If FCST_VAR1_LEVELS is set, the user must either set OBS_VAR1_LEVELS or change FCST_VAR1_LEVELS to BOTH_VAR1_LEVELS + ERROR: If FCST_VAR2_LEVELS is set, the user must either set OBS_VAR2_LEVELS or change FCST_VAR2_LEVELS to BOTH_VAR2_LEVELS These cases can be handled automatically by using the :ref:`validate_config`, but users should review the suggested changes, as they may want to update differently. @@ -2682,7 +2682,7 @@ Instead of only being able to specify FCST_PCP_COMBINE_INPUT_LEVEL, users can no Example:: - (met_util.py) ERROR: [config] OBS_PCP_COMBINE_INPUT_LEVEL should be replaced with OBS_PCP_COMBINE_INPUT_ACCUMS + ERROR: [config] OBS_PCP_COMBINE_INPUT_LEVEL should be replaced with OBS_PCP_COMBINE_INPUT_ACCUMS These cases can be handled automatically by using the :ref:`validate_config`, but users should review the suggested changes, as they may want to include other available input accumulations. @@ -2719,17 +2719,17 @@ Due to these changes, MET configuration files that refer to any of these depreca Example log output:: - (met_util.py) DEBUG: Checking for deprecated environment variables in: DeprecatedConfig - (met_util.py) ERROR: Please remove deprecated environment variable ${GRID_VX} found in MET config file: DeprecatedConfig - (met_util.py) ERROR: MET to_grid variable should reference ${REGRID_TO_GRID} environment variable - (met_util.py) INFO: Be sure to set GRID_STAT_REGRID_TO_GRID to the correct value. + DEBUG: Checking for deprecated environment variables in: DeprecatedConfig + ERROR: Please remove deprecated environment variable ${GRID_VX} found in MET config file: DeprecatedConfig + ERROR: MET to_grid variable should reference ${REGRID_TO_GRID} environment variable + INFO: Be sure to set GRID_STAT_REGRID_TO_GRID to the correct value. 
- (met_util.py) ERROR: Please remove deprecated environment variable ${MET_VALID_HHMM} found in MET config file: DeprecatedConfig - (met_util.py) ERROR: Set GRID_STAT_CLIMO_MEAN_INPUT_[DIR/TEMPLATE] in a METplus config file to set CLIMO_MEAN_FILE in a MET config + ERROR: Please remove deprecated environment variable ${MET_VALID_HHMM} found in MET config file: DeprecatedConfig + ERROR: Set GRID_STAT_CLIMO_MEAN_INPUT_[DIR/TEMPLATE] in a METplus config file to set CLIMO_MEAN_FILE in a MET config - (met_util.py) ERROR: output_prefix variable should reference ${OUTPUT_PREFIX} environment variable - (met_util.py) INFO: GRID_STAT_OUTPUT_PREFIX will need to be added to the METplus config file that sets GRID_STAT_CONFIG_FILE. Set it to: - (met_util.py) INFO: GRID_STAT_OUTPUT_PREFIX = {CURRENT_FCST_NAME}_vs_{CURRENT_OBS_NAME} + ERROR: output_prefix variable should reference ${OUTPUT_PREFIX} environment variable + INFO: GRID_STAT_OUTPUT_PREFIX will need to be added to the METplus config file that sets GRID_STAT_CONFIG_FILE. Set it to: + INFO: GRID_STAT_OUTPUT_PREFIX = {CURRENT_FCST_NAME}_vs_{CURRENT_OBS_NAME} These cases can be handled automatically by using the :ref:`validate_config`, but users should review the suggested changes and make sure they add the appropriate recommended METplus configuration variables to their files to achieve the same behavior. diff --git a/docs/use_cases/met_tool_wrapper/Example/Example.py b/docs/use_cases/met_tool_wrapper/Example/Example.py index b19ebcf781..3c4a11b43a 100644 --- a/docs/use_cases/met_tool_wrapper/Example/Example.py +++ b/docs/use_cases/met_tool_wrapper/Example/Example.py @@ -174,30 +174,30 @@ # # You should also see a series of log output listing init/valid times, forecast lead times, and filenames derived from the filename templates. 
Here is an excerpt:: # -# 12/30 19:44:02.901 metplus (met_util.py:425) INFO: **************************************** -# 12/30 19:44:02.901 metplus (met_util.py:426) INFO: * Running METplus -# 12/30 19:44:02.902 metplus (met_util.py:432) INFO: * at valid time: 201702010000 -# 12/30 19:44:02.902 metplus (met_util.py:435) INFO: **************************************** -# 12/30 19:44:02.902 metplus.Example (example_wrapper.py:58) INFO: Running ExampleWrapper at valid time 20170201000000 -# 12/30 19:44:02.902 metplus.Example (example_wrapper.py:63) INFO: Input directory is /dir/containing/example/data -# 12/30 19:44:02.902 metplus.Example (example_wrapper.py:64) INFO: Input template is {init?fmt=%Y%m%d}/file_{init?fmt=%Y%m%d}_{init?fmt=%2H}_F{lead?fmt=%3H}.ext -# 12/30 19:44:02.902 metplus.Example (example_wrapper.py:79) INFO: Processing forecast lead 3 hours initialized at 2017-01-31 21Z and valid at 2017-02-01 00Z -# 12/30 19:44:02.903 metplus.Example (example_wrapper.py:88) INFO: Looking in input directory for file: 20170131/file_20170131_21_F003.ext -# 12/30 19:44:02.903 metplus.Example (example_wrapper.py:79) INFO: Processing forecast lead 6 hours initialized at 2017-01-31 18Z and valid at 2017-02-01 00Z -# 12/30 19:44:02.903 metplus.Example (example_wrapper.py:88) INFO: Looking in input directory for file: 20170131/file_20170131_18_F006.ext -# 12/30 19:44:02.904 metplus.Example (example_wrapper.py:79) INFO: Processing forecast lead 9 hours initialized at 2017-01-31 15Z and valid at 2017-02-01 00Z -# 12/30 19:44:02.904 metplus.Example (example_wrapper.py:88) INFO: Looking in input directory for file: 20170131/file_20170131_15_F009.ext -# 12/30 19:44:02.904 metplus.Example (example_wrapper.py:79) INFO: Processing forecast lead 12 hours initialized at 2017-01-31 12Z and valid at 2017-02-01 00Z -# 12/30 19:44:02.904 metplus.Example (example_wrapper.py:88) INFO: Looking in input directory for file: 20170131/file_20170131_12_F012.ext -# 12/30 19:44:02.904 metplus (met_util.py:425) INFO: **************************************** -# 12/30 19:44:02.904 metplus (met_util.py:426) INFO: * Running METplus -# 12/30 19:44:02.905 metplus (met_util.py:432) INFO: * at valid time: 201702010600 -# 12/30 19:44:02.905 metplus (met_util.py:435) INFO: **************************************** -# 12/30 19:44:02.905 metplus.Example (example_wrapper.py:58) INFO: Running ExampleWrapper at valid time 20170201060000 -# 12/30 19:44:02.905 metplus.Example (example_wrapper.py:63) INFO: Input directory is /dir/containing/example/data -# 12/30 19:44:02.905 metplus.Example (example_wrapper.py:64) INFO: Input template is {init?fmt=%Y%m%d}/file_{init?fmt=%Y%m%d}_{init?fmt=%2H}_F{lead?fmt=%3H}.ext -# 12/30 19:44:02.905 metplus.Example (example_wrapper.py:79) INFO: Processing forecast lead 3 hours initialized at 2017-02-01 03Z and valid at 2017-02-01 06Z -# 12/30 19:44:02.906 metplus.Example (example_wrapper.py:88) INFO: Looking in input directory for file: 20170201/file_20170201_03_F003.ext +# 12/30 19:44:02.901 metplus INFO: **************************************** +# 12/30 19:44:02.901 metplus INFO: * Running METplus +# 12/30 19:44:02.902 metplus INFO: * at valid time: 201702010000 +# 12/30 19:44:02.902 metplus INFO: **************************************** +# 12/30 19:44:02.902 metplus INFO: Running ExampleWrapper at valid time 20170201000000 +# 12/30 19:44:02.902 metplus INFO: Input directory is /dir/containing/example/data +# 12/30 19:44:02.902 metplus INFO: Input template is 
{init?fmt=%Y%m%d}/file_{init?fmt=%Y%m%d}_{init?fmt=%2H}_F{lead?fmt=%3H}.ext +# 12/30 19:44:02.902 metplus INFO: Processing forecast lead 3 hours initialized at 2017-01-31 21Z and valid at 2017-02-01 00Z +# 12/30 19:44:02.903 metplus INFO: Looking in input directory for file: 20170131/file_20170131_21_F003.ext +# 12/30 19:44:02.903 metplus INFO: Processing forecast lead 6 hours initialized at 2017-01-31 18Z and valid at 2017-02-01 00Z +# 12/30 19:44:02.903 metplus INFO: Looking in input directory for file: 20170131/file_20170131_18_F006.ext +# 12/30 19:44:02.904 metplus INFO: Processing forecast lead 9 hours initialized at 2017-01-31 15Z and valid at 2017-02-01 00Z +# 12/30 19:44:02.904 metplus INFO: Looking in input directory for file: 20170131/file_20170131_15_F009.ext +# 12/30 19:44:02.904 metplus INFO: Processing forecast lead 12 hours initialized at 2017-01-31 12Z and valid at 2017-02-01 00Z +# 12/30 19:44:02.904 metplus INFO: Looking in input directory for file: 20170131/file_20170131_12_F012.ext +# 12/30 19:44:02.904 metplus INFO: **************************************** +# 12/30 19:44:02.904 metplus INFO: * Running METplus +# 12/30 19:44:02.905 metplus INFO: * at valid time: 201702010600 +# 12/30 19:44:02.905 metplus INFO: **************************************** +# 12/30 19:44:02.905 metplus INFO: Running ExampleWrapper at valid time 20170201060000 +# 12/30 19:44:02.905 metplus INFO: Input directory is /dir/containing/example/data +# 12/30 19:44:02.905 metplus INFO: Input template is {init?fmt=%Y%m%d}/file_{init?fmt=%Y%m%d}_{init?fmt=%2H}_F{lead?fmt=%3H}.ext +# 12/30 19:44:02.905 metplus INFO: Processing forecast lead 3 hours initialized at 2017-02-01 03Z and valid at 2017-02-01 06Z +# 12/30 19:44:02.906 metplus INFO: Looking in input directory for file: 20170201/file_20170201_03_F003.ext # ############################################################################## diff --git a/internal/tests/pytests/util/config/test_config.py b/internal/tests/pytests/util/config/test_config.py index 0465bc62be..5edd7670c5 100644 --- a/internal/tests/pytests/util/config/test_config.py +++ b/internal/tests/pytests/util/config/test_config.py @@ -6,8 +6,7 @@ from configparser import NoOptionError from shutil import which, rmtree -from metplus.util import met_util as util - +from metplus.util.constants import MISSING_DATA_VALUE @pytest.mark.parametrize( 'input_value, result', [ @@ -178,14 +177,14 @@ def test_getexe(metplus_config, input_value, result): 'input_value, default, result', [ ('1.1', None, 1.1), ('1.1', 2.2, 1.1), - (None, None, util.MISSING_DATA_VALUE), + (None, None, MISSING_DATA_VALUE), (None, 1.1, 1.1), ('integer', None, None), ('integer', 1.1, None), ('0', None, 0.0), ('0', 2.2, 0.0), - ('', None, util.MISSING_DATA_VALUE), - ('', 2.2, util.MISSING_DATA_VALUE), + ('', None, MISSING_DATA_VALUE), + ('', 2.2, MISSING_DATA_VALUE), ] ) def test_getfloat(metplus_config, input_value, default, result): @@ -205,7 +204,7 @@ def test_getfloat(metplus_config, input_value, default, result): 'input_value, default, result', [ ('1', None, 1), ('1', 2, 1), - (None, None, util.MISSING_DATA_VALUE), + (None, None, MISSING_DATA_VALUE), (None, 1, 1), ('integer', None, None), ('integer', 1, None), @@ -214,8 +213,8 @@ def test_getfloat(metplus_config, input_value, default, result): ('1.7', 2, None), ('1.0', None, None), ('1.0', 2, None), - ('', None, util.MISSING_DATA_VALUE), - ('', 2.2, util.MISSING_DATA_VALUE), + ('', None, MISSING_DATA_VALUE), + ('', 2.2, MISSING_DATA_VALUE), ] ) @pytest.mark.util diff 
--git a/internal/tests/pytests/util/config_metplus/test_config_metplus.py b/internal/tests/pytests/util/config_metplus/test_config_metplus.py index f7161d6c8b..8974d69b9a 100644 --- a/internal/tests/pytests/util/config_metplus/test_config_metplus.py +++ b/internal/tests/pytests/util/config_metplus/test_config_metplus.py @@ -7,7 +7,7 @@ from datetime import datetime from metplus.util import config_metplus - +from metplus.util.time_util import ti_calculate @pytest.mark.util def test_get_default_config_list(): @@ -150,9 +150,9 @@ def test_find_var_indices_fcst(metplus_config, data_types = ['FCST'] config.set('config', config_var_name, "NAME1") met_tool = 'grid_stat' if set_met_tool else None - var_name_indices = config_metplus.find_var_name_indices(config, - data_types=data_types, - met_tool=met_tool) + var_name_indices = config_metplus._find_var_name_indices(config, + data_types=data_types, + met_tool=met_tool) assert len(var_name_indices) == len(expected_indices) for actual_index in var_name_indices: @@ -648,7 +648,7 @@ def test_find_var_indices_wrapper_specific(metplus_config, met_tool, indices): conf.set('config', f'{data_type}_VAR1_NAME', "NAME1") conf.set('config', f'{data_type}_GRID_STAT_VAR2_NAME', "GSNAME2") - var_name_indices = config_metplus.find_var_name_indices(conf, data_types=[data_type], + var_name_indices = config_metplus._find_var_name_indices(conf,data_types=[data_type], met_tool=met_tool) assert var_name_indices == indices @@ -1073,3 +1073,95 @@ def test_getraw_instance_with_unset_var(metplus_config): ) new_config.set('config', 'CURRENT_FCST_NAME', 'NAME') assert new_config.getraw('config', 'OUTPUT_PREFIX') == 'FCST_NAME' + + +@pytest.mark.parametrize( + 'config_value, expected_result', [ + # 2 items semi-colon at end + ('GRIB_lvl_typ = 234; desc = "HI_CLOUD";', + 'GRIB_lvl_typ = 234; desc = "HI_CLOUD";'), + # 2 items no semi-colon at end + ('GRIB_lvl_typ = 234; desc = "HI_CLOUD"', + 'GRIB_lvl_typ = 234; desc = "HI_CLOUD";'), + # 1 item semi-colon at end + ('GRIB_lvl_typ = 234;', + 'GRIB_lvl_typ = 234;'), + # 1 item no semi-colon at end + ('GRIB_lvl_typ = 234', + 'GRIB_lvl_typ = 234;'), + ] +) +@pytest.mark.util +def test_format_var_items_options_semicolon(config_value, + expected_result): + time_info = {} + + field_configs = {'name': 'FNAME', + 'levels': 'FLEVEL', + 'options': config_value} + + var_items = config_metplus._format_var_items(field_configs, time_info) + result = var_items.get('extra') + assert result == expected_result + + +@pytest.mark.parametrize( + 'input_dict, expected_list', [ + ({'init': datetime(2019, 2, 1, 6), + 'lead': 7200, }, + [ + {'index': '1', + 'fcst_name': 'FNAME_2019', + 'fcst_level': 'Z06', + 'obs_name': 'ONAME_2019', + 'obs_level': 'L06', + }, + {'index': '1', + 'fcst_name': 'FNAME_2019', + 'fcst_level': 'Z08', + 'obs_name': 'ONAME_2019', + 'obs_level': 'L08', + }, + ]), + ({'init': datetime(2021, 4, 13, 9), + 'lead': 10800, }, + [ + {'index': '1', + 'fcst_name': 'FNAME_2021', + 'fcst_level': 'Z09', + 'obs_name': 'ONAME_2021', + 'obs_level': 'L09', + }, + {'index': '1', + 'fcst_name': 'FNAME_2021', + 'fcst_level': 'Z12', + 'obs_name': 'ONAME_2021', + 'obs_level': 'L12', + }, + ]), + ] +) +@pytest.mark.util +def test_sub_var_list(metplus_config, input_dict, expected_list): + config = metplus_config + config.set('config', 'FCST_VAR1_NAME', 'FNAME_{init?fmt=%Y}') + config.set('config', 'FCST_VAR1_LEVELS', 'Z{init?fmt=%H}, Z{valid?fmt=%H}') + config.set('config', 'OBS_VAR1_NAME', 'ONAME_{init?fmt=%Y}') + config.set('config', 'OBS_VAR1_LEVELS', 
'L{init?fmt=%H}, L{valid?fmt=%H}') + + time_info = ti_calculate(input_dict) + + actual_temp = config_metplus.parse_var_list(config) + + pp = pprint.PrettyPrinter() + print(f'Actual var list (before sub):') + pp.pprint(actual_temp) + + actual_list = config_metplus.sub_var_list(actual_temp, time_info) + print(f'Actual var list (after sub):') + pp.pprint(actual_list) + + assert len(actual_list) == len(expected_list) + for actual, expected in zip(actual_list, expected_list): + for key, value in expected.items(): + assert actual.get(key) == value diff --git a/internal/tests/pytests/util/met_util/test_met_util.py b/internal/tests/pytests/util/met_util/test_met_util.py deleted file mode 100644 index 784d4b0345..0000000000 --- a/internal/tests/pytests/util/met_util/test_met_util.py +++ /dev/null @@ -1,668 +0,0 @@ -#!/usr/bin/env python3 - -import pytest - -import datetime -import os -from dateutil.relativedelta import relativedelta -import pprint - -from metplus.util import met_util as util -from metplus.util import time_util -from metplus.util.config_metplus import parse_var_list - - -@pytest.mark.parametrize( - 'key, value', [ - ({"gt2.3", "gt5.5"}, True), - ({"ge2.3", "ge5.5"}, True), - ({"eq2.3"}, True), - ({"ne2.3"}, True), - ({"lt2.3", "lt1.1"}, True), - ({"le2.3", "le1.1"}, True), - ({">2.3", ">5.5"}, True), - ({">=2.3", ">=5.5"}, True), - ({"==2.3"}, True), - ({"!=.3"}, True), - ({"<2.3", "<1."}, True), - ({"<=2.3", "<=1.1"}, True), - ({"gta"}, False), - ({"gt"}, False), - ({">=a"}, False), - ({"2.3"}, False), - ({"<=2.3", "2.4", "gt2.7"}, False), - ({"<=2.3||>=4.2", "gt2.3&<4.2"}, True), - ({"gt2.3&<4.2a"}, True), - ({"gt2sd.3&<4.2"}, True), - ({"gt2.3&a<4.2"}, True), # invalid but is accepted - ({'gt4&<5&&ne4.5'}, True), - ({"<2.3", "ge5", ">SPF90"}, True), - (["NA"], True), - (["SFP70", ">SFP80", ">SFP90", ">SFP95"], True), - ([">SFP70", ">SFP80", ">SFP90", ">SFP95"], True), - ] -) -@pytest.mark.util -def test_threshold(key, value): - assert util.validate_thresholds(key) == value - - -# parses a threshold and returns a list of tuples of -# comparison and number, i.e.: -# 'gt4' => [('gt', 4)] -# gt4&<5 => [('gt', 4), ('lt', 5)] -@pytest.mark.parametrize( - 'key, value', [ - ('gt4', [('gt', 4)]), - ('gt4&<5', [('gt', 4), ('lt', 5)]), - ('gt4&<5&&ne4.5', [('gt', 4), ('lt', 5), ('ne', 4.5)]), - (">4.545", [('>', 4.545)]), - (">=4.0", [('>=', 4.0)]), - ("<4.5", [('<', 4.5)]), - ("<=4.5", [('<=', 4.5)]), - ("!=4.5", [('!=', 4.5)]), - ("==4.5", [('==', 4.5)]), - ("gt4.5", [('gt', 4.5)]), - ("ge4.5", [('ge', 4.5)]), - ("lt4.5", [('lt', 4.5)]), - ("le4.5", [('le', 4.5)]), - ("ne10.5", [('ne', 10.5)]), - ("eq4.5", [('eq', 4.5)]), - ("eq-4.5", [('eq', -4.5)]), - ("eq+4.5", [('eq', 4.5)]), - ("eq.5", [('eq', 0.5)]), - ("eq5.", [('eq', 5)]), - ("eq5.||ne0.0", [('eq', 5), ('ne', 0.0)]), - (">SFP90", [('>', 'SFP90')]), - ("SFP90", None), - ("gtSFP90", [('gt', 'SFP90')]), - ("goSFP90", None), - ("NA", [('NA', '')]), - ("2.3", ">5.5"}, True), + ({">=2.3", ">=5.5"}, True), + ({"==2.3"}, True), + ({"!=.3"}, True), + ({"<2.3", "<1."}, True), + ({"<=2.3", "<=1.1"}, True), + ({"gta"}, False), + ({"gt"}, False), + ({">=a"}, False), + ({"2.3"}, False), + ({"<=2.3", "2.4", "gt2.7"}, False), + ({"<=2.3||>=4.2", "gt2.3&<4.2"}, True), + ({"gt2.3&<4.2a"}, True), + ({"gt2sd.3&<4.2"}, True), + ({"gt2.3&a<4.2"}, True), # invalid but is accepted + ({'gt4&<5&&ne4.5'}, True), + ({"<2.3", "ge5", ">SPF90"}, True), + (["NA"], True), + (["SFP70", ">SFP80", ">SFP90", ">SFP95"], True), + ([">SFP70", ">SFP80", ">SFP90", 
">SFP95"], True), + ] +) +@pytest.mark.util +def test_threshold(key, value): + assert validate_thresholds(key) == value + + +# parses a threshold and returns a list of tuples of +# comparison and number, i.e.: +# 'gt4' => [('gt', 4)] +# gt4&<5 => [('gt', 4), ('lt', 5)] +@pytest.mark.parametrize( + 'key, value', [ + ('gt4', [('gt', 4)]), + ('gt4&<5', [('gt', 4), ('lt', 5)]), + ('gt4&<5&&ne4.5', [('gt', 4), ('lt', 5), ('ne', 4.5)]), + (">4.545", [('>', 4.545)]), + (">=4.0", [('>=', 4.0)]), + ("<4.5", [('<', 4.5)]), + ("<=4.5", [('<=', 4.5)]), + ("!=4.5", [('!=', 4.5)]), + ("==4.5", [('==', 4.5)]), + ("gt4.5", [('gt', 4.5)]), + ("ge4.5", [('ge', 4.5)]), + ("lt4.5", [('lt', 4.5)]), + ("le4.5", [('le', 4.5)]), + ("ne10.5", [('ne', 10.5)]), + ("eq4.5", [('eq', 4.5)]), + ("eq-4.5", [('eq', -4.5)]), + ("eq+4.5", [('eq', 4.5)]), + ("eq.5", [('eq', 0.5)]), + ("eq5.", [('eq', 5)]), + ("eq5.||ne0.0", [('eq', 5), ('ne', 0.0)]), + (">SFP90", [('>', 'SFP90')]), + ("SFP90", None), + ("gtSFP90", [('gt', 'SFP90')]), + ("goSFP90", None), + ("NA", [('NA', '')]), + (" items for data type and/or met tool indices = [] if met_tool: - indices = find_var_name_indices(config, data_types, met_tool).keys() + indices = _find_var_name_indices(config, data_types, met_tool).keys() if not indices: - indices = find_var_name_indices(config, data_types).keys() + indices = _find_var_name_indices(config, data_types).keys() # get config name prefixes for each data type to find dt_search_prefixes = {} @@ -1606,7 +1610,7 @@ def parse_var_list(config, time_info=None, data_type=None, met_tool=None, index, search_prefixes) - field_info = format_var_items(field_configs, time_info) + field_info = _format_var_items(field_configs, time_info) if not isinstance(field_info, dict): config.logger.error(f'Could not process {current_type}_' f'VAR{index} variables: {field_info}') @@ -1707,7 +1711,7 @@ def parse_var_list(config, time_info=None, data_type=None, met_tool=None, ''' return sorted(var_list, key=lambda x: x['index']) -def find_var_name_indices(config, data_types, met_tool=None): +def _find_var_name_indices(config, data_types, met_tool=None): data_type_regex = f"{'|'.join(data_types)}" # if data_types includes FCST or OBS, also search for BOTH @@ -1728,6 +1732,94 @@ def find_var_name_indices(config, data_types, met_tool=None): index_index=2, id_index=1) + +def _format_var_items(field_configs, time_info=None): + """! Substitute time information into field information and format values. + + @param field_configs dictionary with config variable names to read + @param time_info dictionary containing time info for current run + @returns dictionary containing name, levels, and output_names, as + well as thresholds and extra options if found. If not enough + information was set in the METplusConfig object, an empty + dictionary is returned. 
+ """ + # dictionary to hold field (var) item info + var_items = {} + + # set defaults for optional items + var_items['levels'] = [] + var_items['thresh'] = [] + var_items['extra'] = '' + var_items['output_names'] = [] + + # get name, return error string if not found + search_name = field_configs.get('name') + if not search_name: + return 'Name not found' + + # perform string substitution on name + if time_info: + search_name = do_string_sub(search_name, + skip_missing_tags=True, + **time_info) + var_items['name'] = search_name + + # get levels, performing string substitution on each item of list + for level in getlist(field_configs.get('levels')): + if time_info: + level = do_string_sub(level, + **time_info) + var_items['levels'].append(level) + + # if no levels are found, add an empty string + if not var_items['levels']: + var_items['levels'].append('') + + # get threshold list if it is set + # return error string if any thresholds not formatted properly + search_thresh = field_configs.get('thresh') + if search_thresh: + thresh = getlist(search_thresh) + if not validate_thresholds(thresh): + return 'Invalid threshold supplied' + + var_items['thresh'] = thresh + + # get extra options if it is set, format with semi-colons between items + search_extra = field_configs.get('options') + if search_extra: + if time_info: + search_extra = do_string_sub(search_extra, + **time_info) + + # strip off empty space around each value + extra_list = [item.strip() for item in search_extra.split(';')] + + # split up each item by semicolon, then add a semicolon to the end + # use list(filter(None to remove empty strings from list + extra_list = list(filter(None, extra_list)) + var_items['extra'] = f"{'; '.join(extra_list)};" + + # get output names if they are set + out_name_str = field_configs.get('output_names') + + # use input name for each level if not set + if not out_name_str: + for _ in var_items['levels']: + var_items['output_names'].append(var_items['name']) + else: + for out_name in getlist(out_name_str): + if time_info: + out_name = do_string_sub(out_name, + **time_info) + var_items['output_names'].append(out_name) + + if len(var_items['levels']) != len(var_items['output_names']): + return 'Number of levels does not match number of output names' + + return var_items + + def skip_field_info_validation(config): """!Check config to see if having corresponding FCST/OBS variables is necessary. If process list only contains reformatter wrappers, don't validate field info. Also, if MTD is in the process list and @@ -1934,3 +2026,170 @@ def get_field_config_variables(config, index, search_prefixes): break return field_configs + + +def is_loop_by_init(config): + """!Check config variables to determine if looping by valid or init time""" + if config.has_option('config', 'LOOP_BY'): + loop_by = config.getstr('config', 'LOOP_BY').lower() + if loop_by in ['init', 'retro']: + return True + elif loop_by in ['valid', 'realtime']: + return False + + if config.has_option('config', 'LOOP_BY_INIT'): + return config.getbool('config', 'LOOP_BY_INIT') + + msg = 'MUST SET LOOP_BY to VALID, INIT, RETRO, or REALTIME' + if config.logger is None: + print(msg) + else: + config.logger.error(msg) + + return None + + +def handle_tmp_dir(config): + """! 
if env var MET_TMP_DIR is set, override config TMP_DIR with value + if it differs from what is set + get config temp dir using getdir_nocheck to bypass check for /path/to + this is done so the user can set env MET_TMP_DIR instead of config TMP_DIR + and config TMP_DIR will be set automatically""" + handle_env_var_config(config, 'MET_TMP_DIR', 'TMP_DIR') + + # create temp dir if it doesn't exist already + # this will fail if TMP_DIR is not set correctly and + # env MET_TMP_DIR was not set + mkdir_p(config.getdir('TMP_DIR')) + + +def handle_env_var_config(config, env_var_name, config_name): + """! If environment variable is set, use that value + for the config variable and warn if the previous config value differs + + @param config METplusConfig object to read + @param env_var_name name of environment variable to read + @param config_name name of METplus config variable to check + """ + env_var_value = os.environ.get(env_var_name, '') + config_value = config.getdir_nocheck(config_name, '') + + # do nothing if environment variable is not set + if not env_var_value: + return + + # override the config variable with the environment variable value + config.set('config', config_name, env_var_value) + + # if the config value already matches the environment variable, no warning is needed + if config_value == env_var_value: + return + + config.logger.warning(f'Config variable {config_name} ({config_value}) ' + 'will be overridden by the environment variable ' + f'{env_var_name} ({env_var_value})') + + +def write_all_commands(all_commands, config): + """! Write all commands that were run to a file in the log + directory. This includes the environment variables that + were set before each command. + + @param all_commands list of tuples with command run and + list of environment variables that were set + @param config METplusConfig object used to write log output + and get the log timestamp to name the output file + @returns False if no commands were provided, True otherwise + """ + if not all_commands: + config.logger.error("No commands were run. " + "Skip writing all_commands file") + return False + + log_timestamp = config.getstr('config', 'LOG_TIMESTAMP') + filename = os.path.join(config.getdir('LOG_DIR'), + f'.all_commands.{log_timestamp}') + config.logger.debug(f"Writing all commands and environment to {filename}") + with open(filename, 'w') as file_handle: + for command, envs in all_commands: + for env in envs: + file_handle.write(f"{env}\n") + + file_handle.write("COMMAND:\n") + file_handle.write(f"{command}\n\n") + + return True + + +def write_final_conf(config): + """! Write final conf file including default values that were set during + run. Move variables that are specific to the user's run to the [runtime] + section to avoid issues such as overwriting existing log files. 
+ + @param config METplusConfig object to write to file + """ + final_conf = config.getstr('config', 'METPLUS_CONF') + + # remove variables that start with CURRENT + config.remove_current_vars() + + # move runtime variables to [runtime] section + config.move_runtime_configs() + + config.logger.info('Overwrite final conf here: %s' % (final_conf,)) + with open(final_conf, 'wt') as conf_file: + config.write(conf_file) + + +def log_runtime_banner(config, time_input, process): + loop_by = time_input['loop_by'] + run_time = time_input[loop_by].strftime("%Y-%m-%d %H:%M") + + process_name = process.__class__.__name__ + if process.instance: + process_name = f"{process_name}({process.instance})" + + config.logger.info("****************************************") + config.logger.info(f"* Running METplus {process_name}") + config.logger.info(f"* at {loop_by} time: {run_time}") + config.logger.info("****************************************") + + +def sub_var_list(var_list, time_info): + """! Perform string substitution on var list values with time info + + @param var_list list of field info to substitute values into + @param time_info dictionary containing time information + @returns var_list with values substituted + """ + if not var_list: + return [] + + out_var_list = [] + for var_info in var_list: + out_var_info = _sub_var_info(var_info, time_info) + out_var_list.append(out_var_info) + + return out_var_list + + +def _sub_var_info(var_info, time_info): + if not var_info: + return {} + + out_var_info = {} + for key, value in var_info.items(): + if isinstance(value, list): + out_value = [] + for item in value: + out_value.append(do_string_sub(item, + skip_missing_tags=True, + **time_info)) + else: + out_value = do_string_sub(value, + skip_missing_tags=True, + **time_info) + + out_var_info[key] = out_value + + return out_var_info diff --git a/metplus/util/constants.py b/metplus/util/constants.py index e56f9def51..5e6f3dcb99 100644 --- a/metplus/util/constants.py +++ b/metplus/util/constants.py @@ -108,3 +108,8 @@ # datetime year month day hour minute second (YYYYMMDD_HHMMSS) notation YMD_HMS = '%Y%m%d_%H%M%S' + +# missing data value used to check if integer values are not set +# we often check for None if a variable is not set, but 0 and None +# have the same result in a test. 
0 may be a valid integer value +MISSING_DATA_VALUE = -9999 diff --git a/metplus/util/met_config.py b/metplus/util/met_config.py index 643e0db134..ddb4ef71ca 100644 --- a/metplus/util/met_config.py +++ b/metplus/util/met_config.py @@ -6,9 +6,8 @@ import os import re -from .constants import PYTHON_EMBEDDING_TYPES, CLIMO_TYPES -from .string_manip import getlist -from .met_util import get_threshold_via_regex, MISSING_DATA_VALUE +from .constants import PYTHON_EMBEDDING_TYPES, CLIMO_TYPES, MISSING_DATA_VALUE +from .string_manip import getlist, get_threshold_via_regex from .string_manip import remove_quotes as util_remove_quotes from .config_metplus import find_indices_in_config_section, parse_var_list from .field_util import format_all_field_info diff --git a/metplus/util/met_util.py b/metplus/util/met_util.py deleted file mode 100644 index d9fb9b6c5c..0000000000 --- a/metplus/util/met_util.py +++ /dev/null @@ -1,1476 +0,0 @@ -import os -import shutil -import sys -from datetime import datetime, timedelta, timezone -import re -import gzip -import bz2 -import zipfile -import struct -import getpass -from dateutil.relativedelta import relativedelta -from pathlib import Path -from importlib import import_module - -from .string_manip import getlist, getlistint -from .string_template_substitution import do_string_sub -from .string_template_substitution import parse_template -from . import time_util as time_util -from .time_looping import time_generator -from .. import get_metplus_version - -"""!@namespace met_util - @brief Provides Utility functions for METplus. -""" - -from .constants import * - -# missing data value used to check if integer values are not set -# we often check for None if a variable is not set, but 0 and None -# have the same result in a test. 0 may be a valid integer value -MISSING_DATA_VALUE = -9999 - -def pre_run_setup(config_inputs): - from . import config_metplus - version_number = get_metplus_version() - print(f'Starting METplus v{version_number}') - - # Read config inputs and return a config instance - config = config_metplus.setup(config_inputs) - - logger = config.logger - - user_info = get_user_info() - user_string = f' as user {user_info} ' if user_info else ' ' - - config.set('config', 'METPLUS_VERSION', version_number) - logger.info('Running METplus v%s%swith command: %s', - version_number, user_string, ' '.join(sys.argv)) - - logger.info(f"Log file: {config.getstr('config', 'LOG_METPLUS')}") - logger.info(f"METplus Base: {config.getdir('METPLUS_BASE')}") - logger.info(f"Final Conf: {config.getstr('config', 'METPLUS_CONF')}") - config_list = config.getstr('config', 'CONFIG_INPUT').split(',') - for config_item in config_list: - logger.info(f"Config Input: {config_item}") - - # validate configuration variables - isOK_A, isOK_B, isOK_C, isOK_D, all_sed_cmds = config_metplus.validate_configuration_variables(config) - if not (isOK_A and isOK_B and isOK_C and isOK_D): - # if any sed commands were generated, write them to the sed file - if all_sed_cmds: - sed_file = os.path.join(config.getdir('OUTPUT_BASE'), 'sed_commands.txt') - # remove if sed file exists - if os.path.exists(sed_file): - os.remove(sed_file) - - write_list_to_file(sed_file, all_sed_cmds) - config.logger.error(f"Find/Replace commands have been generated in {sed_file}") - - logger.error("Correct configuration variables and rerun. 
Exiting.") - sys.exit(1) - - if not config.getdir('MET_INSTALL_DIR', must_exist=True): - logger.error('MET_INSTALL_DIR must be set correctly to run METplus') - sys.exit(1) - - # set staging dir to OUTPUT_BASE/stage if not set - if not config.has_option('config', 'STAGING_DIR'): - config.set('config', 'STAGING_DIR', - os.path.join(config.getdir('OUTPUT_BASE'), "stage")) - - # handle dir to write temporary files - handle_tmp_dir(config) - - # handle OMP_NUM_THREADS environment variable - handle_env_var_config(config, - env_var_name='OMP_NUM_THREADS', - config_name='OMP_NUM_THREADS') - - config.env = os.environ.copy() - - return config - -def run_metplus(config, process_list): - total_errors = 0 - - try: - processes = [] - for process, instance in process_list: - try: - logname = f"{process}.{instance}" if instance else process - logger = config.log(logname) - package_name = ('metplus.wrappers.' - f'{camel_to_underscore(process)}_wrapper') - module = import_module(package_name) - command_builder = ( - getattr(module, f"{process}Wrapper")(config, - instance=instance) - ) - - # if Usage specified in PROCESS_LIST, print usage and exit - if process == 'Usage': - command_builder.run_all_times() - return 0 - except AttributeError: - logger.error("There was a problem loading " - f"{process} wrapper.") - return 1 - except ModuleNotFoundError: - logger.error(f"Could not load {process} wrapper. " - "Wrapper may have been disabled.") - return 1 - - processes.append(command_builder) - - # check if all processes initialized correctly - allOK = True - for process in processes: - if not process.isOK: - allOK = False - class_name = process.__class__.__name__.replace('Wrapper', '') - logger.error("{} was not initialized properly".format(class_name)) - - # exit if any wrappers did not initialized properly - if not allOK: - logger.info("Refer to ERROR messages above to resolve issues.") - return 1 - - all_commands = [] - for process in processes: - new_commands = process.run_all_times() - if new_commands: - all_commands.extend(new_commands) - - # if process list contains any wrapper that should run commands - if any([item[0] not in NO_COMMAND_WRAPPERS for item in process_list]): - # write out all commands and environment variables to file - if not write_all_commands(all_commands, config): - # report an error if no commands were generated - total_errors += 1 - - # compute total number of errors that occurred and output results - for process in processes: - if process.errors != 0: - process_name = process.__class__.__name__.replace('Wrapper', '') - error_msg = '{} had {} error'.format(process_name, process.errors) - if process.errors > 1: - error_msg += 's' - error_msg += '.' 
- logger.error(error_msg) - total_errors += process.errors - - return total_errors - except: - logger.exception("Fatal error occurred") - logger.info(f"Check the log file for more information: {config.getstr('config', 'LOG_METPLUS')}") - return 1 - -def post_run_cleanup(config, app_name, total_errors): - logger = config.logger - # scrub staging directory if requested - if (config.getbool('config', 'SCRUB_STAGING_DIR') and - os.path.exists(config.getdir('STAGING_DIR'))): - staging_dir = config.getdir('STAGING_DIR') - logger.info("Scrubbing staging dir: %s", staging_dir) - logger.info('Set SCRUB_STAGING_DIR to False to preserve ' - 'intermediate files.') - shutil.rmtree(staging_dir) - - # save log file path and clock time before writing final conf file - log_message = (f"Check the log file for more information: " - f"{config.getstr('config', 'LOG_METPLUS')}") - - start_clock_time = datetime.strptime(config.getstr('config', 'CLOCK_TIME'), - '%Y%m%d%H%M%S') - - # rewrite final conf so it contains all of the default values used - write_final_conf(config) - - # compute time it took to run - end_clock_time = datetime.now() - total_run_time = end_clock_time - start_clock_time - logger.debug(f"{app_name} took {total_run_time} to run.") - - user_info = get_user_info() - user_string = f' as user {user_info}' if user_info else '' - if not total_errors: - logger.info(log_message) - logger.info('%s has successfully finished running%s.', - app_name, user_string) - return - - error_msg = (f'{app_name} has finished running{user_string} ' - f'but had {total_errors} error') - if total_errors > 1: - error_msg += 's' - error_msg += '.' - logger.error(error_msg) - logger.info(log_message) - sys.exit(1) - -def get_user_info(): - """! Get user information from OS. Note that some OS cannot obtain user ID - and some cannot obtain username. - @returns username(uid) if both username and user ID can be read, - username if only username can be read, uid if only user ID can be read, - or an empty string if neither can be read. - """ - try: - username = getpass.getuser() - except OSError: - username = None - - try: - uid = os.getuid() - except AttributeError: - uid = None - - if username and uid: - return f'{username}({uid})' - - if username: - return username - - if uid: - return uid - - return '' - -def write_all_commands(all_commands, config): - """! Write all commands that were run to a file in the log - directory. This includes the environment variables that - were set before each command. - - @param all_commands list of tuples with command run and - list of environment variables that were set - @param config METplusConfig object used to write log output - and get the log timestamp to name the output file - @returns False if no commands were provided, True otherwise - """ - if not all_commands: - config.logger.error("No commands were run. " - "Skip writing all_commands file") - return False - - log_timestamp = config.getstr('config', 'LOG_TIMESTAMP') - filename = os.path.join(config.getdir('LOG_DIR'), - f'.all_commands.{log_timestamp}') - config.logger.debug(f"Writing all commands and environment to {filename}") - with open(filename, 'w') as file_handle: - for command, envs in all_commands: - for env in envs: - file_handle.write(f"{env}\n") - - file_handle.write("COMMAND:\n") - file_handle.write(f"{command}\n\n") - - return True - -def handle_tmp_dir(config): - """! 
if env var MET_TMP_DIR is set, override config TMP_DIR with value - if it differs from what is set - get config temp dir using getdir_nocheck to bypass check for /path/to - this is done so the user can set env MET_TMP_DIR instead of config TMP_DIR - and config TMP_DIR will be set automatically""" - handle_env_var_config(config, 'MET_TMP_DIR', 'TMP_DIR') - - # create temp dir if it doesn't exist already - # this will fail if TMP_DIR is not set correctly and - # env MET_TMP_DIR was not set - mkdir_p(config.getdir('TMP_DIR')) - -def handle_env_var_config(config, env_var_name, config_name): - """! If environment variable is set, use that value - for the config variable and warn if the previous config value differs - - @param config METplusConfig object to read - @param env_var_name name of environment variable to read - @param config_name name of METplus config variable to check - """ - env_var_value = os.environ.get(env_var_name, '') - config_value = config.getdir_nocheck(config_name, '') - - # do nothing if environment variable is not set - if not env_var_value: - return - - # override config config variable to environment variable value - config.set('config', config_name, env_var_value) - - # if config config value differed from environment variable value, warn - if config_value != env_var_value: - config.logger.warning(f'Config variable {config_name} ({config_value}) ' - 'will be overridden by the environment variable ' - f'{env_var_name} ({env_var_value})') - -def get_skip_times(config, wrapper_name=None): - """! Read SKIP_TIMES config variable and populate dictionary of times that should be skipped. - SKIP_TIMES should be in the format: "%m:begin_end_incr(3,11,1)", "%d:30,31", "%Y%m%d:20201031" - where each item inside quotes is a datetime format, colon, then a list of times in that format - to skip. - Args: - @param config configuration object to pull SKIP_TIMES - @param wrapper_name name of wrapper if supporting - skipping times only for certain wrappers, i.e. grid_stat - @returns dictionary containing times to skip - """ - skip_times_dict = {} - skip_times_string = None - - # if wrapper name is set, look for wrapper-specific _SKIP_TIMES variable - if wrapper_name: - skip_times_string = config.getstr('config', - f'{wrapper_name.upper()}_SKIP_TIMES', '') - - # if skip times string has not been found, check for generic SKIP_TIMES - if not skip_times_string: - skip_times_string = config.getstr('config', 'SKIP_TIMES', '') - - # if no generic SKIP_TIMES, return empty dictionary - if not skip_times_string: - return {} - - # get list of skip items, but don't expand begin_end_incr yet - skip_list = getlist(skip_times_string, expand_begin_end_incr=False) - - for skip_item in skip_list: - try: - time_format, skip_times = skip_item.split(':') - - # get list of skip times for the time format, expanding begin_end_incr - skip_times_list = getlist(skip_times) - - # if time format is already in skip times dictionary, extend list - if time_format in skip_times_dict: - skip_times_dict[time_format].extend(skip_times_list) - else: - skip_times_dict[time_format] = skip_times_list - - except ValueError: - config.logger.error(f"SKIP_TIMES item does not match format: {skip_item}") - return None - - return skip_times_dict - -def skip_time(time_info, skip_times): - """!Used to check the valid time of the current run time against list of times to skip. - Args: - @param time_info dictionary with time information to check - @param skip_times dictionary of times to skip, i.e. 
{'%d': [31]} means skip 31st day - @returns True if run time should be skipped, False if not - """ - if not skip_times: - return False - - for time_format, skip_time_list in skip_times.items(): - # extract time information from valid time based on skip time format - run_time_value = time_info.get('valid') - if not run_time_value: - return False - - run_time_value = run_time_value.strftime(time_format) - - # loop over times to skip for this format and check if it matches - for skip_time in skip_time_list: - if int(run_time_value) == int(skip_time): - return True - - # if skip time never matches, return False - return False - -def write_final_conf(config): - """! Write final conf file including default values that were set during - run. Move variables that are specific to the user's run to the [runtime] - section to avoid issues such as overwriting existing log files. - - @param config METplusConfig object to write to file - """ - final_conf = config.getstr('config', 'METPLUS_CONF') - - # remove variables that start with CURRENT - config.remove_current_vars() - - # move runtime variables to [runtime] section - config.move_runtime_configs() - - config.logger.info('Overwrite final conf here: %s' % (final_conf,)) - with open(final_conf, 'wt') as conf_file: - config.write(conf_file) - -def is_loop_by_init(config): - """!Check config variables to determine if looping by valid or init time""" - if config.has_option('config', 'LOOP_BY'): - loop_by = config.getstr('config', 'LOOP_BY').lower() - if loop_by in ['init', 'retro']: - return True - elif loop_by in ['valid', 'realtime']: - return False - - if config.has_option('config', 'LOOP_BY_INIT'): - return config.getbool('config', 'LOOP_BY_INIT') - - msg = 'MUST SET LOOP_BY to VALID, INIT, RETRO, or REALTIME' - if config.logger is None: - print(msg) - else: - config.logger.error(msg) - - return None - -def loop_over_times_and_call(config, processes, custom=None): - """! 
Loop over all run times and call wrappers listed in config - - @param config METplusConfig object - @param processes list of CommandBuilder subclass objects (Wrappers) to call - @param custom (optional) custom loop string value - @returns list of tuples with all commands run and the environment variables - that were set for each - """ - # keep track of commands that were run - all_commands = [] - for time_input in time_generator(config): - if not isinstance(processes, list): - processes = [processes] - - for process in processes: - # if time could not be read, increment errors for each process - if time_input is None: - process.errors += 1 - continue - - log_runtime_banner(config, time_input, process) - add_to_time_input(time_input, - instance=process.instance, - custom=custom) - - process.clear() - process.run_at_time(time_input) - if process.all_commands: - all_commands.extend(process.all_commands) - process.all_commands.clear() - - return all_commands - -def log_runtime_banner(config, time_input, process): - loop_by = time_input['loop_by'] - run_time = time_input[loop_by].strftime("%Y-%m-%d %H:%M") - - process_name = process.__class__.__name__ - if process.instance: - process_name = f"{process_name}({process.instance})" - - config.logger.info("****************************************") - config.logger.info(f"* Running METplus {process_name}") - config.logger.info(f"* at {loop_by} time: {run_time}") - config.logger.info("****************************************") - -def add_to_time_input(time_input, clock_time=None, instance=None, custom=None): - if clock_time: - clock_dt = datetime.strptime(clock_time, '%Y%m%d%H%M%S') - time_input['now'] = clock_dt - - # if instance is set, use that value, otherwise use empty string - time_input['instance'] = instance if instance else '' - - # if custom is specified, set it - # otherwise leave it unset so it can be set within the wrapper - if custom: - time_input['custom'] = custom - -def get_lead_sequence(config, input_dict=None, wildcard_if_empty=False): - """!Get forecast lead list from LEAD_SEQ or compute it from INIT_SEQ. - Restrict list by LEAD_SEQ_[MIN/MAX] if set. Now returns list of relativedelta objects - Args: - @param config METplusConfig object to query config variable values - @param input_dict time dictionary needed to handle using INIT_SEQ. 
Must contain - valid key if processing INIT_SEQ - @param wildcard_if_empty if no lead sequence was set, return a - list with '*' if this is True, otherwise return a list with 0 - @returns list of relativedelta objects or a list containing 0 if none are found - """ - - out_leads = [] - lead_min, lead_max, no_max = get_lead_min_max(config) - - # check if LEAD_SEQ, INIT_SEQ, or LEAD_SEQ_ are set - # if more than one is set, report an error and exit - lead_seq = getlist(config.getstr('config', 'LEAD_SEQ', '')) - init_seq = getlistint(config.getstr('config', 'INIT_SEQ', '')) - lead_groups = get_lead_sequence_groups(config) - - if not are_lead_configs_ok(lead_seq, - init_seq, - lead_groups, - config, - input_dict, - no_max): - return None - - if lead_seq: - # return lead sequence if wildcard characters are used - if lead_seq == ['*']: - return lead_seq - - out_leads = handle_lead_seq(config, - lead_seq, - lead_min, - lead_max) - - # use INIT_SEQ to build lead list based on the valid time - elif init_seq: - out_leads = handle_init_seq(init_seq, - input_dict, - lead_min, - lead_max) - elif lead_groups: - out_leads = handle_lead_groups(lead_groups) - - if not out_leads: - if wildcard_if_empty: - return ['*'] - - return [0] - - return out_leads - -def are_lead_configs_ok(lead_seq, init_seq, lead_groups, - config, input_dict, no_max): - if lead_groups is None: - return False - - error_message = ('are both listed in the configuration. ' - 'Only one may be used at a time.') - if lead_seq: - if init_seq: - config.logger.error(f'LEAD_SEQ and INIT_SEQ {error_message}') - return False - - if lead_groups: - config.logger.error(f'LEAD_SEQ and LEAD_SEQ_ {error_message}') - return False - - if init_seq and lead_groups: - config.logger.error(f'INIT_SEQ and LEAD_SEQ_ {error_message}') - return False - - if init_seq: - # if input dictionary not passed in, - # cannot compute lead sequence from it, so exit - if input_dict is None: - config.logger.error('Cannot run using INIT_SEQ for this wrapper') - return False - - # if looping by init, fail and exit - if 'valid' not in input_dict.keys(): - log_msg = ('INIT_SEQ specified while looping by init time.' - ' Use LEAD_SEQ or change to loop by valid time') - config.logger.error(log_msg) - return False - - # maximum lead must be specified to run with INIT_SEQ - if no_max: - config.logger.error('LEAD_SEQ_MAX must be set to use INIT_SEQ') - return False - - return True - -def get_lead_min_max(config): - # remove any items that are outside of the range specified - # by LEAD_SEQ_MIN and LEAD_SEQ_MAX - # convert min and max to relativedelta objects, then use current time - # to compare them to each forecast lead - # this is an approximation because relative time offsets depend on - # each runtime - huge_max = '4000Y' - lead_min_str = config.getstr_nocheck('config', 'LEAD_SEQ_MIN', '0') - lead_max_str = config.getstr_nocheck('config', 'LEAD_SEQ_MAX', huge_max) - no_max = lead_max_str == huge_max - lead_min = time_util.get_relativedelta(lead_min_str, 'H') - lead_max = time_util.get_relativedelta(lead_max_str, 'H') - return lead_min, lead_max, no_max - -def handle_lead_seq(config, lead_strings, lead_min=None, lead_max=None): - out_leads = [] - leads = [] - for lead in lead_strings: - relative_delta = time_util.get_relativedelta(lead, 'H') - if relative_delta is not None: - leads.append(relative_delta) - else: - config.logger.error(f'Invalid item {lead} in LEAD_SEQ. 
Exiting.') - return None - - if lead_min is None and lead_max is None: - return leads - - # add current time to leads to approximate month and year length - now_time = datetime.now() - lead_min_approx = now_time + lead_min - lead_max_approx = now_time + lead_max - for lead in leads: - lead_approx = now_time + lead - if lead_approx >= lead_min_approx and lead_approx <= lead_max_approx: - out_leads.append(lead) - - return out_leads - -def handle_init_seq(init_seq, input_dict, lead_min, lead_max): - out_leads = [] - lead_min_hours = time_util.ti_get_hours_from_relativedelta(lead_min) - lead_max_hours = time_util.ti_get_hours_from_relativedelta(lead_max) - - valid_hr = int(input_dict['valid'].strftime('%H')) - for init in init_seq: - if valid_hr >= init: - current_lead = valid_hr - init - else: - current_lead = valid_hr + (24 - init) - - while current_lead <= lead_max_hours: - if current_lead >= lead_min_hours: - out_leads.append(relativedelta(hours=current_lead)) - current_lead += 24 - - out_leads = sorted(out_leads, key=lambda - rd: time_util.ti_get_seconds_from_relativedelta(rd, - input_dict['valid'])) - return out_leads - -def handle_lead_groups(lead_groups): - """! Read groups of forecast leads and create a list with all unique items - - @param lead_group dictionary where the values are lists of forecast - leads stored as relativedelta objects - @returns list of forecast leads stored as relativedelta objects - """ - out_leads = [] - for _, lead_seq in lead_groups.items(): - for lead in lead_seq: - if lead not in out_leads: - out_leads.append(lead) - - return out_leads - -def get_lead_sequence_groups(config): - # output will be a dictionary where the key will be the - # label specified and the value will be the list of forecast leads - lead_seq_dict = {} - # used in plotting - all_conf = config.keys('config') - indices = [] - regex = re.compile(r"LEAD_SEQ_(\d+)") - for conf in all_conf: - result = regex.match(conf) - if result is not None: - indices.append(result.group(1)) - - # loop over all possible variables and add them to list - for index in indices: - if config.has_option('config', f"LEAD_SEQ_{index}_LABEL"): - label = config.getstr('config', f"LEAD_SEQ_{index}_LABEL") - else: - log_msg = (f'Need to set LEAD_SEQ_{index}_LABEL to describe ' - f'LEAD_SEQ_{index}') - config.logger.error(log_msg) - return None - - # get forecast list for n - lead_string_list = getlist(config.getstr('config', f'LEAD_SEQ_{index}')) - lead_seq = handle_lead_seq(config, - lead_string_list, - lead_min=None, - lead_max=None) - # add to output dictionary - lead_seq_dict[label] = lead_seq - - return lead_seq_dict - -def round_0p5(val): - """! Round to the nearest point five (ie 3.3 rounds to 3.5, 3.1 - rounds to 3.0) Take the input value, multiply by two, round to integer - (no decimal places) then divide by two. Expect any input value of n.0, - n.1, or n.2 to round down to n.0, and any input value of n.5, n.6 or - n.7 to round to n.5. Finally, any input value of n.8 or n.9 will - round to (n+1).0 - Args: - @param val : The number to be rounded to the nearest .5 - Returns: - pt_five: The n.0, n.5, or (n+1).0 value as - a result of rounding the input value, val. - """ - - return round(val * 2) / 2 - -def mkdir_p(path): - """! - From stackoverflow.com/questions/600268/mkdir-p-functionality-in-python - Creates the entire directory path if it doesn't exist (including any - required intermediate directories). 
- Args: - @param path : The full directory path to be created - Returns - None: Creates the full directory path if it doesn't exist, - does nothing otherwise. - """ - Path(path).mkdir(parents=True, exist_ok=True) - -def get_storms(filter_filename, id_only=False, sort_column='STORM_ID'): - """! Get each storm as identified by a column in the input file. - Create dictionary storm ID as the key and a list of lines for that - storm as the value. - - @param filter_filename name of tcst file to read and extract storm id - @param sort_column column to use to sort and group storms. Default - value is STORM_ID - @returns 2 item tuple - 1)dictionary where key is storm ID and value - is list of relevant lines from tcst file, 2) header line from tcst - file. Item with key 'header' contains the header of the tcst file - """ - # Initialize a set because we want unique storm ids. - storm_id_list = set() - - try: - with open(filter_filename, "r") as file_handle: - header, *lines = file_handle.readlines() - - storm_id_column = header.split().index(sort_column) - for line in lines: - storm_id_list.add(line.split()[storm_id_column]) - except (ValueError, FileNotFoundError): - if id_only: - return [] - return {} - - # sort the unique storm ids, copy the original - # set by using sorted rather than sort. - sorted_storms = sorted(storm_id_list) - if id_only: - return sorted_storms - - if not sorted_storms: - return {} - - storm_dict = {'header': header} - # for each storm, get all lines for that storm - for storm in sorted_storms: - storm_dict[storm] = [line for line in lines if storm in line] - - return storm_dict - -def get_files(filedir, filename_regex, logger=None): - """! Get all the files (with a particular - naming format) by walking - through the directories. - Args: - @param filedir: The topmost directory from which the - search begins. - @param filename_regex: The regular expression that - defines the naming format - of the files of interest. - Returns: - file_paths (string): a list of filenames (with full filepath) - """ - file_paths = [] - - # Walk the tree - for root, _, files in os.walk(filedir): - for filename in files: - # add it to the list only if it is a match - # to the specified format - match = re.match(filename_regex, filename) - if match: - # Join the two strings to form the full - # filepath. - filepath = os.path.join(root, filename) - file_paths.append(filepath) - else: - continue - return sorted(file_paths) - -def prune_empty(output_dir, logger): - """! Start from the output_dir, and recursively check - all directories and files. If there are any empty - files or directories, delete/remove them so they - don't cause performance degradation or errors - when performing subsequent tasks. - Input: - @param output_dir: The directory from which searching - should begin. - @param logger: The logger to which all logging is - directed. - """ - - # Check for empty files. - for root, dirs, files in os.walk(output_dir): - # Create a full file path by joining the path - # and filename. - for a_file in files: - a_file = os.path.join(root, a_file) - if os.stat(a_file).st_size == 0: - logger.debug("Empty file: " + a_file + - "...removing") - os.remove(a_file) - - # Now check for any empty directories, some - # may have been created when removing - # empty files. 
- for root, dirs, files in os.walk(output_dir): - for direc in dirs: - full_dir = os.path.join(root, direc) - if not os.listdir(full_dir): - logger.debug("Empty directory: " + full_dir + - "...removing") - os.rmdir(full_dir) - -def camel_to_underscore(camel): - """! Change camel case notation to underscore notation, i.e. GridStatWrapper to grid_stat_wrapper - Multiple capital letters are excluded, i.e. PCPCombineWrapper to pcp_combine_wrapper - Numerals are also skipped, i.e. ASCII2NCWrapper to ascii2nc_wrapper - Args: - @param camel string to convert - @returns string in underscore notation - """ - s1 = re.sub(r'([^\d])([A-Z][a-z]+)', r'\1_\2', camel) - return re.sub(r'([a-z])([A-Z])', r'\1_\2', s1).lower() - -def shift_time_seconds(time_str, shift): - """ Adjust time by shift seconds. Format is %Y%m%d%H%M%S - Args: - @param time_str: Start time in %Y%m%d%H%M%S - @param shift: Amount to adjust time in seconds - Returns: - New time in format %Y%m%d%H%M%S - """ - return (datetime.strptime(time_str, "%Y%m%d%H%M%S") + - timedelta(seconds=shift)).strftime("%Y%m%d%H%M%S") - -def get_threshold_via_regex(thresh_string): - """!Ensure thresh values start with >,>=,==,!=,<,<=,gt,ge,eq,ne,lt,le and then a number - Optionally can have multiple comparison/number pairs separated with && or ||. - Args: - @param thresh_string: String to examine, i.e. <=3.4 - Returns: - None if string does not match any valid comparison operators or does - not contain a number afterwards - regex match object with comparison operator in group 1 and - number in group 2 if valid - """ - - comparison_number_list = [] - # split thresh string by || or && - thresh_split = re.split(r'\|\||&&', thresh_string) - # check each threshold for validity - for thresh in thresh_split: - found_match = False - for comp in list(VALID_COMPARISONS)+list(VALID_COMPARISONS.values()): - # if valid, add to list of tuples - # must be one of the valid comparison operators followed by - # at least 1 digit or NA - if thresh == 'NA': - comparison_number_list.append((thresh, '')) - found_match = True - break - - match = re.match(r'^('+comp+r')(.*\d.*)$', thresh) - if match: - comparison = match.group(1) - number = match.group(2) - # try to convert to float if it can, but allow string - try: - number = float(number) - except ValueError: - pass - - comparison_number_list.append((comparison, number)) - found_match = True - break - - # if no match was found for the item, return None - if not found_match: - return None - - if not comparison_number_list: - return None - - return comparison_number_list - - -def validate_thresholds(thresh_list): - """ Checks list of thresholds to ensure all of them have the correct format - Should be a comparison operator with number pair combined with || or && - i.e. gt4 or >3&&<5 or gt3||lt1 - Args: - @param thresh_list list of strings to check - Returns: - True if all items in the list are valid format, False if not - """ - valid = True - for thresh in thresh_list: - match = get_threshold_via_regex(thresh) - if match is None: - valid = False - - if valid is False: - print("ERROR: Threshold values must use >,>=,==,!=,<,<=,gt,ge,eq,ne,lt, or le with a number, " - "optionally combined with && or ||") - return False - return True - -def write_list_to_file(filename, output_list): - with open(filename, 'w+') as f: - for line in output_list: - f.write(f"{line}\n") - -def format_var_items(field_configs, time_info=None): - """! Substitute time information into field information and format values. 
- - @param field_configs dictionary with config variable names to read - @param time_info dictionary containing time info for current run - @returns dictionary containing name, levels, and output_names, as - well as thresholds and extra options if found. If not enough - information was set in the METplusConfig object, an empty - dictionary is returned. - """ - # dictionary to hold field (var) item info - var_items = {} - - # set defaults for optional items - var_items['levels'] = [] - var_items['thresh'] = [] - var_items['extra'] = '' - var_items['output_names'] = [] - - # get name, return error string if not found - search_name = field_configs.get('name') - if not search_name: - return 'Name not found' - - # perform string substitution on name - if time_info: - search_name = do_string_sub(search_name, - skip_missing_tags=True, - **time_info) - var_items['name'] = search_name - - # get levels, performing string substitution on each item of list - for level in getlist(field_configs.get('levels')): - if time_info: - level = do_string_sub(level, - **time_info) - var_items['levels'].append(level) - - # if no levels are found, add an empty string - if not var_items['levels']: - var_items['levels'].append('') - - # get threshold list if it is set - # return error string if any thresholds not formatted properly - search_thresh = field_configs.get('thresh') - if search_thresh: - thresh = getlist(search_thresh) - if not validate_thresholds(thresh): - return 'Invalid threshold supplied' - - var_items['thresh'] = thresh - - # get extra options if it is set, format with semi-colons between items - search_extra = field_configs.get('options') - if search_extra: - if time_info: - search_extra = do_string_sub(search_extra, - **time_info) - - # strip off empty space around each value - extra_list = [item.strip() for item in search_extra.split(';')] - - # split up each item by semicolon, then add a semicolon to the end - # use list(filter(None to remove empty strings from list - extra_list = list(filter(None, extra_list)) - var_items['extra'] = f"{'; '.join(extra_list)};" - - # get output names if they are set - out_name_str = field_configs.get('output_names') - - # use input name for each level if not set - if not out_name_str: - for _ in var_items['levels']: - var_items['output_names'].append(var_items['name']) - else: - for out_name in getlist(out_name_str): - if time_info: - out_name = do_string_sub(out_name, - **time_info) - var_items['output_names'].append(out_name) - - if len(var_items['levels']) != len(var_items['output_names']): - return 'Number of levels does not match number of output names' - - return var_items - -def sub_var_info(var_info, time_info): - if not var_info: - return {} - - out_var_info = {} - for key, value in var_info.items(): - if isinstance(value, list): - out_value = [] - for item in value: - out_value.append(do_string_sub(item, - skip_missing_tags=True, - **time_info)) - else: - out_value = do_string_sub(value, - skip_missing_tags=True, - **time_info) - - out_var_info[key] = out_value - - return out_var_info - -def sub_var_list(var_list, time_info): - """! 
Perform string substitution on var list values with time info - - @param var_list list of field info to substitute values into - @param time_info dictionary containing time information - @returns var_list with values substituted - """ - if not var_list: - return [] - - out_var_list = [] - for var_info in var_list: - out_var_info = sub_var_info(var_info, time_info) - out_var_list.append(out_var_info) - - return out_var_list - -def split_level(level): - """! If level value starts with a letter, then separate that letter from - the rest of the string. i.e. 'A03' will be returned as 'A', '03'. If no - level type letter is found and the level value consists of alpha-numeric - characters, return an empty string as the level type and the full level - string as the level value - - @param level input string to parse/split - @returns tuple of level type and level value - """ - if not level: - return '', '' - - match = re.match(r'^([a-zA-Z])(\w+)$', level) - if match: - level_type = match.group(1) - level = match.group(2) - return level_type, level - - match = re.match(r'^[\w]+$', level) - if match: - return '', level - - return '', '' - -def get_filetype(filepath, logger=None): - """!This function determines if the filepath is a NETCDF or GRIB file - based on the first eight bytes of the file. - It returns the string GRIB, NETCDF, or a None object. - - Note: If it is NOT determined to ba a NETCDF file, - it returns GRIB, regardless. - Unless there is an IOError exception, such as filepath refers - to a non-existent file or filepath is only a directory, than - None is returned, without a system exit. - - Args: - @param filepath: path/to/filename - @param logger the logger, optional - Returns: - @returns The string GRIB, NETCDF or a None object - """ - # Developer Note - # Since we have the impending code-freeze, keeping the behavior the same, - # just changing the implementation. - # The previous logic did not test for GRIB it would just return 'GRIB' - # if you couldn't run ncdump on the file. - # Also note: - # As John indicated ... there is the case when a grib file - # may not start with GRIB ... and if you pass the MET command filtetype=GRIB - # MET will handle it ok ... - - # Notes on file format and determining type. - # https://www.wmo.int/pages/prog/www/WDM/Guides/Guide-binary-2.html - # https://www.unidata.ucar.edu/software/netcdf/docs/faq.html - # http: // www.hdfgroup.org / HDF5 / doc / H5.format.html - - # Interpreting single byte by byte - so ok to ignore endianess - # od command: - # od -An -c -N8 foo.nc - # od -tx1 -N8 foo.nc - # GRIB - # Octet no. IS Content - # 1-4 'GRIB' (Coded CCITT-ITA No. 5) (ASCII); - # 5-7 Total length, in octets, of GRIB message(including Sections 0 & 5); - # 8 Edition number - currently 1 - # NETCDF .. ie. od -An -c -N4 foo.nc which will output - # C D F 001 - # C D F 002 - # 211 H D F - # HDF5 - # Magic numbers Hex: 89 48 44 46 0d 0a 1a 0a - # ASCII: \211 HDF \r \n \032 \n - - # Below is a reference that may be used in the future to - # determine grib version. - # import struct - # with open ("foo.grb2","rb")as binary_file: - # binary_file.seek(7) - # one_byte = binary_file.read(1) - # - # This would return an integer with value 1 or 2, - # B option is an unsigned char. - # struct.unpack('B',one_byte)[0] - - # if filepath is set to None, return None to avoid crash - if filepath == None: - return None - - try: - # read will return up to 8 bytes, if file is 0 bytes in length, - # than first_eight_bytes will be the empty string ''. 
- # Don't test the file length, just adds more time overhead. - with open(filepath, "rb") as binary_file: - binary_file.seek(0) - first_eight_bytes = binary_file.read(8) - - # From the first eight bytes of the file, unpack the bytes - # of the known identifier byte locations, in to a string. - # Example, if this was a netcdf file than ONLY name_cdf would - # equal 'CDF' the other variables, name_hdf would be 'DF ' - # name_grid 'CDF ' - name_cdf, name_hdf, name_grib = [None] * 3 - if len(first_eight_bytes) == 8: - name_cdf = struct.unpack('3s', first_eight_bytes[:3])[0] - name_hdf = struct.unpack('3s', first_eight_bytes[1:4])[0] - name_grib = struct.unpack('4s', first_eight_bytes[:4])[0] - - # Why not just use a else, instead of elif else if we are going to - # return GRIB ? It allows for expansion, ie. Maybe we pass in a - # logger and log the cases we can't determine the type. - if name_cdf == 'CDF' or name_hdf == 'HDF': - return "NETCDF" - elif name_grib == 'GRIB': - return "GRIB" - else: - # This mimicks previous behavoir, were we at least will always return GRIB. - # It also handles the case where GRIB was not in the first 4 bytes - # of a legitimate grib file, see John. - # logger.info('Can't determine type, returning GRIB - # as default %s'%filepath) - return "GRIB" - - except IOError: - # Skip the IOError, and keep processing data. - # ie. filepath references a file that does not exist - # or filepath is a directory. - return None - - # Previous Logic - # ncdump_exe = config.getexe('NCDUMP') - #try: - # result = subprocess.check_output([ncdump_exe, filepath]) - - #except subprocess.CalledProcessError: - # return "GRIB" - - #regex = re.search("netcdf", result) - #if regex is not None: - # return "NETCDF" - #else: - # return None - -def preprocess_file(filename, data_type, config, allow_dir=False): - """ Decompress gzip, bzip, or zip files or convert Gempak files to NetCDF - Args: - @param filename: Path to file without zip extensions - @param config: Config object - Returns: - Path to staged unzipped file or original file if already unzipped - """ - if not filename: - return None - - if allow_dir and os.path.isdir(filename): - return filename - - # if using python embedding for input, return the keyword - if os.path.basename(filename) in PYTHON_EMBEDDING_TYPES: - return os.path.basename(filename) - - # if filename starts with a python embedding type, return the full value - for py_embed_type in PYTHON_EMBEDDING_TYPES: - if filename.startswith(py_embed_type): - return filename - - # if _INPUT_DATATYPE value contains PYTHON, return the full value - if data_type is not None and 'PYTHON' in data_type: - return filename - - stage_dir = config.getdir('STAGING_DIR') - - if os.path.isfile(filename): - # if filename provided ends with a valid compression extension, - # remove the extension and call function again so the - # file will be uncompressed properly. 
This is done so that - # the function will handle files passed to it with an - # extension the same way as files passed - # without an extension but the compressed equivalent exists - for ext in COMPRESSION_EXTENSIONS: - if filename.endswith(ext): - return preprocess_file(filename[:-len(ext)], data_type, config) - # if extension is grd (Gempak), then look in staging dir for nc file - if filename.endswith('.grd') or data_type == "GEMPAK": - if filename.endswith('.grd'): - stagefile = stage_dir + filename[:-3]+"nc" - else: - stagefile = stage_dir + filename+".nc" - if os.path.isfile(stagefile): - return stagefile - # if it does not exist, run GempakToCF and return staged nc file - # Create staging area if it does not exist - mkdir_p(os.path.dirname(stagefile)) - - # only import GempakToCF if needed - from ..wrappers import GempakToCFWrapper - - run_g2c = GempakToCFWrapper(config) - run_g2c.infiles.append(filename) - run_g2c.set_output_path(stagefile) - cmd = run_g2c.get_command() - if cmd is None: - config.logger.error("GempakToCF could not generate command") - return None - if config.logger: - config.logger.debug("Converting Gempak file into {}".format(stagefile)) - run_g2c.build() - return stagefile - - return filename - - # nc file requested and the Gempak equivalent exists - if os.path.isfile(filename[:-2]+'grd'): - return preprocess_file(filename[:-2]+'grd', data_type, config) - - # if file exists in the staging area, return that path - outpath = stage_dir + filename - if os.path.isfile(outpath): - return outpath - - # Create staging area directory only if file has compression extension - if any([os.path.isfile(f'{filename}{ext}') - for ext in COMPRESSION_EXTENSIONS]): - mkdir_p(os.path.dirname(outpath)) - - # uncompress gz, bz2, or zip file - if os.path.isfile(filename+".gz"): - if config.logger: - config.logger.debug("Uncompressing gz file to {}".format(outpath)) - with gzip.open(filename+".gz", 'rb') as infile: - with open(outpath, 'wb') as outfile: - outfile.write(infile.read()) - infile.close() - outfile.close() - return outpath - elif os.path.isfile(filename+".bz2"): - if config.logger: - config.logger.debug("Uncompressing bz2 file to {}".format(outpath)) - with open(filename+".bz2", 'rb') as infile: - with open(outpath, 'wb') as outfile: - outfile.write(bz2.decompress(infile.read())) - infile.close() - outfile.close() - return outpath - elif os.path.isfile(filename+".zip"): - if config.logger: - config.logger.debug("Uncompressing zip file to {}".format(outpath)) - with zipfile.ZipFile(filename+".zip") as z: - with open(outpath, 'wb') as f: - f.write(z.read(os.path.basename(filename))) - return outpath - - # if input doesn't need to exist, return filename - if not config.getbool('config', 'INPUT_MUST_EXIST', True): - return filename - - return None - -def template_to_regex(template, time_info): - in_template = re.sub(r'\.', '\\.', template) - in_template = re.sub(r'{lead.*?}', '.*', in_template) - return do_string_sub(in_template, - **time_info) - -def is_python_script(name): - """ Check if field name is a python script by checking if any of the words - in the string end with .py - - @param name string to check - @returns True if the name is determined to be a python script command - """ - if not name: - return False - - all_items = name.split(' ') - if any(item.endswith('.py') for item in all_items): - return True - - return False - -def expand_int_string_to_list(int_string): - """! Expand string into a list of integer values. Items are separated by - commas. 
Items that are formatted X-Y will be expanded into each number - from X to Y inclusive. If the string ends with +, then add a str '+' - to the end of the list. Used in .github/jobs/get_use_case_commands.py - - @param int_string String containing a comma-separated list of integers - @returns List of integers and potentially '+' as the last item - """ - subset_list = [] - - # if string ends with +, remove it and add it back at the end - if int_string.strip().endswith('+'): - int_string = int_string.strip(' +') - hasPlus = True - else: - hasPlus = False - - # separate into list by comma - comma_list = int_string.split(',') - for comma_item in comma_list: - dash_list = comma_item.split('-') - # if item contains X-Y, expand it - if len(dash_list) == 2: - for i in range(int(dash_list[0].strip()), - int(dash_list[1].strip())+1, - 1): - subset_list.append(i) - else: - subset_list.append(int(comma_item.strip())) - - if hasPlus: - subset_list.append('+') - - return subset_list - -def subset_list(full_list, subset_definition): - """! Extract subset of items from full_list based on subset_definition - Used in internal/tests/use_cases/metplus_use_case_suite.py - - @param full_list List of all use cases that were requested - @param subset_definition Defines how to subset the full list. If None, - no subsetting occurs. If an integer value, select that index only. - If a slice object, i.e. slice(2,4,1), pass slice object into list. - If list, subset full list by integer index values in list. If - last item in list is '+' then subset list up to 2nd last index, then - get all items from 2nd last item and above - """ - if subset_definition is not None: - subset_list = [] - - # if case slice is a list, use only the indices in the list - if isinstance(subset_definition, list): - # if last slice value is a plus sign, get rest of items - # after 2nd last slice value - if subset_definition[-1] == '+': - plus_value = subset_definition[-2] - # add all values before last index before plus - subset_list.extend([full_list[i] - for i in subset_definition[:-2]]) - # add last index listed + all items above - subset_list.extend(full_list[plus_value:]) - else: - # list of integers, so get items based on indices - subset_list = [full_list[i] for i in subset_definition] - else: - subset_list = full_list[subset_definition] - else: - subset_list = full_list - - # if only 1 item is left, make it a list before returning - if not isinstance(subset_list, list): - subset_list = [subset_list] - - return subset_list - -def is_met_netcdf(file_path): - """! Check if a file is a MET-generated NetCDF file. - If the file is not a NetCDF file, OSError occurs. - If the MET_version attribute doesn't exist, AttributeError occurs. - If the netCDF4 package is not available, ImportError should occur. - All of these situations result in the file being considered not - a MET-generated NetCDF file - Args: - @param file_path full path to file to check - @returns True if file is a MET-generated NetCDF file and False if - it is not or it can't be determined. - """ - try: - from netCDF4 import Dataset - nc_file = Dataset(file_path, 'r') - getattr(nc_file, 'MET_version') - except (AttributeError, OSError, ImportError): - return False - - return True - -def netcdf_has_var(file_path, name, level): - """! Check if name is a variable in the NetCDF file. If not, check if - {name}_{level} (with level prefix letter removed, i.e. 06 from A06) - If the file is not a NetCDF file, OSError occurs. - If the MET_version attribute doesn't exist, AttributeError occurs. 
- If the netCDF4 package is not available, ImportError should occur. - All of these situations result in the file being considered not - a MET-generated NetCDF file - Args: - @param file_path full path to file to check - @returns True if file is a MET-generated NetCDF file and False if - it is not or it can't be determined. - """ - try: - from netCDF4 import Dataset - - nc_file = Dataset(file_path, 'r') - variables = nc_file.variables.keys() - - # if name is a variable, return that name - if name in variables: - return name - - - # if name_level is a variable, return that - name_underscore_level = f"{name}_{split_level(level)[1]}" - if name_underscore_level in variables: - return name_underscore_level - - # requested variable name is not found in file - return None - - except (AttributeError, OSError, ImportError): - return False - -def generate_tmp_filename(): - import random - import string - random_string = ''.join(random.choice(string.ascii_letters) - for i in range(10)) - return f"metplus_tmp_{random_string}" - -def format_level(level): - """! Format level string to prevent NetCDF level values from creating - filenames and field names with bad characters. Replaces '*' with 'all' - and ',' with '_' - - @param level string of level to format - @returns formatted string - """ - return level.replace('*', 'all').replace(',', '_') diff --git a/metplus/util/run_util.py b/metplus/util/run_util.py new file mode 100644 index 0000000000..fb7b743b35 --- /dev/null +++ b/metplus/util/run_util.py @@ -0,0 +1,194 @@ +import sys +import os +import shutil +from datetime import datetime +from importlib import import_module + +from .constants import NO_COMMAND_WRAPPERS +from .system_util import get_user_info, write_list_to_file +from .. import get_metplus_version +from . import config_metplus +from . import camel_to_underscore + +def pre_run_setup(config_inputs): + + version_number = get_metplus_version() + print(f'Starting METplus v{version_number}') + + # Read config inputs and return a config instance + config = config_metplus.setup(config_inputs) + + logger = config.logger + + user_info = get_user_info() + user_string = f' as user {user_info} ' if user_info else ' ' + + config.set('config', 'METPLUS_VERSION', version_number) + logger.info('Running METplus v%s%swith command: %s', + version_number, user_string, ' '.join(sys.argv)) + + logger.info(f"Log file: {config.getstr('config', 'LOG_METPLUS')}") + logger.info(f"METplus Base: {config.getdir('METPLUS_BASE')}") + logger.info(f"Final Conf: {config.getstr('config', 'METPLUS_CONF')}") + config_list = config.getstr('config', 'CONFIG_INPUT').split(',') + for config_item in config_list: + logger.info(f"Config Input: {config_item}") + + # validate configuration variables + isOK_A, isOK_B, isOK_C, isOK_D, all_sed_cmds = config_metplus.validate_configuration_variables(config) + if not (isOK_A and isOK_B and isOK_C and isOK_D): + # if any sed commands were generated, write them to the sed file + if all_sed_cmds: + sed_file = os.path.join(config.getdir('OUTPUT_BASE'), 'sed_commands.txt') + # remove if sed file exists + if os.path.exists(sed_file): + os.remove(sed_file) + + write_list_to_file(sed_file, all_sed_cmds) + config.logger.error(f"Find/Replace commands have been generated in {sed_file}") + + logger.error("Correct configuration variables and rerun. 
Exiting.") + sys.exit(1) + + if not config.getdir('MET_INSTALL_DIR', must_exist=True): + logger.error('MET_INSTALL_DIR must be set correctly to run METplus') + sys.exit(1) + + # set staging dir to OUTPUT_BASE/stage if not set + if not config.has_option('config', 'STAGING_DIR'): + config.set('config', 'STAGING_DIR', + os.path.join(config.getdir('OUTPUT_BASE'), "stage")) + + # handle dir to write temporary files + config_metplus.handle_tmp_dir(config) + + # handle OMP_NUM_THREADS environment variable + config_metplus.handle_env_var_config(config, + env_var_name='OMP_NUM_THREADS', + config_name='OMP_NUM_THREADS') + + config.env = os.environ.copy() + + return config + + +def run_metplus(config): + total_errors = 0 + + # Use config object to get the list of processes to call + process_list = config_metplus.get_process_list(config) + + try: + processes = [] + for process, instance in process_list: + try: + logname = f"{process}.{instance}" if instance else process + logger = config.log(logname) + package_name = ('metplus.wrappers.' + f'{camel_to_underscore(process)}_wrapper') + module = import_module(package_name) + command_builder = ( + getattr(module, f"{process}Wrapper")(config, + instance=instance) + ) + + # if Usage specified in PROCESS_LIST, print usage and exit + if process == 'Usage': + command_builder.run_all_times() + return 0 + except AttributeError: + logger.error("There was a problem loading " + f"{process} wrapper.") + return 1 + except ModuleNotFoundError: + logger.error(f"Could not load {process} wrapper. " + "Wrapper may have been disabled.") + return 1 + + processes.append(command_builder) + + # check if all processes initialized correctly + allOK = True + for process in processes: + if not process.isOK: + allOK = False + class_name = process.__class__.__name__.replace('Wrapper', '') + logger.error("{} was not initialized properly".format(class_name)) + + # exit if any wrappers did not initialized properly + if not allOK: + logger.info("Refer to ERROR messages above to resolve issues.") + return 1 + + all_commands = [] + for process in processes: + new_commands = process.run_all_times() + if new_commands: + all_commands.extend(new_commands) + + # if process list contains any wrapper that should run commands + if any([item[0] not in NO_COMMAND_WRAPPERS for item in process_list]): + # write out all commands and environment variables to file + if not config_metplus.write_all_commands(all_commands, config): + # report an error if no commands were generated + total_errors += 1 + + # compute total number of errors that occurred and output results + for process in processes: + if process.errors != 0: + process_name = process.__class__.__name__.replace('Wrapper', '') + error_msg = '{} had {} error'.format(process_name, process.errors) + if process.errors > 1: + error_msg += 's' + error_msg += '.' 
+ logger.error(error_msg) + total_errors += process.errors + + return total_errors + except: + logger.exception("Fatal error occurred") + logger.info(f"Check the log file for more information: {config.getstr('config', 'LOG_METPLUS')}") + return 1 + +def post_run_cleanup(config, app_name, total_errors): + logger = config.logger + # scrub staging directory if requested + if (config.getbool('config', 'SCRUB_STAGING_DIR') and + os.path.exists(config.getdir('STAGING_DIR'))): + staging_dir = config.getdir('STAGING_DIR') + logger.info("Scrubbing staging dir: %s", staging_dir) + logger.info('Set SCRUB_STAGING_DIR to False to preserve ' + 'intermediate files.') + shutil.rmtree(staging_dir) + + # save log file path and clock time before writing final conf file + log_message = (f"Check the log file for more information: " + f"{config.getstr('config', 'LOG_METPLUS')}") + + start_clock_time = datetime.strptime(config.getstr('config', 'CLOCK_TIME'), + '%Y%m%d%H%M%S') + + # rewrite final conf so it contains all of the default values used + config_metplus.write_final_conf(config) + + # compute time it took to run + end_clock_time = datetime.now() + total_run_time = end_clock_time - start_clock_time + logger.debug(f"{app_name} took {total_run_time} to run.") + + user_info = get_user_info() + user_string = f' as user {user_info}' if user_info else '' + if not total_errors: + logger.info(log_message) + logger.info('%s has successfully finished running%s.', + app_name, user_string) + return + + error_msg = (f'{app_name} has finished running{user_string} ' + f'but had {total_errors} error') + if total_errors > 1: + error_msg += 's' + error_msg += '.' + logger.error(error_msg) + logger.info(log_message) + sys.exit(1) diff --git a/metplus/util/string_manip.py b/metplus/util/string_manip.py index 2779fa7ebe..5ddb62e867 100644 --- a/metplus/util/string_manip.py +++ b/metplus/util/string_manip.py @@ -6,6 +6,8 @@ import re from csv import reader +import random +import string from .constants import VALID_COMPARISONS @@ -249,3 +251,245 @@ def format_thresh(thresh_str): formatted_thresh_list.append(thresh_letter) return ','.join(formatted_thresh_list) + + +def is_python_script(name): + """ Check if field name is a python script by checking if any of the words + in the string end with .py + + @param name string to check + @returns True if the name is determined to be a python script command + """ + if not name: + return False + + all_items = name.split(' ') + if any(item.endswith('.py') for item in all_items): + return True + + return False + + +def camel_to_underscore(camel): + """! Change camel case notation to underscore notation, i.e. GridStatWrapper to grid_stat_wrapper + Multiple capital letters are excluded, i.e. PCPCombineWrapper to pcp_combine_wrapper + Numerals are also skipped, i.e. ASCII2NCWrapper to ascii2nc_wrapper + Args: + @param camel string to convert + @returns string in underscore notation + """ + s1 = re.sub(r'([^\d])([A-Z][a-z]+)', r'\1_\2', camel) + return re.sub(r'([a-z])([A-Z])', r'\1_\2', s1).lower() + + +def get_threshold_via_regex(thresh_string): + """!Ensure thresh values start with >,>=,==,!=,<,<=,gt,ge,eq,ne,lt,le and then a number + Optionally can have multiple comparison/number pairs separated with && or ||. + Args: + @param thresh_string: String to examine, i.e. 
<=3.4 + Returns: + None if string does not match any valid comparison operators or does + not contain a number afterwards + regex match object with comparison operator in group 1 and + number in group 2 if valid + """ + + comparison_number_list = [] + # split thresh string by || or && + thresh_split = re.split(r'\|\||&&', thresh_string) + # check each threshold for validity + for thresh in thresh_split: + found_match = False + for comp in list(VALID_COMPARISONS)+list(VALID_COMPARISONS.values()): + # if valid, add to list of tuples + # must be one of the valid comparison operators followed by + # at least 1 digit or NA + if thresh == 'NA': + comparison_number_list.append((thresh, '')) + found_match = True + break + + match = re.match(r'^('+comp+r')(.*\d.*)$', thresh) + if match: + comparison = match.group(1) + number = match.group(2) + # try to convert to float if it can, but allow string + try: + number = float(number) + except ValueError: + pass + + comparison_number_list.append((comparison, number)) + found_match = True + break + + # if no match was found for the item, return None + if not found_match: + return None + + if not comparison_number_list: + return None + + return comparison_number_list + + +def validate_thresholds(thresh_list): + """ Checks list of thresholds to ensure all of them have the correct format + Should be a comparison operator with number pair combined with || or && + i.e. gt4 or >3&&<5 or gt3||lt1 + Args: + @param thresh_list list of strings to check + Returns: + True if all items in the list are valid format, False if not + """ + valid = True + for thresh in thresh_list: + match = get_threshold_via_regex(thresh) + if match is None: + valid = False + + if valid is False: + print("ERROR: Threshold values must use >,>=,==,!=,<,<=,gt,ge,eq,ne,lt, or le with a number, " + "optionally combined with && or ||") + return False + return True + + +def round_0p5(val): + """! Round to the nearest point five (i.e. 3.3 rounds to 3.5, 3.1 + rounds to 3.0). Take the input value, multiply by two, round to integer + (no decimal places) then divide by two. Expect any input value of n.0, + n.1, or n.2 to round down to n.0, and any input value of n.5, n.6 or + n.7 to round to n.5. Finally, any input value of n.8 or n.9 will + round to (n+1).0. + + @param val : The number to be rounded to the nearest .5 + @returns n.0, n.5, or (n+1).0 value as a result of rounding + """ + return round(val * 2) / 2 + + +def generate_tmp_filename(): + random_string = ''.join(random.choice(string.ascii_letters) + for i in range(10)) + return f"metplus_tmp_{random_string}" + + +def template_to_regex(template): + in_template = re.sub(r'\.', '\\.', template) + return re.sub(r'{lead.*?}', '.*', in_template) + + +def split_level(level): + """! If level value starts with a letter, then separate that letter from + the rest of the string. i.e. 'A03' will be returned as 'A', '03'. If no + level type letter is found and the level value consists of alpha-numeric + characters, return an empty string as the level type and the full level + string as the level value + + @param level input string to parse/split + @returns tuple of level type and level value + """ + if not level: + return '', '' + + match = re.match(r'^([a-zA-Z])(\w+)$', level) + if match: + level_type = match.group(1) + level = match.group(2) + return level_type, level + + match = re.match(r'^[\w]+$', level) + if match: + return '', level + + return '', ''
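+ +# For reference (illustrative), the cases without a level type letter: +# split_level('500') -> ('', '500'); split_level('') -> ('', '') + + +def format_level(level): + """! 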
Format level string to prevent NetCDF level values from creating + filenames and field names with bad characters. Replaces '*' with 'all' + and ',' with '_' + + @param level string of level to format + @returns formatted string + """ + return level.replace('*', 'all').replace(',', '_') + + +def expand_int_string_to_list(int_string): + """! Expand string into a list of integer values. Items are separated by + commas. Items that are formatted X-Y will be expanded into each number + from X to Y inclusive. If the string ends with +, then add a str '+' + to the end of the list. Used in .github/jobs/get_use_case_commands.py + + @param int_string String containing a comma-separated list of integers + @returns List of integers and potentially '+' as the last item + """ + subset_list = [] + + # if string ends with +, remove it and add it back at the end + if int_string.strip().endswith('+'): + int_string = int_string.strip(' +') + hasPlus = True + else: + hasPlus = False + + # separate into list by comma + comma_list = int_string.split(',') + for comma_item in comma_list: + dash_list = comma_item.split('-') + # if item contains X-Y, expand it + if len(dash_list) == 2: + for i in range(int(dash_list[0].strip()), + int(dash_list[1].strip())+1, + 1): + subset_list.append(i) + else: + subset_list.append(int(comma_item.strip())) + + if hasPlus: + subset_list.append('+') + + return subset_list + + +def subset_list(full_list, subset_definition): + """! Extract subset of items from full_list based on subset_definition + Used in internal/tests/use_cases/metplus_use_case_suite.py + + @param full_list List of all use cases that were requested + @param subset_definition Defines how to subset the full list. If None, + no subsetting occurs. If an integer value, select that index only. + If a slice object, i.e. slice(2,4,1), use it to slice the full list. + If list, subset full list by integer index values in list. If + last item in list is '+' then subset list up to 2nd last index, then + get all items from 2nd last item and above + """ + if subset_definition is not None: + subset_list = [] + + # if case slice is a list, use only the indices in the list + if isinstance(subset_definition, list): + # if last slice value is a plus sign, get rest of items + # after 2nd last slice value + if subset_definition[-1] == '+': + plus_value = subset_definition[-2] + # add all values before last index before plus + subset_list.extend([full_list[i] + for i in subset_definition[:-2]]) + # add last index listed + all items above + subset_list.extend(full_list[plus_value:]) + else: + # list of integers, so get items based on indices + subset_list = [full_list[i] for i in subset_definition] + else: + subset_list = full_list[subset_definition] + else: + subset_list = full_list + + # if only 1 item is left, make it a list before returning + if not isinstance(subset_list, list): + subset_list = [subset_list] + + return subset_list
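+ + +# Example usage (illustrative) of how these two helpers combine: +# expand_int_string_to_list('0,2-4,7+') -> [0, 2, 3, 4, 7, '+'] +# subset_list(list('abcdefgh'), [0, 2, 3, 4, 7, '+']) -> ['a', 'c', 'd', 'e', 'h'] diff --git a/metplus/util/system_util.py b/metplus/util/system_util.py new file mode 100644 index 0000000000..f47d9a8936 --- /dev/null +++ b/metplus/util/system_util.py @@ -0,0 +1,460 @@ +""" +Program Name: system_util.py +Contact(s): George McCabe +Description: METplus utility to handle OS/system calls +""" + +import os +import re +from pathlib import Path +import getpass +import gzip +import bz2 +import zipfile +import struct + +from .constants import PYTHON_EMBEDDING_TYPES, COMPRESSION_EXTENSIONS +from .string_manip import split_level + + +def mkdir_p(path): + """! 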
+ From stackoverflow.com/questions/600268/mkdir-p-functionality-in-python + Creates the entire directory path if it doesn't exist (including any + required intermediate directories). + Args: + @param path : The full directory path to be created + Returns + None: Creates the full directory path if it doesn't exist, + does nothing otherwise. + """ + Path(path).mkdir(parents=True, exist_ok=True) + + +def get_user_info(): + """! Get user information from OS. Note that some OS cannot obtain user ID + and some cannot obtain username. + @returns username(uid) if both username and user ID can be read, + username if only username can be read, uid if only user ID can be read, + or an empty string if neither can be read. + """ + try: + username = getpass.getuser() + except OSError: + username = None + + try: + uid = os.getuid() + except AttributeError: + uid = None + + if username and uid: + return f'{username}({uid})' + + if username: + return username + + if uid: + return uid + + return '' + + +def write_list_to_file(filename, output_list): + with open(filename, 'w+') as f: + for line in output_list: + f.write(f"{line}\n") + + +def get_storms(filter_filename, id_only=False, sort_column='STORM_ID'): + """! Get each storm as identified by a column in the input file. + Create a dictionary with storm ID as the key and a list of lines for + that storm as the value. + + @param filter_filename name of tcst file to read and extract storm id + @param id_only if True, return a sorted list of storm IDs instead of + the dictionary + @param sort_column column to use to sort and group storms. Default + value is STORM_ID + @returns dictionary where each key is a storm ID and each value is the + list of relevant lines from the tcst file; the item with key 'header' + contains the header line of the tcst file. If id_only is True, a + sorted list of storm IDs is returned instead. + """ + # Initialize a set because we want unique storm ids. + storm_id_list = set() + + try: + with open(filter_filename, "r") as file_handle: + header, *lines = file_handle.readlines() + + storm_id_column = header.split().index(sort_column) + for line in lines: + storm_id_list.add(line.split()[storm_id_column]) + except (ValueError, FileNotFoundError): + if id_only: + return [] + return {} + + # sort the unique storm ids, copy the original + # set by using sorted rather than sort. + sorted_storms = sorted(storm_id_list) + if id_only: + return sorted_storms + + if not sorted_storms: + return {} + + storm_dict = {'header': header} + # for each storm, get all lines for that storm + for storm in sorted_storms: + storm_dict[storm] = [line for line in lines if storm in line] + + return storm_dict + + +def prune_empty(output_dir, logger): + """! Start from the output_dir, and recursively check + all directories and files. If there are any empty + files or directories, delete/remove them so they + don't cause performance degradation or errors + when performing subsequent tasks. + + @param output_dir The directory from which searching should begin. + @param logger The logger to which all logging is directed. + """ + + # Check for empty files. + for root, dirs, files in os.walk(output_dir): + # Create a full file path by joining the path + # and filename. + for a_file in files: + a_file = os.path.join(root, a_file) + if os.stat(a_file).st_size == 0: + logger.debug("Empty file: " + a_file + + "...removing") + os.remove(a_file) + + # Now check for any empty directories, some + # may have been created when removing + # empty files. 
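+ # (note: this is a single top-down pass, so a directory that only + # becomes empty after its empty subdirectory is removed is not pruned)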
+ for root, dirs, files in os.walk(output_dir): + for direc in dirs: + full_dir = os.path.join(root, direc) + if not os.listdir(full_dir): + logger.debug("Empty directory: " + full_dir + + "...removing") + os.rmdir(full_dir) + + +def get_files(filedir, filename_regex, logger=None): + """! Get all the files (with a particular naming format) by walking + through the directories. + + @param filedir The topmost directory from which the search begins. + @param filename_regex The regular expression that defines the naming + format of the files of interest. + @returns list of filenames (with full filepath) + """ + file_paths = [] + + # Walk the tree + for root, _, files in os.walk(filedir): + for filename in files: + # add it to the list only if it is a match + # to the specified format + match = re.match(filename_regex, filename) + if match: + # Join the two strings to form the full + # filepath. + filepath = os.path.join(root, filename) + file_paths.append(filepath) + else: + continue + return sorted(file_paths) + + +def preprocess_file(filename, data_type, config, allow_dir=False): + """ Decompress gzip, bzip2, or zip files or convert Gempak files to NetCDF + Args: + @param filename: Path to file without zip extensions + @param data_type: Input data type, used to check for Gempak data or + python embedding (may be None) + @param config: Config object + @param allow_dir: If True, return the path as-is when it is a directory + Returns: + Path to staged unzipped file or original file if already unzipped + """ + if not filename: + return None + + if allow_dir and os.path.isdir(filename): + return filename + + # if using python embedding for input, return the keyword + if os.path.basename(filename) in PYTHON_EMBEDDING_TYPES: + return os.path.basename(filename) + + # if filename starts with a python embedding type, return the full value + for py_embed_type in PYTHON_EMBEDDING_TYPES: + if filename.startswith(py_embed_type): + return filename + + # if _INPUT_DATATYPE value contains PYTHON, return the full value + if data_type is not None and 'PYTHON' in data_type: + return filename + + stage_dir = config.getdir('STAGING_DIR') + + if os.path.isfile(filename): + # if filename provided ends with a valid compression extension, + # remove the extension and call function again so the + # file will be uncompressed properly. 
This is done so that + # the function will handle files passed to it with an + # extension the same way as files passed + # without an extension but the compressed equivalent exists + for ext in COMPRESSION_EXTENSIONS: + if filename.endswith(ext): + return preprocess_file(filename[:-len(ext)], data_type, config) + # if extension is grd (Gempak), then look in staging dir for nc file + if filename.endswith('.grd') or data_type == "GEMPAK": + if filename.endswith('.grd'): + stagefile = stage_dir + filename[:-3]+"nc" + else: + stagefile = stage_dir + filename+".nc" + if os.path.isfile(stagefile): + return stagefile + # if it does not exist, run GempakToCF and return staged nc file + # Create staging area if it does not exist + mkdir_p(os.path.dirname(stagefile)) + + # only import GempakToCF if needed + from ..wrappers import GempakToCFWrapper + + run_g2c = GempakToCFWrapper(config) + run_g2c.infiles.append(filename) + run_g2c.set_output_path(stagefile) + cmd = run_g2c.get_command() + if cmd is None: + config.logger.error("GempakToCF could not generate command") + return None + if config.logger: + config.logger.debug("Converting Gempak file into {}".format(stagefile)) + run_g2c.build() + return stagefile + + return filename + + # nc file requested and the Gempak equivalent exists + if os.path.isfile(filename[:-2]+'grd'): + return preprocess_file(filename[:-2]+'grd', data_type, config) + + # if file exists in the staging area, return that path + outpath = stage_dir + filename + if os.path.isfile(outpath): + return outpath + + # Create staging area directory only if file has compression extension + if any([os.path.isfile(f'{filename}{ext}') + for ext in COMPRESSION_EXTENSIONS]): + mkdir_p(os.path.dirname(outpath)) + + # uncompress gz, bz2, or zip file + if os.path.isfile(filename+".gz"): + if config.logger: + config.logger.debug("Uncompressing gz file to {}".format(outpath)) + with gzip.open(filename+".gz", 'rb') as infile: + with open(outpath, 'wb') as outfile: + outfile.write(infile.read()) + infile.close() + outfile.close() + return outpath + elif os.path.isfile(filename+".bz2"): + if config.logger: + config.logger.debug("Uncompressing bz2 file to {}".format(outpath)) + with open(filename+".bz2", 'rb') as infile: + with open(outpath, 'wb') as outfile: + outfile.write(bz2.decompress(infile.read())) + infile.close() + outfile.close() + return outpath + elif os.path.isfile(filename+".zip"): + if config.logger: + config.logger.debug("Uncompressing zip file to {}".format(outpath)) + with zipfile.ZipFile(filename+".zip") as z: + with open(outpath, 'wb') as f: + f.write(z.read(os.path.basename(filename))) + return outpath + + # if input doesn't need to exist, return filename + if not config.getbool('config', 'INPUT_MUST_EXIST', True): + return filename + + return None + + +def netcdf_has_var(file_path, name, level): + """! Check if name is a variable in the NetCDF file. If not, check if + {name}_{level} (with level prefix letter removed, i.e. 06 from A06) + is a variable. If the file is not a NetCDF file, OSError occurs. + If the MET_version attribute doesn't exist, AttributeError occurs. + If the netCDF4 package is not available, ImportError should occur. + All of these situations result in the file being considered not + a MET-generated NetCDF file. (CURRENTLY UNUSED) + + @param file_path full path to file to check + @param name variable name to look for in the file + @param level level string used to build the alternate variable name + @returns name of the variable that was found, None if neither name + is in the file, or False if the file could not be read. 
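+ Example (illustrative): name='APCP', level='A06' checks for + variable 'APCP', then 'APCP_06'. 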
+ """ + try: + from netCDF4 import Dataset + + nc_file = Dataset(file_path, 'r') + variables = nc_file.variables.keys() + + # if name is a variable, return that name + if name in variables: + return name + + # if name_level is a variable, return that + name_underscore_level = f"{name}_{split_level(level)[1]}" + if name_underscore_level in variables: + return name_underscore_level + + # requested variable name is not found in file + return None + + except (AttributeError, OSError, ImportError): + return False + + +def is_met_netcdf(file_path): + """! Check if a file is a MET-generated NetCDF file. + If the file is not a NetCDF file, OSError occurs. + If the MET_version attribute doesn't exist, AttributeError occurs. + If the netCDF4 package is not available, ImportError should occur. + All of these situations result in the file being considered not + a MET-generated NetCDF file (CURRENTLY NOT USED) + + @param file_path full path to file to check + @returns True if file is a MET-generated NetCDF file and False if + it is not or it can't be determined. + """ + try: + from netCDF4 import Dataset + nc_file = Dataset(file_path, 'r') + getattr(nc_file, 'MET_version') + except (AttributeError, OSError, ImportError): + return False + + return True + + +def get_filetype(filepath, logger=None): + """!This function determines if the filepath is a NETCDF or GRIB file + based on the first eight bytes of the file. + It returns the string GRIB, NETCDF, or a None object. + + Note: If it is NOT determined to ba a NETCDF file, + it returns GRIB, regardless. + Unless there is an IOError exception, such as filepath refers + to a non-existent file or filepath is only a directory, than + None is returned, without a system exit. (CURRENTLY NOT USED) + + @param filepath: path/to/filename + @param logger the logger, optional + @returns The string GRIB, NETCDF or a None object + """ + # Developer Note + # Since we have the impending code-freeze, keeping the behavior the same, + # just changing the implementation. + # The previous logic did not test for GRIB it would just return 'GRIB' + # if you couldn't run ncdump on the file. + # Also note: + # As John indicated ... there is the case when a grib file + # may not start with GRIB ... and if you pass the MET command filtetype=GRIB + # MET will handle it ok ... + + # Notes on file format and determining type. + # https://www.wmo.int/pages/prog/www/WDM/Guides/Guide-binary-2.html + # https://www.unidata.ucar.edu/software/netcdf/docs/faq.html + # http: // www.hdfgroup.org / HDF5 / doc / H5.format.html + + # Interpreting single byte by byte - so ok to ignore endianess + # od command: + # od -An -c -N8 foo.nc + # od -tx1 -N8 foo.nc + # GRIB + # Octet no. IS Content + # 1-4 'GRIB' (Coded CCITT-ITA No. 5) (ASCII); + # 5-7 Total length, in octets, of GRIB message(including Sections 0 & 5); + # 8 Edition number - currently 1 + # NETCDF .. ie. od -An -c -N4 foo.nc which will output + # C D F 001 + # C D F 002 + # 211 H D F + # HDF5 + # Magic numbers Hex: 89 48 44 46 0d 0a 1a 0a + # ASCII: \211 HDF \r \n \032 \n + + # Below is a reference that may be used in the future to + # determine grib version. + # import struct + # with open ("foo.grb2","rb")as binary_file: + # binary_file.seek(7) + # one_byte = binary_file.read(1) + # + # This would return an integer with value 1 or 2, + # B option is an unsigned char. 
+ # struct.unpack('B',one_byte)[0] + + # if filepath is set to None, return None to avoid crash + if filepath == None: + return None + + try: + # read will return up to 8 bytes, if file is 0 bytes in length, + # than first_eight_bytes will be the empty string ''. + # Don't test the file length, just adds more time overhead. + with open(filepath, "rb") as binary_file: + binary_file.seek(0) + first_eight_bytes = binary_file.read(8) + + # From the first eight bytes of the file, unpack the bytes + # of the known identifier byte locations, in to a string. + # Example, if this was a netcdf file than ONLY name_cdf would + # equal 'CDF' the other variables, name_hdf would be 'DF ' + # name_grid 'CDF ' + name_cdf, name_hdf, name_grib = [None] * 3 + if len(first_eight_bytes) == 8: + name_cdf = struct.unpack('3s', first_eight_bytes[:3])[0] + name_hdf = struct.unpack('3s', first_eight_bytes[1:4])[0] + name_grib = struct.unpack('4s', first_eight_bytes[:4])[0] + + # Why not just use a else, instead of elif else if we are going to + # return GRIB ? It allows for expansion, ie. Maybe we pass in a + # logger and log the cases we can't determine the type. + if name_cdf == 'CDF' or name_hdf == 'HDF': + return "NETCDF" + elif name_grib == 'GRIB': + return "GRIB" + else: + # This mimicks previous behavoir, were we at least will always return GRIB. + # It also handles the case where GRIB was not in the first 4 bytes + # of a legitimate grib file, see John. + # logger.info('Can't determine type, returning GRIB + # as default %s'%filepath) + return "GRIB" + + except IOError: + # Skip the IOError, and keep processing data. + # ie. filepath references a file that does not exist + # or filepath is a directory. + return None + + # Previous Logic + # ncdump_exe = config.getexe('NCDUMP') + #try: + # result = subprocess.check_output([ncdump_exe, filepath]) + + #except subprocess.CalledProcessError: + # return "GRIB" + + #regex = re.search("netcdf", result) + #if regex is not None: + # return "NETCDF" + #else: + # return None diff --git a/metplus/util/time_looping.py b/metplus/util/time_looping.py index 2b4cdb2cd6..2cd124ff8d 100644 --- a/metplus/util/time_looping.py +++ b/metplus/util/time_looping.py @@ -1,8 +1,12 @@ +import re from datetime import datetime, timedelta -from .string_manip import getlist -from .time_util import get_relativedelta +from .string_manip import getlist, getlistint +from .time_util import get_relativedelta, add_to_time_input +from .time_util import ti_get_hours_from_relativedelta +from .time_util import ti_get_seconds_from_relativedelta from .string_template_substitution import do_string_sub +from .config_metplus import log_runtime_banner def time_generator(config): """! Generator used to read METplusConfig variables for time looping @@ -82,6 +86,7 @@ def time_generator(config): current_dt += time_interval + def get_start_and_end_times(config): prefix = get_time_prefix(config) if not prefix: @@ -120,6 +125,42 @@ def get_start_and_end_times(config): return start_dt, end_dt + +def loop_over_times_and_call(config, processes, custom=None): + """! 
Loop over all run times and call wrappers listed in config + + @param config METplusConfig object + @param processes list of CommandBuilder subclass objects (Wrappers) to call + @param custom (optional) custom loop string value + @returns list of tuples with all commands run and the environment variables + that were set for each + """ + # keep track of commands that were run + all_commands = [] + for time_input in time_generator(config): + if not isinstance(processes, list): + processes = [processes] + + for process in processes: + # if time could not be read, increment errors for each process + if time_input is None: + process.errors += 1 + continue + + log_runtime_banner(config, time_input, process) + add_to_time_input(time_input, + instance=process.instance, + custom=custom) + + process.clear() + process.run_at_time(time_input) + if process.all_commands: + all_commands.extend(process.all_commands) + process.all_commands.clear() + + return all_commands + + def _validate_time_values(start_dt, end_dt, time_interval, prefix, logger): if not start_dt: logger.error(f"Could not read {prefix}_BEG") @@ -142,6 +183,7 @@ def _validate_time_values(start_dt, end_dt, time_interval, prefix, logger): return True + def _create_time_input_dict(prefix, current_dt, clock_dt): return { 'loop_by': prefix.lower(), @@ -150,6 +192,7 @@ def _create_time_input_dict(prefix, current_dt, clock_dt): 'today': clock_dt.strftime('%Y%m%d'), } + def get_time_prefix(config): """! Read the METplusConfig object and determine the prefix for the time looping variables. @@ -179,6 +222,7 @@ def get_time_prefix(config): config.logger.error('MUST SET LOOP_BY to VALID, INIT, RETRO, or REALTIME') return None + def _get_current_dt(time_string, time_format, clock_dt, logger): """! Use time format to get datetime object from time string, substituting values for today or now template tags if specified. @@ -204,3 +248,288 @@ def _get_current_dt(time_string, time_format, clock_dt, logger): return None return current_dt + + +def get_skip_times(config, wrapper_name=None): + """! Read SKIP_TIMES config variable and populate dictionary of times that should be skipped. + SKIP_TIMES should be in the format: "%m:begin_end_incr(3,11,1)", "%d:30,31", "%Y%m%d:20201031" + where each item inside quotes is a datetime format, colon, then a list of times in that format + to skip. + Args: + @param config configuration object to pull SKIP_TIMES + @param wrapper_name name of wrapper if supporting + skipping times only for certain wrappers, i.e. 
+def get_skip_times(config, wrapper_name=None): + """! Read SKIP_TIMES config variable and populate dictionary of times that should be skipped. + SKIP_TIMES should be in the format: "%m:begin_end_incr(3,11,1)", "%d:30,31", "%Y%m%d:20201031" + where each item inside quotes is a datetime format, colon, then a list of times in that format + to skip. + Args: + @param config configuration object to pull SKIP_TIMES + @param wrapper_name name of wrapper if supporting + skipping times only for certain wrappers, e.g. grid_stat + @returns dictionary containing times to skip + """ + skip_times_dict = {} + skip_times_string = None + + # if wrapper name is set, look for wrapper-specific _SKIP_TIMES variable + if wrapper_name: + skip_times_string = config.getstr('config', + f'{wrapper_name.upper()}_SKIP_TIMES', '') + + # if skip times string has not been found, check for generic SKIP_TIMES + if not skip_times_string: + skip_times_string = config.getstr('config', 'SKIP_TIMES', '') + + # if no generic SKIP_TIMES, return empty dictionary + if not skip_times_string: + return {} + + # get list of skip items, but don't expand begin_end_incr yet + skip_list = getlist(skip_times_string, expand_begin_end_incr=False) + + for skip_item in skip_list: + try: + time_format, skip_times = skip_item.split(':') + + # get list of skip times for the time format, expanding begin_end_incr + skip_times_list = getlist(skip_times) + + # if time format is already in skip times dictionary, extend list + if time_format in skip_times_dict: + skip_times_dict[time_format].extend(skip_times_list) + else: + skip_times_dict[time_format] = skip_times_list + + except ValueError: + config.logger.error(f"SKIP_TIMES item does not match format: {skip_item}") + return None + + return skip_times_dict + + +def skip_time(time_info, skip_times): + """!Used to check the valid time of the current run time against list of times to skip. + Args: + @param time_info dictionary with time information to check + @param skip_times dictionary of times to skip, e.g. {'%d': [31]} means skip 31st day + @returns True if run time should be skipped, False if not + """ + if not skip_times: + return False + + for time_format, skip_time_list in skip_times.items(): + # extract time information from valid time based on skip time format + run_time_value = time_info.get('valid') + if not run_time_value: + return False + + run_time_value = run_time_value.strftime(time_format) + + # loop over times to skip for this format and check if it matches + for skip_time in skip_time_list: + if int(run_time_value) == int(skip_time): + return True + + # if skip time never matches, return False + return False + +
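A worked example of the two functions above, with assumed values. Setting SKIP_TIMES = "%m:begin_end_incr(3,11,1)", "%d:30,31" yields a dictionary keyed by datetime format, and skip_time then formats the valid time with each key and looks for a match:

    from datetime import datetime

    # hypothetical result of get_skip_times for the SKIP_TIMES value above
    skip_times = {'%m': ['3', '4', '5', '6', '7', '8', '9', '10', '11'],
                  '%d': ['30', '31']}

    # January 30: month 1 is not in the %m list, but day 30 matches %d,
    # so this run time is skipped
    skip_time({'valid': datetime(2021, 1, 30)}, skip_times)  # True

    # January 15 matches neither format, so it runs
    skip_time({'valid': datetime(2021, 1, 15)}, skip_times)  # False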
+def get_lead_sequence(config, input_dict=None, wildcard_if_empty=False): + """!Get forecast lead list from LEAD_SEQ or compute it from INIT_SEQ. + Restrict list by LEAD_SEQ_[MIN/MAX] if set. Returns a list of relativedelta objects. + Args: + @param config METplusConfig object to query config variable values + @param input_dict time dictionary needed to handle using INIT_SEQ. Must contain + valid key if processing INIT_SEQ + @param wildcard_if_empty if no lead sequence was set, return a + list with '*' if this is True, otherwise return a list with 0 + @returns list of relativedelta objects or a list containing 0 if none are found + """ + + out_leads = [] + lead_min, lead_max, no_max = _get_lead_min_max(config) + + # check if LEAD_SEQ, INIT_SEQ, or LEAD_SEQ_ are set + # if more than one is set, report an error and exit + lead_seq = getlist(config.getstr('config', 'LEAD_SEQ', '')) + init_seq = getlistint(config.getstr('config', 'INIT_SEQ', '')) + lead_groups = get_lead_sequence_groups(config) + + if not _are_lead_configs_ok(lead_seq, + init_seq, + lead_groups, + config, + input_dict, + no_max): + return None + + if lead_seq: + # return lead sequence if wildcard characters are used + if lead_seq == ['*']: + return lead_seq + + out_leads = _handle_lead_seq(config, + lead_seq, + lead_min, + lead_max) + + # use INIT_SEQ to build lead list based on the valid time + elif init_seq: + out_leads = _handle_init_seq(init_seq, + input_dict, + lead_min, + lead_max) + elif lead_groups: + out_leads = _handle_lead_groups(lead_groups) + + if not out_leads: + if wildcard_if_empty: + return ['*'] + + return [0] + + return out_leads + +def _are_lead_configs_ok(lead_seq, init_seq, lead_groups, + config, input_dict, no_max): + if lead_groups is None: + return False + + error_message = ('are both listed in the configuration. ' + 'Only one may be used at a time.') + if lead_seq: + if init_seq: + config.logger.error(f'LEAD_SEQ and INIT_SEQ {error_message}') + return False + + if lead_groups: + config.logger.error(f'LEAD_SEQ and LEAD_SEQ_ {error_message}') + return False + + if init_seq and lead_groups: + config.logger.error(f'INIT_SEQ and LEAD_SEQ_ {error_message}') + return False + + if init_seq: + # if input dictionary not passed in, + # cannot compute lead sequence from it, so exit + if input_dict is None: + config.logger.error('Cannot run using INIT_SEQ for this wrapper') + return False + + # if looping by init, fail and exit + if 'valid' not in input_dict.keys(): + log_msg = ('INIT_SEQ specified while looping by init time.' + ' Use LEAD_SEQ or change to loop by valid time') + config.logger.error(log_msg) + return False + + # maximum lead must be specified to run with INIT_SEQ + if no_max: + config.logger.error('LEAD_SEQ_MAX must be set to use INIT_SEQ') + return False + + return True + +def _get_lead_min_max(config): + # remove any items that are outside of the range specified + # by LEAD_SEQ_MIN and LEAD_SEQ_MAX + # convert min and max to relativedelta objects, then use current time + # to compare them to each forecast lead + # this is an approximation because relative time offsets depend on + # each runtime + huge_max = '4000Y' + lead_min_str = config.getstr_nocheck('config', 'LEAD_SEQ_MIN', '0') + lead_max_str = config.getstr_nocheck('config', 'LEAD_SEQ_MAX', huge_max) + no_max = lead_max_str == huge_max + lead_min = get_relativedelta(lead_min_str, 'H') + lead_max = get_relativedelta(lead_max_str, 'H') + return lead_min, lead_max, no_max +
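A lead such as '3m' (three months) has no fixed length in seconds, so the LEAD_SEQ_MIN/LEAD_SEQ_MAX filter in _handle_lead_seq below anchors every relativedelta to the current clock time before comparing, as the comments in _get_lead_min_max describe. A sketch of that idea with assumed values:

    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    # hypothetical config: LEAD_SEQ = 6H, 12H, 24H with
    # LEAD_SEQ_MIN = 12 and LEAD_SEQ_MAX = 24
    now = datetime.now()
    leads = [relativedelta(hours=h) for h in (6, 12, 24)]
    lead_min, lead_max = relativedelta(hours=12), relativedelta(hours=24)

    # adding "now" turns each offset into a comparable datetime,
    # so only the 12 and 24 hour leads survive the filter
    kept = [lead for lead in leads
            if now + lead_min <= now + lead <= now + lead_max]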
+def _handle_lead_seq(config, lead_strings, lead_min=None, lead_max=None): + out_leads = [] + leads = [] + for lead in lead_strings: + relative_delta = get_relativedelta(lead, 'H') + if relative_delta is not None: + leads.append(relative_delta) + else: + config.logger.error(f'Invalid item {lead} in LEAD_SEQ. Exiting.') + return None + + if lead_min is None and lead_max is None: + return leads + + # add current time to leads to approximate month and year length + now_time = datetime.now() + lead_min_approx = now_time + lead_min + lead_max_approx = now_time + lead_max + for lead in leads: + lead_approx = now_time + lead + if lead_approx >= lead_min_approx and lead_approx <= lead_max_approx: + out_leads.append(lead) + + return out_leads + +def _handle_init_seq(init_seq, input_dict, lead_min, lead_max): + out_leads = [] + lead_min_hours = ti_get_hours_from_relativedelta(lead_min) + lead_max_hours = ti_get_hours_from_relativedelta(lead_max) + + valid_hr = int(input_dict['valid'].strftime('%H')) + for init in init_seq: + if valid_hr >= init: + current_lead = valid_hr - init + else: + current_lead = valid_hr + (24 - init) + + while current_lead <= lead_max_hours: + if current_lead >= lead_min_hours: + out_leads.append(get_relativedelta(current_lead, default_unit='H')) + current_lead += 24 + + out_leads = sorted(out_leads, key=lambda + rd: ti_get_seconds_from_relativedelta(rd, input_dict['valid'])) + return out_leads + +def _handle_lead_groups(lead_groups): + """! Read groups of forecast leads and create a list with all unique items + + @param lead_groups dictionary where the values are lists of forecast + leads stored as relativedelta objects + @returns list of forecast leads stored as relativedelta objects + """ + out_leads = [] + for _, lead_seq in lead_groups.items(): + for lead in lead_seq: + if lead not in out_leads: + out_leads.append(lead) + + return out_leads + +def get_lead_sequence_groups(config): + # output will be a dictionary where the key will be the + # label specified and the value will be the list of forecast leads + lead_seq_dict = {} + # used in plotting + all_conf = config.keys('config') + indices = [] + regex = re.compile(r"LEAD_SEQ_(\d+)") + for conf in all_conf: + result = regex.match(conf) + if result is not None: + indices.append(result.group(1)) + + # loop over all possible variables and add them to list + for index in indices: + if config.has_option('config', f"LEAD_SEQ_{index}_LABEL"): + label = config.getstr('config', f"LEAD_SEQ_{index}_LABEL") + else: + log_msg = (f'Need to set LEAD_SEQ_{index}_LABEL to describe ' + f'LEAD_SEQ_{index}') + config.logger.error(log_msg) + return None + + # get forecast lead list for this index + lead_string_list = getlist(config.getstr('config', f'LEAD_SEQ_{index}')) + lead_seq = _handle_lead_seq(config, + lead_string_list, + lead_min=None, + lead_max=None) + # add to output dictionary + lead_seq_dict[label] = lead_seq + + return lead_seq_dict
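_handle_init_seq above builds the lead list from a daily cycle of initialization hours and the valid hour of the current run time, stepping by 24 hours up to LEAD_SEQ_MAX. A worked example with assumed values:

    # INIT_SEQ = 0, 12 ; valid time 06Z ; LEAD_SEQ_MIN = 0 ; LEAD_SEQ_MAX = 36
    #   init 00Z: 6 - 0 = 6          -> leads 6, 30  (next step, 54, exceeds 36)
    #   init 12Z: 6 + (24 - 12) = 18 -> lead 18      (next step, 42, exceeds 36)
    # sorted result: leads of 6, 18, and 30 hours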
diff --git a/metplus/util/time_util.py b/metplus/util/time_util.py index e1bd4b1f93..6e6c5cfc03 100755 --- a/metplus/util/time_util.py +++ b/metplus/util/time_util.py @@ -33,6 +33,18 @@ } + +def shift_time_seconds(time_str, shift): + """ Adjust time by shift seconds. Format is %Y%m%d%H%M%S + Args: + @param time_str: Start time in %Y%m%d%H%M%S + @param shift: Amount to adjust time in seconds + Returns: + New time in format %Y%m%d%H%M%S + """ + return (datetime.datetime.strptime(time_str, "%Y%m%d%H%M%S") + + datetime.timedelta(seconds=shift)).strftime("%Y%m%d%H%M%S") + + def get_relativedelta(value, default_unit='S'): """!Converts time values ending in Y, m, d, H, M, or S to relativedelta object Args: @@ -483,3 +495,17 @@ def ti_calculate(input_dict_preserve): out_dict['lead_seconds'] = total_seconds return out_dict + + +def add_to_time_input(time_input, clock_time=None, instance=None, custom=None): + if clock_time: + clock_dt = datetime.datetime.strptime(clock_time, '%Y%m%d%H%M%S') + time_input['now'] = clock_dt + + # if instance is set, use that value, otherwise use empty string + time_input['instance'] = instance if instance else '' + + # if custom is specified, set it + # otherwise leave it unset so it can be set within the wrapper + if custom: + time_input['custom'] = custom diff --git a/metplus/wrappers/ascii2nc_wrapper.py b/metplus/wrappers/ascii2nc_wrapper.py index 641b336f9a..02a06fd65e 100755 --- a/metplus/wrappers/ascii2nc_wrapper.py +++ b/metplus/wrappers/ascii2nc_wrapper.py @@ -12,10 +12,9 @@ import os -from ..util import met_util as util from ..util import time_util from . import CommandBuilder -from ..util import do_string_sub +from ..util import do_string_sub, skip_time, get_lead_sequence '''!@namespace ASCII2NCWrapper @brief Wraps the ASCII2NC tool to reformat ascii format to NetCDF @@ -242,14 +241,14 @@ def run_at_time(self, input_dict): Args: @param input_dict dictionary containing timing information """ - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: self.clear() input_dict['lead'] = lead time_info = time_util.ti_calculate(input_dict) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py index 31cf9da942..43e424cea7 100755 --- a/metplus/wrappers/command_builder.py +++ b/metplus/wrappers/command_builder.py @@ -19,19 +19,19 @@ from .command_runner import CommandRunner from ..util.constants import PYTHON_EMBEDDING_TYPES -from ..util import getlist -from ..util import met_util as util +from ..util import getlist, preprocess_file, loop_over_times_and_call from ..util import do_string_sub, ti_calculate, get_seconds_from_string -from ..util import get_time_from_file +from ..util import get_time_from_file, shift_time_seconds from ..util import config_metplus from ..util import METConfig from ..util import MISSING_DATA_VALUE from ..util import get_custom_string_list from ..util import get_wrapped_met_config_file, add_met_config_item, format_met_config -from ..util import remove_quotes +from ..util import remove_quotes, split_level from ..util import get_field_info, format_field_info -from ..util import get_wrapper_name +from ..util import get_wrapper_name, is_python_script from ..util.met_config import add_met_config_dict, handle_climo_dict +from ..util import mkdir_p, get_skip_times # pylint:disable=pointless-string-statement '''!@namespace CommandBuilder @@ -174,8 +174,7 @@ def create_c_dict(self): c_dict['CUSTOM_LOOP_LIST'] = get_custom_string_list(self.config, app_name) - c_dict['SKIP_TIMES'] = util.get_skip_times(self.config, - app_name) +
c_dict['SKIP_TIMES'] = get_skip_times(self.config, app_name) c_dict['MANDATORY'] = ( self.config.getbool('config', @@ -533,7 +532,7 @@ def find_data(self, time_info, var_info=None, data_type='', mandatory=True, # separate character from beginning of numeric # level value if applicable - level = util.split_level(v_level)[1] + level = split_level(v_level)[1] # set level to 0 character if it is not a number if not level.isdigit(): @@ -660,10 +659,10 @@ def find_exact_file(self, level, data_type, time_info, mandatory=True, # check if file exists input_data_type = self.c_dict.get(data_type + 'INPUT_DATATYPE', '') - processed_path = util.preprocess_file(file_path, - input_data_type, - self.config, - allow_dir=allow_dir) + processed_path = preprocess_file(file_path, + input_data_type, + self.config, + allow_dir=allow_dir) # report error if file path could not be found if not processed_path: @@ -706,9 +705,9 @@ def find_file_in_window(self, level, data_type, time_info, mandatory=True, # get range of times that will be considered valid_range_lower = self.c_dict.get(data_type + 'FILE_WINDOW_BEGIN', 0) valid_range_upper = self.c_dict.get(data_type + 'FILE_WINDOW_END', 0) - lower_limit = int(datetime.strptime(util.shift_time_seconds(valid_time, valid_range_lower), + lower_limit = int(datetime.strptime(shift_time_seconds(valid_time, valid_range_lower), "%Y%m%d%H%M%S").strftime("%s")) - upper_limit = int(datetime.strptime(util.shift_time_seconds(valid_time, valid_range_upper), + upper_limit = int(datetime.strptime(shift_time_seconds(valid_time, valid_range_upper), "%Y%m%d%H%M%S").strftime("%s")) msg = f"Looking for {data_type}INPUT files under {data_dir} within range " +\ @@ -767,16 +766,16 @@ def find_file_in_window(self, level, data_type, time_info, mandatory=True, # check if file(s) needs to be preprocessed before returning the path # if one file was found and return_list if False, return single file if len(closest_files) == 1 and not return_list: - return util.preprocess_file(closest_files[0], - self.c_dict.get(data_type + 'INPUT_DATATYPE', ''), - self.config) + return preprocess_file(closest_files[0], + self.c_dict.get(data_type + 'INPUT_DATATYPE', ''), + self.config) # return list if multiple files are found out = [] for close_file in closest_files: - outfile = util.preprocess_file(close_file, - self.c_dict.get(data_type + 'INPUT_DATATYPE', ''), - self.config) + outfile = preprocess_file(close_file, + self.c_dict.get(data_type + 'INPUT_DATATYPE', ''), + self.config) out.append(outfile) return out @@ -909,7 +908,7 @@ def write_list_file(self, filename, file_list, output_dir=None): list_path = os.path.join(list_dir, filename) - util.mkdir_p(list_dir) + mkdir_p(list_dir) self.logger.debug("Writing list of filenames...") with open(list_path, 'w') as file_handle: @@ -1004,7 +1003,7 @@ def find_and_check_output_file(self, time_info=None, if (not os.path.exists(parent_dir) and not self.c_dict.get('DO_NOT_RUN_EXE', False)): self.logger.debug(f"Creating output directory: {parent_dir}") - util.mkdir_p(parent_dir) + mkdir_p(parent_dir) if not output_exists or not skip_if_output_exists: return True @@ -1107,7 +1106,7 @@ def check_for_python_embedding(self, input_type, var_info): # reset file type to empty string to handle if python embedding is used for one field but not for the next self.c_dict[f'{input_type}_FILE_TYPE'] = '' - if not util.is_python_script(var_info[f"{var_input_type}_name"]): + if not is_python_script(var_info[f"{var_input_type}_name"]): # if not a python script, return var name return 
var_info[f"{var_input_type}_name"] @@ -1218,7 +1217,7 @@ def get_command(self): self.log_error('Must specify path to output file') return None - util.mkdir_p(parent_dir) + mkdir_p(parent_dir) cmd += " " + out_path @@ -1284,7 +1283,7 @@ def run_all_times(self, custom=None): @param custom (optional) custom loop string value """ - return util.loop_over_times_and_call(self.config, self, custom=custom) + return loop_over_times_and_call(self.config, self, custom=custom) @staticmethod def format_met_config_dict(c_dict, name, keys=None): diff --git a/metplus/wrappers/cyclone_plotter_wrapper.py b/metplus/wrappers/cyclone_plotter_wrapper.py index a0e0c9db5a..e6ceda8be1 100644 --- a/metplus/wrappers/cyclone_plotter_wrapper.py +++ b/metplus/wrappers/cyclone_plotter_wrapper.py @@ -37,9 +37,9 @@ WRAPPER_CANNOT_RUN = True EXCEPTION_ERR = err_msg -from ..util import met_util as util from ..util import do_string_sub from ..util import time_generator, add_to_time_input +from ..util import mkdir_p, get_files from . import CommandBuilder @@ -194,8 +194,7 @@ def retrieve_data(self): self.logger.debug("Get data from all files in the directory " + self.input_data) # Get the list of all files (full file path) in this directory - all_input_files = util.get_files(self.input_data, ".*.tcst", - self.logger) + all_input_files = get_files(self.input_data, ".*.tcst", self.logger) # read each file into pandas then concatenate them together df_list = [pd.read_csv(file, delim_whitespace=True) for file in all_input_files] @@ -343,7 +342,7 @@ def retrieve_data(self): # which is used to generate the plot. if self.gen_ascii: self.logger.debug(f" output dir: {self.output_dir}") - util.mkdir_p(self.output_dir) + mkdir_p(self.output_dir) ascii_track_parts = [self.init_date, '.csv'] ascii_track_output_name = ''.join(ascii_track_parts) final_df_filename = os.path.join(self.output_dir, ascii_track_output_name) @@ -425,7 +424,7 @@ def create_plot(self): plt.text(60, -130, watermark, fontsize=5, alpha=0.25) # Make sure the output directory exists, and create it if it doesn't. - util.mkdir_p(self.output_dir) + mkdir_p(self.output_dir) # get the points for the scatter plots (and the relevant information for annotations, etc.) points_list = self.get_plot_points() diff --git a/metplus/wrappers/ensemble_stat_wrapper.py b/metplus/wrappers/ensemble_stat_wrapper.py index e31ff82679..aa392e9b58 100755 --- a/metplus/wrappers/ensemble_stat_wrapper.py +++ b/metplus/wrappers/ensemble_stat_wrapper.py @@ -13,10 +13,9 @@ import os import glob -from ..util import met_util as util +from ..util import sub_var_list +from ..util import do_string_sub, parse_var_list, PYTHON_EMBEDDING_TYPES from . import CompareGriddedWrapper -from ..util import do_string_sub -from ..util import parse_var_list """!@namespace EnsembleStatWrapper @brief Wraps the MET tool ensemble_stat to compare ensemble datasets @@ -136,8 +135,8 @@ def create_c_dict(self): # check if more than 1 obs datatype is set to python embedding, # only one can be used - if (c_dict['OBS_POINT_INPUT_DATATYPE'] in util.PYTHON_EMBEDDING_TYPES and - c_dict['OBS_GRID_INPUT_DATATYPE'] in util.PYTHON_EMBEDDING_TYPES): + if (c_dict['OBS_POINT_INPUT_DATATYPE'] in PYTHON_EMBEDDING_TYPES and + c_dict['OBS_GRID_INPUT_DATATYPE'] in PYTHON_EMBEDDING_TYPES): self.log_error("Both OBS_ENSEMBLE_STAT_INPUT_POINT_DATATYPE and " "OBS_ENSEMBLE_STAT_INPUT_GRID_DATATYPE" " are set to Python Embedding types. 
" @@ -145,9 +144,9 @@ def create_c_dict(self): # if either are set, set OBS_INPUT_DATATYPE to that value so # it can be found by the check_for_python_embedding function - elif c_dict['OBS_POINT_INPUT_DATATYPE'] in util.PYTHON_EMBEDDING_TYPES: + elif c_dict['OBS_POINT_INPUT_DATATYPE'] in PYTHON_EMBEDDING_TYPES: c_dict['OBS_INPUT_DATATYPE'] = c_dict['OBS_POINT_INPUT_DATATYPE'] - elif c_dict['OBS_GRID_INPUT_DATATYPE'] in util.PYTHON_EMBEDDING_TYPES: + elif c_dict['OBS_GRID_INPUT_DATATYPE'] in PYTHON_EMBEDDING_TYPES: c_dict['OBS_INPUT_DATATYPE'] = c_dict['OBS_GRID_INPUT_DATATYPE'] c_dict['N_MEMBERS'] = ( @@ -424,8 +423,7 @@ def run_at_time_all_fields(self, time_info): return # parse optional var list for FCST and/or OBS fields - var_list = util.sub_var_list(self.c_dict['VAR_LIST_TEMP'], - time_info) + var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) # if empty var list for FCST/OBS, use None as first var, # else use first var in list diff --git a/metplus/wrappers/extract_tiles_wrapper.py b/metplus/wrappers/extract_tiles_wrapper.py index b0e3f99824..ed11b38356 100755 --- a/metplus/wrappers/extract_tiles_wrapper.py +++ b/metplus/wrappers/extract_tiles_wrapper.py @@ -13,9 +13,9 @@ from datetime import datetime import re -from ..util import met_util as util -from ..util import do_string_sub, ti_calculate -from ..util import parse_var_list +from ..util import do_string_sub, ti_calculate, skip_time +from ..util import get_lead_sequence, sub_var_list +from ..util import parse_var_list, round_0p5, get_storms, prune_empty from .regrid_data_plane_wrapper import RegridDataPlaneWrapper from . import CommandBuilder @@ -206,7 +206,7 @@ def run_at_time(self, input_dict): """ # loop of forecast leads and process each - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: input_dict['lead'] = lead @@ -217,7 +217,7 @@ def run_at_time(self, input_dict): f"Processing forecast lead {time_info['lead_string']}" ) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue @@ -247,7 +247,7 @@ def run_at_time_loop_string(self, time_info): # get unique storm ids or object cats from the input file # store list of lines from tcst/mtd file for each ID as the value - storm_dict = util.get_storms( + storm_dict = get_storms( input_path, sort_column=self.SORT_COLUMN[location_input] ) @@ -267,7 +267,7 @@ def run_at_time_loop_string(self, time_info): else: self.use_tc_stat_input(storm_dict, idx_dict) - util.prune_empty(self.c_dict['OUTPUT_DIR'], self.logger) + prune_empty(self.c_dict['OUTPUT_DIR'], self.logger) def use_tc_stat_input(self, storm_dict, idx_dict): """! Find storms in TCStat input file and create tiles using the storm. 
@@ -383,8 +383,7 @@ def get_object_indices(object_cats): def call_regrid_data_plane(self, time_info, track_data, input_type): # set var list from config using time info - var_list = util.sub_var_list(self.c_dict['VAR_LIST_TEMP'], - time_info) + var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) for data_type in ['FCST', 'OBS']: grid = self.get_grid(data_type, track_data[data_type], @@ -515,8 +514,8 @@ def get_grid_info(self, lat, lon, data_type): # float(lon) - lon_subtr adj_lon = float(lon) - self.c_dict['LON_ADJ'] adj_lat = float(lat) - self.c_dict['LAT_ADJ'] - lon0 = str(util.round_0p5(adj_lon)) - lat0 = str(util.round_0p5(adj_lat)) + lon0 = round_0p5(adj_lon) + lat0 = round_0p5(adj_lat) self.logger.debug(f'{data_type} ' f'lat: {lat} (track lat) => {lat0} (lat lower left), ' diff --git a/metplus/wrappers/gempak_to_cf_wrapper.py b/metplus/wrappers/gempak_to_cf_wrapper.py index 6f618427c9..53a5a5cb71 100755 --- a/metplus/wrappers/gempak_to_cf_wrapper.py +++ b/metplus/wrappers/gempak_to_cf_wrapper.py @@ -12,8 +12,7 @@ import os -from ..util import met_util as util -from ..util import do_string_sub +from ..util import do_string_sub, skip_time, get_lead_sequence from ..util import time_util from . import CommandBuilder @@ -75,7 +74,7 @@ def run_at_time(self, input_dict): Args: @param input_dict dictionary containing timing information """ - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: self.clear() input_dict['lead'] = lead @@ -87,7 +86,7 @@ def run_at_time(self, input_dict): time_info = time_util.ti_calculate(input_dict) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue diff --git a/metplus/wrappers/grid_diag_wrapper.py b/metplus/wrappers/grid_diag_wrapper.py index d44026af25..eb1c5e98bc 100755 --- a/metplus/wrappers/grid_diag_wrapper.py +++ b/metplus/wrappers/grid_diag_wrapper.py @@ -12,11 +12,9 @@ import os -from ..util import met_util as util from ..util import time_util from . import RuntimeFreqWrapper -from ..util import do_string_sub -from ..util import parse_var_list +from ..util import do_string_sub, parse_var_list, sub_var_list '''!@namespace GridDiagWrapper @brief Wraps the Grid-Diag tool @@ -187,7 +185,7 @@ def set_data_field(self, time_info): @param time_info time dictionary to use for string substitution @returns True if field list could be built, False if not. """ - field_list = util.sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) + field_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) if not field_list: self.log_error("Could not get field information from config.") return False diff --git a/metplus/wrappers/grid_stat_wrapper.py b/metplus/wrappers/grid_stat_wrapper.py index ad7650c0f9..379e6a6282 100755 --- a/metplus/wrappers/grid_stat_wrapper.py +++ b/metplus/wrappers/grid_stat_wrapper.py @@ -12,7 +12,6 @@ import os -from ..util import met_util as util from . import CompareGriddedWrapper # pylint:disable=pointless-string-statement diff --git a/metplus/wrappers/met_db_load_wrapper.py b/metplus/wrappers/met_db_load_wrapper.py index 1ed678cb9f..421d440cee 100755 --- a/metplus/wrappers/met_db_load_wrapper.py +++ b/metplus/wrappers/met_db_load_wrapper.py @@ -13,10 +13,9 @@ import os from datetime import datetime -from ..util import met_util as util -from ..util import time_util +from ..util import ti_calculate from . 
import RuntimeFreqWrapper -from ..util import do_string_sub, getlist +from ..util import do_string_sub, getlist, generate_tmp_filename '''!@namespace METDbLoadWrapper @brief Parent class for wrappers that run over a grouping of times @@ -118,7 +117,7 @@ def run_at_time_once(self, time_info): if time_info.get('lead') != '*': if (time_info.get('init') != '*' or time_info.get('valid') != '*'): - time_info = time_util.ti_calculate(time_info) + time_info = ti_calculate(time_info) self.set_environment_variables(time_info) @@ -234,7 +233,7 @@ def replace_values_in_xml(self, time_info): output_lines.append(output_line) # write tmp file with XML content with substituted values - out_filename = util.generate_tmp_filename() + out_filename = generate_tmp_filename() out_path = os.path.join(self.config.getdir('TMP_DIR'), out_filename) with open(out_path, 'w') as file_handle: diff --git a/metplus/wrappers/mode_wrapper.py b/metplus/wrappers/mode_wrapper.py index bec9f67cf2..1a539ea021 100755 --- a/metplus/wrappers/mode_wrapper.py +++ b/metplus/wrappers/mode_wrapper.py @@ -12,7 +12,6 @@ import os -from ..util import met_util as util from . import CompareGriddedWrapper from ..util import do_string_sub diff --git a/metplus/wrappers/mtd_wrapper.py b/metplus/wrappers/mtd_wrapper.py index fdc9b97761..217427badc 100755 --- a/metplus/wrappers/mtd_wrapper.py +++ b/metplus/wrappers/mtd_wrapper.py @@ -12,9 +12,9 @@ import os -from ..util import met_util as util -from ..util import time_util -from ..util import do_string_sub +from ..util import get_lead_sequence, sub_var_list +from ..util import ti_calculate +from ..util import do_string_sub, skip_time from ..util import parse_var_list from . import CompareGriddedWrapper @@ -179,7 +179,7 @@ def run_at_time(self, input_dict): @param input_dict dictionary containing timing information """ - if util.skip_time(input_dict, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(input_dict, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') return @@ -197,8 +197,7 @@ def run_at_time_loop_string(self, input_dict): Args: @param input_dict dictionary containing timing information """ - var_list = util.sub_var_list(self.c_dict['VAR_LIST_TEMP'], - input_dict) + var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], input_dict) # if only processing a single data set (FCST or OBS) then only read # that var list and process @@ -219,7 +218,7 @@ def run_at_time_loop_string(self, input_dict): for var_info in var_list: if self.c_dict.get('EXPLICIT_FILE_LIST', False): - time_info = time_util.ti_calculate(input_dict) + time_info = ti_calculate(input_dict) model_list_path = do_string_sub(self.c_dict['FCST_FILE_LIST'], **time_info) self.logger.debug(f"Explicit FCST file: {model_list_path}") @@ -246,13 +245,13 @@ def run_at_time_loop_string(self, input_dict): obs_list = [] # find files for each forecast lead time - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) tasks = [] for lead in lead_seq: input_dict['lead'] = lead - time_info = time_util.ti_calculate(input_dict) + time_info = ti_calculate(input_dict) tasks.append(time_info) for current_task in tasks: @@ -282,7 +281,7 @@ def run_at_time_loop_string(self, input_dict): # write ascii file with list of files to process input_dict['lead'] = lead_seq[0] - time_info = time_util.ti_calculate(input_dict) + time_info = ti_calculate(input_dict) # if var name is a python embedding script, check type of python # input and name file list file accordingly @@ -313,7 
+312,7 @@ def run_single_mode(self, input_dict, var_info): data_src = self.c_dict.get('SINGLE_DATA_SRC') if self.c_dict.get('EXPLICIT_FILE_LIST', False): - time_info = time_util.ti_calculate(input_dict) + time_info = ti_calculate(input_dict) single_list_path = do_string_sub( self.c_dict[f'{data_src}_FILE_LIST'], **time_info @@ -330,10 +329,10 @@ def run_single_mode(self, input_dict, var_info): else: find_method = self.find_model - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: input_dict['lead'] = lead - current_task = time_util.ti_calculate(input_dict) + current_task = ti_calculate(input_dict) single_file = find_method(current_task, var_info) if single_file is None: @@ -346,7 +345,7 @@ def run_single_mode(self, input_dict, var_info): # write ascii file with list of files to process input_dict['lead'] = lead_seq[0] - time_info = time_util.ti_calculate(input_dict) + time_info = ti_calculate(input_dict) file_ext = self.check_for_python_embedding(data_src, var_info) if not file_ext: return diff --git a/metplus/wrappers/pb2nc_wrapper.py b/metplus/wrappers/pb2nc_wrapper.py index 82519417c4..fff7783f79 100755 --- a/metplus/wrappers/pb2nc_wrapper.py +++ b/metplus/wrappers/pb2nc_wrapper.py @@ -13,9 +13,8 @@ import os import re -from ..util import getlistint -from ..util import met_util as util -from ..util import time_util +from ..util import getlistint, skip_time, get_lead_sequence +from ..util import ti_calculate from ..util import do_string_sub from . import CommandBuilder @@ -258,11 +257,11 @@ def set_valid_window_variables(self, time_info): def run_at_time(self, input_dict): """! Loop over each forecast lead and build pb2nc command """ # loop of forecast leads and process each - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: input_dict['lead'] = lead - lead_string = time_util.ti_calculate(input_dict)['lead_string'] + lead_string = ti_calculate(input_dict)['lead_string'] self.logger.info("Processing forecast lead {}".format(lead_string)) for custom_string in self.c_dict['CUSTOM_LOOP_LIST']: @@ -287,7 +286,7 @@ def run_at_time_once(self, input_dict): if time_info is None: return - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') return diff --git a/metplus/wrappers/pcp_combine_wrapper.py b/metplus/wrappers/pcp_combine_wrapper.py index 8f476647b0..f87b07fadf 100755 --- a/metplus/wrappers/pcp_combine_wrapper.py +++ b/metplus/wrappers/pcp_combine_wrapper.py @@ -7,12 +7,11 @@ import os from datetime import timedelta -from ..util import met_util as util -from ..util import do_string_sub, getlist +from ..util import do_string_sub, getlist, preprocess_file from ..util import get_seconds_from_string, ti_get_lead_string, ti_calculate from ..util import get_relativedelta, ti_get_seconds_from_relativedelta from ..util import time_string_to_met_time, seconds_to_met_time -from ..util import parse_var_list +from ..util import parse_var_list, template_to_regex, split_level from . 
import ReformatGriddedWrapper '''!@namespace PCPCombineWrapper @@ -348,9 +347,9 @@ def setup_subtract_method(self, time_info, accum, data_src): # get first file filepath1 = do_string_sub(full_template, **time_info) - file1 = util.preprocess_file(filepath1, - self.c_dict[data_src+'_INPUT_DATATYPE'], - self.config) + file1 = preprocess_file(filepath1, + self.c_dict[data_src+'_INPUT_DATATYPE'], + self.config) if file1 is None: self.log_error(f'Could not find {data_src} file {filepath1} ' @@ -394,9 +393,9 @@ def setup_subtract_method(self, time_info, accum, data_src): time_info2['custom'] = time_info.get('custom', '') filepath2 = do_string_sub(full_template, **time_info2) - file2 = util.preprocess_file(filepath2, - self.c_dict[data_src+'_INPUT_DATATYPE'], - self.config) + file2 = preprocess_file(filepath2, + self.c_dict[data_src+'_INPUT_DATATYPE'], + self.config) if file2 is None: self.log_error(f'Could not find {data_src} file {filepath2} ' @@ -445,10 +444,10 @@ def setup_sum_method(self, time_info, lookback, data_src): out_accum = time_string_to_met_time(lookback, 'S') time_info['level'] = in_accum - pcp_regex = util.template_to_regex( - self.c_dict[f'{data_src}_INPUT_TEMPLATE'], - time_info + pcp_regex = template_to_regex( + self.c_dict[f'{data_src}_INPUT_TEMPLATE'] ) + pcp_regex = do_string_sub(pcp_regex, **time_info) pcp_regex_split = pcp_regex.split('/') pcp_dir = os.path.join(self.c_dict[f'{data_src}_INPUT_DIR'], *pcp_regex_split[0:-1]) @@ -611,7 +610,7 @@ def _get_lookback_seconds(self, time_info, var_info, data_src): else: lookback = '0' - _, lookback = util.split_level(lookback) + _, lookback = split_level(lookback) lookback_seconds = get_seconds_from_string( lookback, @@ -791,7 +790,7 @@ def get_lowest_fcst_file(self, valid_time, data_src): search_file = do_string_sub(search_file, **time_info) self.logger.debug(f"Looking for {search_file}") - search_file = util.preprocess_file( + search_file = preprocess_file( search_file, self.c_dict[data_src+'_INPUT_DATATYPE'], self.config) @@ -817,8 +816,7 @@ def get_field_string(self, time_info=None, search_accum=0, name=None, # string sub values into full field info string using search time info if time_info: - field_info = do_string_sub(field_info, - **time_info) + field_info = do_string_sub(field_info, **time_info) return field_info def find_input_file(self, init_time, valid_time, search_accum, data_src): @@ -848,9 +846,9 @@ def find_input_file(self, init_time, valid_time, search_accum, data_src): in_template) input_path = do_string_sub(input_path, **time_info) - return util.preprocess_file(input_path, - self.c_dict[f'{data_src}_INPUT_DATATYPE'], - self.config), lead + return preprocess_file(input_path, + self.c_dict[f'{data_src}_INPUT_DATATYPE'], + self.config), lead def get_template_accum(self, accum_dict, search_time, lead, data_src): # apply string substitution to accum amount diff --git a/metplus/wrappers/plot_data_plane_wrapper.py b/metplus/wrappers/plot_data_plane_wrapper.py index 71d1b61eda..4f874c42f1 100755 --- a/metplus/wrappers/plot_data_plane_wrapper.py +++ b/metplus/wrappers/plot_data_plane_wrapper.py @@ -12,10 +12,9 @@ import os -from ..util import met_util as util from ..util import time_util from . 
import CommandBuilder -from ..util import do_string_sub, remove_quotes +from ..util import do_string_sub, remove_quotes, skip_time, get_lead_sequence '''!@namespace PlotDataPlaneWrapper @brief Wraps the PlotDataPlane tool to plot data @@ -115,14 +114,14 @@ def run_at_time(self, input_dict): Args: @param input_dict dictionary containing timing information """ - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: self.clear() input_dict['lead'] = lead time_info = time_util.ti_calculate(input_dict) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue diff --git a/metplus/wrappers/point_stat_wrapper.py b/metplus/wrappers/point_stat_wrapper.py index 9f5a1645c2..3115b0c80e 100755 --- a/metplus/wrappers/point_stat_wrapper.py +++ b/metplus/wrappers/point_stat_wrapper.py @@ -13,7 +13,6 @@ import os from ..util import getlistint -from ..util import met_util as util from ..util import time_util from ..util import do_string_sub from . import CompareGriddedWrapper diff --git a/metplus/wrappers/py_embed_ingest_wrapper.py b/metplus/wrappers/py_embed_ingest_wrapper.py index c59847deda..accd8ad1fa 100755 --- a/metplus/wrappers/py_embed_ingest_wrapper.py +++ b/metplus/wrappers/py_embed_ingest_wrapper.py @@ -13,11 +13,10 @@ import os import re -from ..util import met_util as util from ..util import time_util from . import CommandBuilder from . import RegridDataPlaneWrapper -from ..util import do_string_sub +from ..util import do_string_sub, get_lead_sequence VALID_PYTHON_EMBED_TYPES = ['NUMPY', 'XARRAY', 'PANDAS'] @@ -132,7 +131,7 @@ def run_at_time(self, input_dict): generally contains 'now' (current) time and 'init' or 'valid' time """ # get forecast leads to loop over - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) for lead in lead_seq: # set forecast lead time in hours diff --git a/metplus/wrappers/reformat_gridded_wrapper.py b/metplus/wrappers/reformat_gridded_wrapper.py index da38e4f1de..9acb458595 100755 --- a/metplus/wrappers/reformat_gridded_wrapper.py +++ b/metplus/wrappers/reformat_gridded_wrapper.py @@ -12,8 +12,8 @@ import os -from ..util import met_util as util -from ..util import time_util +from ..util import get_lead_sequence, sub_var_list +from ..util import time_util, skip_time from . 
import CommandBuilder # pylint:disable=pointless-string-statement @@ -52,7 +52,7 @@ def run_at_time(self, input_dict): """ app_name_caps = self.app_name.upper() class_name = self.__class__.__name__[0: -7] - lead_seq = util.get_lead_sequence(self.config, input_dict) + lead_seq = get_lead_sequence(self.config, input_dict) run_list = [] if self.config.getbool('config', 'FCST_'+app_name_caps+'_RUN', False): @@ -78,7 +78,7 @@ def run_at_time(self, input_dict): self.logger.info("Processing forecast lead " f"{time_info['lead_string']}") - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES')): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES')): self.logger.debug('Skipping run time') continue @@ -93,8 +93,8 @@ def run_at_time(self, input_dict): self.c_dict['CUSTOM_STRING'] = custom_string var_list_name = f'VAR_LIST_{to_run}' var_list = ( - util.sub_var_list(self.c_dict.get(var_list_name, ''), - time_info) + sub_var_list(self.c_dict.get(var_list_name, ''), + time_info) ) if not var_list: var_list = None diff --git a/metplus/wrappers/regrid_data_plane_wrapper.py b/metplus/wrappers/regrid_data_plane_wrapper.py index 1cf8de3142..c58ebfc7c2 100755 --- a/metplus/wrappers/regrid_data_plane_wrapper.py +++ b/metplus/wrappers/regrid_data_plane_wrapper.py @@ -12,12 +12,11 @@ import os -from ..util import met_util as util from ..util import time_util from ..util import do_string_sub from ..util import parse_var_list from ..util import get_process_list -from ..util import remove_quotes +from ..util import remove_quotes, split_level, format_level from . import ReformatGriddedWrapper # pylint:disable=pointless-string-statement @@ -173,7 +172,7 @@ def handle_output_file(self, time_info, field_info, data_type): @returns True if command should be run, False if it should not be run """ - _, level = util.split_level(field_info[f'{data_type.lower()}_level']) + _, level = split_level(field_info[f'{data_type.lower()}_level']) time_info['level'] = time_util.get_seconds_from_string(level, 'H') return self.find_and_check_output_file(time_info) @@ -255,7 +254,7 @@ def get_output_names(self, var_list, data_type): for field_info in var_list: input_name = field_info[f'{data_type.lower()}_name'] input_level = field_info[f'{data_type.lower()}_level'] - input_level = util.format_level(input_level) + input_level = format_level(input_level) output_name = f"{input_name}_{input_level}" output_names.append(output_name) diff --git a/metplus/wrappers/tc_gen_wrapper.py b/metplus/wrappers/tc_gen_wrapper.py index ab8873f96a..bec1e1a567 100755 --- a/metplus/wrappers/tc_gen_wrapper.py +++ b/metplus/wrappers/tc_gen_wrapper.py @@ -14,9 +14,8 @@ import datetime import re -from ..util import met_util as util from ..util import time_util -from ..util import do_string_sub +from ..util import do_string_sub, skip_time, get_lead_sequence from ..util import time_generator from . 
import CommandBuilder @@ -355,7 +354,7 @@ def run_at_time(self, input_dict): input_dict['custom'] = custom_string time_info = time_util.ti_calculate(input_dict) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue @@ -426,7 +425,7 @@ def find_input_files(self, time_info): ) # set METPLUS_LEAD_LIST to list of forecast leads used - lead_seq = util.get_lead_sequence(self.config, time_info) + lead_seq = get_lead_sequence(self.config, time_info) if lead_seq != [0]: lead_list = [] for lead in lead_seq: diff --git a/metplus/wrappers/tcrmw_wrapper.py b/metplus/wrappers/tcrmw_wrapper.py index 5881f9eb32..12a4cad6a2 100755 --- a/metplus/wrappers/tcrmw_wrapper.py +++ b/metplus/wrappers/tcrmw_wrapper.py @@ -12,11 +12,10 @@ import os -from ..util import met_util as util from ..util import time_util from . import CommandBuilder -from ..util import do_string_sub -from ..util import parse_var_list +from ..util import do_string_sub, skip_time, get_lead_sequence +from ..util import parse_var_list, sub_var_list '''!@namespace TCRMWWrapper @brief Wraps the TC-RMW tool @@ -212,7 +211,7 @@ def run_at_time(self, input_dict): time_info = time_util.ti_calculate(input_dict) - if util.skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): + if skip_time(time_info, self.c_dict.get('SKIP_TIMES', {})): self.logger.debug('Skipping run time') continue @@ -258,8 +257,7 @@ def set_data_field(self, time_info): @param time_info time dictionary to use for string substitution @returns True if field list could be built, False if not. """ - field_list = util.sub_var_list(self.c_dict['VAR_LIST_TEMP'], - time_info) + field_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) if not field_list: self.log_error("Could not get field information from config.") return False @@ -293,7 +291,7 @@ def find_input_files(self, time_info): self.c_dict['DECK_FILE'] = deck_file - lead_seq = util.get_lead_sequence(self.config, time_info) + lead_seq = get_lead_sequence(self.config, time_info) # get input files if self.c_dict['INPUT_FILE_LIST']: diff --git a/metplus/wrappers/usage_wrapper.py b/metplus/wrappers/usage_wrapper.py index d3fb8cf852..77c26b5758 100644 --- a/metplus/wrappers/usage_wrapper.py +++ b/metplus/wrappers/usage_wrapper.py @@ -13,7 +13,7 @@ class UsageWrapper(CommandBuilder): def __init__(self, config, instance=None): self.app_name = 'Usage' super().__init__(config, instance=instance) - # get unique list of processes from met_util + # get unique list of processes self.available_processes = list(set(val for val in LOWER_TO_WRAPPER_NAME.values())) self.available_processes.sort() diff --git a/metplus/wrappers/user_script_wrapper.py b/metplus/wrappers/user_script_wrapper.py index 50384c0190..32e50ac385 100755 --- a/metplus/wrappers/user_script_wrapper.py +++ b/metplus/wrappers/user_script_wrapper.py @@ -13,7 +13,6 @@ import os from datetime import datetime -from ..util import met_util as util from ..util import time_util from . 
import RuntimeFreqWrapper from ..util import do_string_sub diff --git a/ush/run_metplus.py b/ush/run_metplus.py index ecb0686eda..39149b7e42 100755 --- a/ush/run_metplus.py +++ b/ush/run_metplus.py @@ -27,13 +27,13 @@ from metplus.util import metplus_check from metplus.util import pre_run_setup, run_metplus, post_run_cleanup -from metplus.util import get_process_list from metplus import __version__ as metplus_version '''!@namespace run_metplus Main script that processes all the tasks in the PROCESS_LIST ''' + def main(): """!Main program. METplus script that invokes the necessary Python scripts @@ -49,13 +49,11 @@ def main(): "This script name will be removed in a future version.") config.logger.warning(msg) - # Use config object to get the list of processes to call - process_list = get_process_list(config) - - total_errors = run_metplus(config, process_list) + total_errors = run_metplus(config) post_run_cleanup(config, 'METplus', total_errors) + def usage(): """! How to call this script. """ @@ -73,6 +71,7 @@ def usage(): '''%(filename)) sys.exit(2) + def get_config_inputs_from_command_line(): """! Read command line arguments. Pull out configuration files and configuration variable overrides. Display @@ -121,6 +120,7 @@ def get_config_inputs_from_command_line(): return config_inputs + if __name__ == "__main__": try: produtil.setup.setup(send_dbn=False, jobname='run-METplus')
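With get_process_list removed from the caller, run_metplus now derives the list of wrappers to run from PROCESS_LIST internally. A minimal sketch of the simplified driver, assuming pre_run_setup accepts the parsed command-line config inputs:

    from metplus.util import pre_run_setup, run_metplus, post_run_cleanup

    def run(config_inputs):
        # config_inputs: config file paths plus variable overrides
        config = pre_run_setup(config_inputs)  # assumed signature
        total_errors = run_metplus(config)     # PROCESS_LIST read from config
        post_run_cleanup(config, 'METplus', total_errors)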