diff --git a/METdbLoad/conftest.py b/METdbLoad/conftest.py index 5fe7dfe..8e41f43 100644 --- a/METdbLoad/conftest.py +++ b/METdbLoad/conftest.py @@ -3,7 +3,6 @@ import os import pymysql from pathlib import Path -from unittest.mock import patch from METdataio.METdbLoad.ush.run_sql import RunSql from METdataio.METdbLoad.test.utils import ( diff --git a/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat b/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat new file mode 100644 index 0000000..fe6ccd4 --- /dev/null +++ b/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat @@ -0,0 +1,9 @@ +VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA ECNT 1888451 10 6.1671 NA 4.10602 -10.99955 12.20766 4.80348 NA NA NA NA NA 11.5788 NA NA +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RHIST 1888451 11 4212 9438 10585 10737 12398 16378 22545 34502 58414 131190 1578052 +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA PHIST 321894 0.05 20 4483 5225 3333 3377 5000 4844 5907 6645 9166 12171 15018 18114 22435 23127 26574 27849 29769 35125 34203 29529 +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RELP 1888451 10 
186572.76786 189042.8504 185906.64366 190353.03136 186699.12262 193114.35913 185968.12183 188071.31945 191924.05755 190798.7262 +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA ECNT 942667 10 5.70001 NA 4.00375 -11.88084 12.68579 4.93033 NA NA NA NA NA 12.38987 NA NA +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RHIST 942667 11 2413 5389 5596 6592 8077 10938 15850 24123 35918 63123 764648 +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA PHIST 183510 0.05 20 2370 2243 1986 1214 2293 2296 2623 3138 3989 5920 7913 10604 12837 14690 16127 16845 18292 22671 20813 14646 +V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RELP 942667 10 93297.89802 95576.19722 92626.90794 93687.40913 93422.46865 96683.925 92821.59802 94516.88452 96700.49008 93333.22143 diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py index 1d31619..2b45e26 100644 --- a/METdbLoad/test/test_met_db_load.py +++ b/METdbLoad/test/test_met_db_load.py @@ -14,6 +14,7 @@ MODE_DATA_DIR, TCDIAG_DATA_DIR, VSDB_DATA_DIR, + RHIST_DATA_DIR, ) @@ -103,6 +104,19 @@ def assert_count_rows(cur, table, expected_count): "line_data_sl1l2": 5, }, ), + ( + RHIST_DATA_DIR, + "ensemble_stat", + { + "line_data_rhist": 2, + "line_data_rhist_rank":22, + "line_data_phist": 2, + "line_data_phist_bin": 40, + "line_data_ecnt": 2, + "line_data_relp": 2, + "line_data_relp_ens": 20, + }, + ), ], ) def test_met_db_table_counts( @@ -133,7 +147,7 @@ def test_met_db_indexes( testRunSql, tmp_path, ): - # 
set up to only apply indexes + # set up to "apply_indexes" test_args = dict_to_args( { "xmlfile": str( @@ -168,3 +182,68 @@ def test_met_db_indexes( with pytest.raises(SystemExit): with patch.object(RunSql, "apply_indexes", side_effect=KeyError): load_main(test_args) + +@pytest.mark.parametrize( + "met_data_dir, met_tool, expected_counts, local_infile", + [ + ( + POINT_STAT_DATA_DIR, + "point_stat", + { + "line_data_vcnt": 1, + "line_data_fho": 24, + "line_data_cts": 24, + "line_data_ctc": 24, + "line_data_cnt": 10, + "line_data_vl1l2": 1, + }, + 'false', + ), + ( + POINT_STAT_DATA_DIR, + "point_stat", + { + "line_data_vcnt": 1, + "line_data_fho": 24, + "line_data_cts": 24, + "line_data_ctc": 24, + "line_data_cnt": 10, + "line_data_vl1l2": 1, + }, + 'true', + ), + ( + MTD_DATA_DIR, + "mtd", + { + "mtd_2d_obj": 278, + "mtd_3d_obj_single": 8, + }, + 'false', + ), + ( + MTD_DATA_DIR, + "mtd", + { + "mtd_2d_obj": 278, + "mtd_3d_obj_single": 8, + }, + 'true', + ), + ], +) +def test_local_in_file(emptyDB, testRunSql, tmp_path, met_data_dir, met_tool, expected_counts, local_infile): + """check we get the same result when local_infile is on or off""" + + test_args = dict_to_args( + { + "xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool, local_infile=local_infile)), + "index": "false", + "tmpdir": [str(tmp_path)], + } + ) + + load_main(test_args) + + for table, expected_count in expected_counts.items(): + assert_count_rows(testRunSql.cur, table, expected_count) diff --git a/METdbLoad/test/utils.py b/METdbLoad/test/utils.py index 57ccb50..d587021 100644 --- a/METdbLoad/test/utils.py +++ b/METdbLoad/test/utils.py @@ -14,6 +14,7 @@ def abs_path(rel_path): POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat") TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr") + # This data is copied from MET test data # https://hub.docker.com/r/dtcenter/met-data-output MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/") @@ -21,6 +22,7 @@ def 
abs_path(rel_path): # Very small data sample for testing VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/") +RHIST_DATA_DIR = abs_path("METdbLoad/test/data/rhist") DEFAULT_LOAD_FLAGS = { "stat_header_db_check": "true", @@ -42,7 +44,7 @@ def _dict_to_xml(flags_dict): return "\n ".join(flags) -def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS): +def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS, local_infile=True): """Return the xml load specification with substitute values. Args: @@ -65,6 +67,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS mv_test root root_password + {local_infile} {met_data_dir} @@ -81,7 +84,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS """ -def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}): +def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}, local_infile=True): """Write test_load_specification.xml and return path Args: @@ -95,7 +98,7 @@ def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}): """ xml_path = tmp_path / "test_load_specification.xml" with open(xml_path, "w") as text_file: - text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags)) + text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags, local_infile)) return xml_path diff --git a/METdbLoad/ush/read_load_xml.py b/METdbLoad/ush/read_load_xml.py index 2c832a5..a14eab2 100644 --- a/METdbLoad/ush/read_load_xml.py +++ b/METdbLoad/ush/read_load_xml.py @@ -257,6 +257,10 @@ def read_db_connect(self, root): self.connection['db_management_system'] = \ root.xpath('connection')[0].xpath('management_system')[0].text + if root.xpath('connection')[0].xpath('local_infile'): + self.connection['db_local_infile'] = \ + root.xpath('connection')[0].xpath('local_infile')[0].text + except (RuntimeError, TypeError, NameError, KeyError): self.logger.error("*** %s in read_xml 
read_db_connect ***", sys.exc_info()[0]) sys.exit("*** Error(s) found while reading XML file connection tag!") diff --git a/METdbLoad/ush/run_sql.py b/METdbLoad/ush/run_sql.py index 3c01bec..41f7b35 100644 --- a/METdbLoad/ush/run_sql.py +++ b/METdbLoad/ush/run_sql.py @@ -43,6 +43,12 @@ def sql_on(self, connection): N/A """ + if str(connection.get('db_local_infile')).strip().lower() == 'false': + local_infile = False + else: + # Default behaviour + local_infile = True + try: if (not 'db_host' in connection) or (not 'db_user' in connection): logging.error("XML Load file does not have enough connection tags") @@ -54,7 +60,7 @@ def sql_on(self, connection): user=connection['db_user'], passwd=connection['db_password'], db=connection['db_database'], - local_infile=True) + local_infile=local_infile) except pymysql.OperationalError as pop_err: logging.error("*** %s in run_sql ***", str(pop_err)) @@ -71,8 +77,15 @@ def sql_on(self, connection): # look at database to see whether we can use the local infile method self.cur.execute("SHOW GLOBAL VARIABLES LIKE 'local_infile';") result = self.cur.fetchall() - self.local_infile = result[0][1] - logging.debug("local_infile is %s", result[0][1]) + db_infile = result[0][1] + + # Check that both the connection and the database support local_infile + if db_infile == 'ON' and local_infile: + self.local_infile = 'ON' + else: + self.local_infile = 'OFF' + logging.debug("local_infile is %s", self.local_infile) + @staticmethod def sql_off(conn, cur): @@ -155,9 +168,9 @@ def write_to_sql(raw_data, col_list, sql_table, sql_query, tmp_dir, sql_cur, loc raw_data['obs_valid_beg'] = raw_data['obs_valid_beg'].astype(str) raw_data['obs_valid_end'] = raw_data['obs_valid_end'].astype(str) elif sql_table in (CN.MODE_HEADER, CN.MTD_HEADER): - raw_data['fcst_valid'] = raw_data['fcst_valid_beg'].astype(str) - raw_data['fcst_init'] = raw_data['fcst_valid_end'].astype(str) - raw_data['obs_valid'] = 
raw_data['fcst_init_beg'].astype(str) + raw_data['fcst_valid'] = raw_data['fcst_valid'].astype(str) + raw_data['fcst_init'] = raw_data['fcst_init'].astype(str) + raw_data['obs_valid'] = raw_data['obs_valid'].astype(str) # make a copy of the dataframe that is a list of lists and write to database dfile = raw_data[col_list].values.tolist() sql_cur.executemany(sql_query, dfile)