diff --git a/METdbLoad/conftest.py b/METdbLoad/conftest.py
index 5fe7dfe..8e41f43 100644
--- a/METdbLoad/conftest.py
+++ b/METdbLoad/conftest.py
@@ -3,7 +3,6 @@
import os
import pymysql
from pathlib import Path
-from unittest.mock import patch
from METdataio.METdbLoad.ush.run_sql import RunSql
from METdataio.METdbLoad.test.utils import (
diff --git a/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat b/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat
new file mode 100644
index 0000000..fe6ccd4
--- /dev/null
+++ b/METdbLoad/test/data/rhist/ensemble_stat_RRFS_GEFS_GF.SPP.SPPT_RETOP_MRMS_20220507_120000V.stat
@@ -0,0 +1,9 @@
+VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA ECNT 1888451 10 6.1671 NA 4.10602 -10.99955 12.20766 4.80348 NA NA NA NA NA 11.5788 NA NA
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RHIST 1888451 11 4212 9438 10585 10737 12398 16378 22545 34502 58414 131190 1578052
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA PHIST 321894 0.05 20 4483 5225 3333 3377 5000 4844 5907 6645 9166 12171 15018 18114 22435 23127 26574 27849 29769 35125 34203 29529
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RELP 1888451 10 186572.76786 189042.8504 185906.64366 190353.03136 186699.12262 193114.35913 185968.12183 188071.31945 191924.05755 190798.7262
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA ECNT 942667 10 5.70001 NA 4.00375 -11.88084 12.68579 4.93033 NA NA NA NA NA 12.38987 NA NA
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RHIST 942667 11 2413 5389 5596 6592 8077 10938 15850 24123 35918 63123 764648
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA PHIST 183510 0.05 20 2370 2243 1986 1214 2293 2296 2623 3138 3989 5920 7913 10604 12837 14690 16127 16845 18292 22671 20813 14646
+V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RELP 942667 10 93297.89802 95576.19722 92626.90794 93687.40913 93422.46865 96683.925 92821.59802 94516.88452 96700.49008 93333.22143
diff --git a/METdbLoad/test/test_met_db_load.py b/METdbLoad/test/test_met_db_load.py
index 1d31619..2b45e26 100644
--- a/METdbLoad/test/test_met_db_load.py
+++ b/METdbLoad/test/test_met_db_load.py
@@ -14,6 +14,7 @@
MODE_DATA_DIR,
TCDIAG_DATA_DIR,
VSDB_DATA_DIR,
+ RHIST_DATA_DIR,
)
@@ -103,6 +104,19 @@ def assert_count_rows(cur, table, expected_count):
"line_data_sl1l2": 5,
},
),
+ (
+ RHIST_DATA_DIR,
+ "ensemble_stat",
+ {
+ "line_data_rhist": 2,
+ "line_data_rhist_rank": 22,
+ "line_data_phist": 2,
+ "line_data_phist_bin": 40,
+ "line_data_ecnt": 2,
+ "line_data_relp": 2,
+ "line_data_relp_ens": 20,
+ },
+ ),
],
)
def test_met_db_table_counts(
@@ -133,7 +147,7 @@ def test_met_db_indexes(
testRunSql,
tmp_path,
):
- # set up to only apply indexes
+ # set up to "apply_indexes"
test_args = dict_to_args(
{
"xmlfile": str(
@@ -168,3 +182,68 @@ def test_met_db_indexes(
with pytest.raises(SystemExit):
with patch.object(RunSql, "apply_indexes", side_effect=KeyError):
load_main(test_args)
+
+@pytest.mark.parametrize(
+ "met_data_dir, met_tool, expected_counts, local_infile",
+ [
+ (
+ POINT_STAT_DATA_DIR,
+ "point_stat",
+ {
+ "line_data_vcnt": 1,
+ "line_data_fho": 24,
+ "line_data_cts": 24,
+ "line_data_ctc": 24,
+ "line_data_cnt": 10,
+ "line_data_vl1l2": 1,
+ },
+ 'false',
+ ),
+ (
+ POINT_STAT_DATA_DIR,
+ "point_stat",
+ {
+ "line_data_vcnt": 1,
+ "line_data_fho": 24,
+ "line_data_cts": 24,
+ "line_data_ctc": 24,
+ "line_data_cnt": 10,
+ "line_data_vl1l2": 1,
+ },
+ 'true',
+ ),
+ (
+ MTD_DATA_DIR,
+ "mtd",
+ {
+ "mtd_2d_obj": 278,
+ "mtd_3d_obj_single": 8,
+ },
+ 'false',
+ ),
+ (
+ MTD_DATA_DIR,
+ "mtd",
+ {
+ "mtd_2d_obj": 278,
+ "mtd_3d_obj_single": 8,
+ },
+ 'true',
+ ),
+ ],
+)
+def test_local_in_file(emptyDB, testRunSql, tmp_path, met_data_dir, met_tool, expected_counts, local_infile):
+ """Check that the loaded row counts are the same with local_infile on or off."""
+
+ test_args = dict_to_args(
+ {
+ "xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool, local_infile=local_infile)),
+ "index": "false",
+ "tmpdir": [str(tmp_path)],
+ }
+ )
+
+ load_main(test_args)
+
+ for table, expected_count in expected_counts.items():
+ assert_count_rows(testRunSql.cur, table, expected_count)
diff --git a/METdbLoad/test/utils.py b/METdbLoad/test/utils.py
index 57ccb50..d587021 100644
--- a/METdbLoad/test/utils.py
+++ b/METdbLoad/test/utils.py
@@ -14,6 +14,7 @@ def abs_path(rel_path):
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")
+
# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
@@ -21,6 +22,7 @@ def abs_path(rel_path):
# Very small data sample for testing
VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/")
+RHIST_DATA_DIR = abs_path("METdbLoad/test/data/rhist")
DEFAULT_LOAD_FLAGS = {
"stat_header_db_check": "true",
@@ -42,7 +44,7 @@ def _dict_to_xml(flags_dict):
return "\n ".join(flags)
-def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS):
+def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS, local_infile=True):
"""Return the xml load specification with substitute values.
Args:
@@ -65,6 +67,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
mv_test
root
root_password
+ {local_infile}
{met_data_dir}
@@ -81,7 +84,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
"""
-def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
+def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}, local_infile=True):
"""Write test_load_specification.xml and return path
Args:
@@ -95,7 +98,7 @@ def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""
xml_path = tmp_path / "test_load_specification.xml"
with open(xml_path, "w") as text_file:
- text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags))
+ text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags, local_infile))
return xml_path
diff --git a/METdbLoad/ush/read_load_xml.py b/METdbLoad/ush/read_load_xml.py
index 2c832a5..a14eab2 100644
--- a/METdbLoad/ush/read_load_xml.py
+++ b/METdbLoad/ush/read_load_xml.py
@@ -257,6 +257,10 @@ def read_db_connect(self, root):
self.connection['db_management_system'] = \
root.xpath('connection')[0].xpath('management_system')[0].text
+ local_infile_tags = root.xpath('connection')[0].xpath('local_infile')  # optional tag; look it up once
+ if local_infile_tags:
+     self.connection['db_local_infile'] = local_infile_tags[0].text
+
except (RuntimeError, TypeError, NameError, KeyError):
self.logger.error("*** %s in read_xml read_db_connect ***", sys.exc_info()[0])
sys.exit("*** Error(s) found while reading XML file connection tag!")
diff --git a/METdbLoad/ush/run_sql.py b/METdbLoad/ush/run_sql.py
index 3c01bec..41f7b35 100644
--- a/METdbLoad/ush/run_sql.py
+++ b/METdbLoad/ush/run_sql.py
@@ -43,6 +43,12 @@ def sql_on(self, connection):
N/A
"""
+ # Default is True; only an explicit 'false' (any case, padded or not) turns it off.
+ # str() keeps an empty <local_infile/> tag (whose text is None) from raising here.
+ local_infile = (
+     str(connection.get('db_local_infile', 'true')).strip().lower() != 'false'
+ )
+
try:
if (not 'db_host' in connection) or (not 'db_user' in connection):
logging.error("XML Load file does not have enough connection tags")
@@ -54,7 +60,7 @@ def sql_on(self, connection):
user=connection['db_user'],
passwd=connection['db_password'],
db=connection['db_database'],
- local_infile=True)
+ local_infile=local_infile)
except pymysql.OperationalError as pop_err:
logging.error("*** %s in run_sql ***", str(pop_err))
@@ -71,8 +77,15 @@ def sql_on(self, connection):
# look at database to see whether we can use the local infile method
self.cur.execute("SHOW GLOBAL VARIABLES LIKE 'local_infile';")
result = self.cur.fetchall()
- self.local_infile = result[0][1]
- logging.debug("local_infile is %s", result[0][1])
+ # second column of the SHOW GLOBAL VARIABLES row is the server-side value
+ db_infile = result[0][1]
+
+ # Local infile loading is only usable when the server AND our connection allow it.
+ # Use the flag we passed to pymysql.connect, not pymysql's private conn._local_infile.
+ both_enabled = db_infile == 'ON' and local_infile
+ self.local_infile = 'ON' if both_enabled else 'OFF'
+ logging.debug("local_infile is %s", self.local_infile)
+
@staticmethod
def sql_off(conn, cur):
@@ -155,9 +168,9 @@ def write_to_sql(raw_data, col_list, sql_table, sql_query, tmp_dir, sql_cur, loc
raw_data['obs_valid_beg'] = raw_data['obs_valid_beg'].astype(str)
raw_data['obs_valid_end'] = raw_data['obs_valid_end'].astype(str)
elif sql_table in (CN.MODE_HEADER, CN.MTD_HEADER):
- raw_data['fcst_valid'] = raw_data['fcst_valid_beg'].astype(str)
- raw_data['fcst_init'] = raw_data['fcst_valid_end'].astype(str)
- raw_data['obs_valid'] = raw_data['fcst_init_beg'].astype(str)
+ raw_data['fcst_valid'] = raw_data['fcst_valid'].astype(str)
+ raw_data['fcst_init'] = raw_data['fcst_init'].astype(str)  # each column is stringified from itself
+ raw_data['obs_valid'] = raw_data['obs_valid'].astype(str)
# make a copy of the dataframe that is a list of lists and write to database
dfile = raw_data[col_list].values.tolist()
sql_cur.executemany(sql_query, dfile)