Skip to content

Commit

Permalink
Feature #318 test local infile (#332)
Browse files Browse the repository at this point in the history
* 318: tests and refactor for local_infile

* 318: test rhist data and local_infile

* small refactor to make "local_infile" configurable
from XML load file.

* fix apparent bug in run_sql when local_infile = false

* add test that the same result is loaded when local_infile is true/false

* add rhist data and test

* 318: fix whitespace
  • Loading branch information
John-Sharples authored Sep 13, 2024
1 parent 89c92a8 commit 49f7d37
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 11 deletions.
1 change: 0 additions & 1 deletion METdbLoad/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import pymysql
from pathlib import Path
from unittest.mock import patch

from METdataio.METdbLoad.ush.run_sql import RunSql
from METdataio.METdbLoad.test.utils import (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA ECNT 1888451 10 6.1671 NA 4.10602 -10.99955 12.20766 4.80348 NA NA NA NA NA 11.5788 NA NA
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RHIST 1888451 11 4212 9438 10585 10737 12398 16378 22545 34502 58414 131190 1578052
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA PHIST 321894 0.05 20 4483 5225 3333 3377 5000 4844 5907 6645 9166 12171 15018 18114 22435 23127 26574 27849 29769 35125 34203 29529
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RELP 1888451 10 186572.76786 189042.8504 185906.64366 190353.03136 186699.12262 193114.35913 185968.12183 188071.31945 191924.05755 190798.7262
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA ECNT 942667 10 5.70001 NA 4.00375 -11.88084 12.68579 4.93033 NA NA NA NA NA 12.38987 NA NA
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RHIST 942667 11 2413 5389 5596 6592 8077 10938 15850 24123 35918 63123 764648
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA PHIST 183510 0.05 20 2370 2243 1986 1214 2293 2296 2623 3138 3989 5920 7913 10604 12837 14690 16127 16845 18292 22671 20813 14646
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RELP 942667 10 93297.89802 95576.19722 92626.90794 93687.40913 93422.46865 96683.925 92821.59802 94516.88452 96700.49008 93333.22143
81 changes: 80 additions & 1 deletion METdbLoad/test/test_met_db_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MODE_DATA_DIR,
TCDIAG_DATA_DIR,
VSDB_DATA_DIR,
RHIST_DATA_DIR,
)


Expand Down Expand Up @@ -103,6 +104,19 @@ def assert_count_rows(cur, table, expected_count):
"line_data_sl1l2": 5,
},
),
(
RHIST_DATA_DIR,
"ensemble_stat",
{
"line_data_rhist": 2,
"line_data_rhist_rank":22,
"line_data_phist": 2,
"line_data_phist_bin": 40,
"line_data_ecnt": 2,
"line_data_relp": 2,
"line_data_relp_ens": 20,
},
),
],
)
def test_met_db_table_counts(
Expand Down Expand Up @@ -133,7 +147,7 @@ def test_met_db_indexes(
testRunSql,
tmp_path,
):
# set up to only apply indexes
# set up to "apply_indexes"
test_args = dict_to_args(
{
"xmlfile": str(
Expand Down Expand Up @@ -168,3 +182,68 @@ def test_met_db_indexes(
with pytest.raises(SystemExit):
with patch.object(RunSql, "apply_indexes", side_effect=KeyError):
load_main(test_args)

# Stacked parametrize decorators run every dataset with every local_infile
# value, so each dataset's expected counts are written once and cannot drift
# between the "true" and "false" cases.
@pytest.mark.parametrize("local_infile", ["false", "true"])
@pytest.mark.parametrize(
    "met_data_dir, met_tool, expected_counts",
    [
        (
            POINT_STAT_DATA_DIR,
            "point_stat",
            {
                "line_data_vcnt": 1,
                "line_data_fho": 24,
                "line_data_cts": 24,
                "line_data_ctc": 24,
                "line_data_cnt": 10,
                "line_data_vl1l2": 1,
            },
        ),
        (
            MTD_DATA_DIR,
            "mtd",
            {
                "mtd_2d_obj": 278,
                "mtd_3d_obj_single": 8,
            },
        ),
    ],
)
def test_local_in_file(
    emptyDB,
    testRunSql,
    tmp_path,
    met_data_dir,
    met_tool,
    expected_counts,
    local_infile,
):
    """Check the same rows are loaded whether local_infile is on or off.

    Args:
        emptyDB: fixture requested for its setup only; not referenced in the
            body (presumably provides a clean database -- confirm in conftest).
        testRunSql: fixture exposing a ``cur`` cursor used to count rows.
        tmp_path: pytest temporary directory; holds the generated XML load
            specification and is passed as the loader's ``tmpdir``.
        met_data_dir: directory containing the MET data to load.
        met_tool: name of the MET tool the data came from.
        expected_counts: mapping of table name to expected row count.
        local_infile: "true" or "false"; forwarded to ``get_xml_test_file``
            as the ``local_infile`` value for the XML load specification.
    """
    xml_file = get_xml_test_file(
        tmp_path, met_data_dir, met_tool, local_infile=local_infile
    )
    test_args = dict_to_args(
        {
            "xmlfile": str(xml_file),
            "index": "false",
            "tmpdir": [str(tmp_path)],
        }
    )

    load_main(test_args)

    # Identical expectations for both local_infile settings.
    for table, expected_count in expected_counts.items():
        assert_count_rows(testRunSql.cur, table, expected_count)
9 changes: 6 additions & 3 deletions METdbLoad/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ def abs_path(rel_path):
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")


# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
MODE_DATA_DIR = abs_path("METdbLoad/test/data/mode/")

# Very small data sample for testing
VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/")
RHIST_DATA_DIR = abs_path("METdbLoad/test/data/rhist")

DEFAULT_LOAD_FLAGS = {
"stat_header_db_check": "true",
Expand All @@ -42,7 +44,7 @@ def _dict_to_xml(flags_dict):
return "\n ".join(flags)


def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS):
def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS, local_infile=True):
"""Return the xml load specification with substitute values.
Args:
Expand All @@ -65,6 +67,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
<database>mv_test</database>
<user>root</user>
<password>root_password</password>
<local_infile>{local_infile}</local_infile>
</connection>
<folder_tmpl>{met_data_dir}</folder_tmpl>
Expand All @@ -81,7 +84,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
</load_spec>"""


def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}, local_infile=True):
"""Write test_load_specification.xml and return path
Args:
Expand All @@ -95,7 +98,7 @@ def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""
xml_path = tmp_path / "test_load_specification.xml"
with open(xml_path, "w") as text_file:
text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags))
text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags, local_infile))
return xml_path


Expand Down
4 changes: 4 additions & 0 deletions METdbLoad/ush/read_load_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ def read_db_connect(self, root):
self.connection['db_management_system'] = \
root.xpath('connection')[0].xpath('management_system')[0].text

if root.xpath('connection')[0].xpath('local_infile'):
self.connection['db_local_infile'] = \
root.xpath('connection')[0].xpath('local_infile')[0].text

except (RuntimeError, TypeError, NameError, KeyError):
self.logger.error("*** %s in read_xml read_db_connect ***", sys.exc_info()[0])
sys.exit("*** Error(s) found while reading XML file connection tag!")
Expand Down
25 changes: 19 additions & 6 deletions METdbLoad/ush/run_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def sql_on(self, connection):
N/A
"""

if 'db_local_infile' in connection.keys() and connection['db_local_infile'].lower() == 'false':
local_infile = False
else:
# Default behaviour
local_infile = True

try:
if (not 'db_host' in connection) or (not 'db_user' in connection):
logging.error("XML Load file does not have enough connection tags")
Expand All @@ -54,7 +60,7 @@ def sql_on(self, connection):
user=connection['db_user'],
passwd=connection['db_password'],
db=connection['db_database'],
local_infile=True)
local_infile=local_infile)

except pymysql.OperationalError as pop_err:
logging.error("*** %s in run_sql ***", str(pop_err))
Expand All @@ -71,8 +77,15 @@ def sql_on(self, connection):
# look at database to see whether we can use the local infile method
self.cur.execute("SHOW GLOBAL VARIABLES LIKE 'local_infile';")
result = self.cur.fetchall()
self.local_infile = result[0][1]
logging.debug("local_infile is %s", result[0][1])
db_infile = result[0][1]

# Check that both the connection and the database support local_infile
if db_infile == 'ON' and self.conn._local_infile:
self.local_infile = 'ON'
else:
self.local_infile = 'OFF'
logging.debug("local_infile is %s", self.local_infile)


@staticmethod
def sql_off(conn, cur):
Expand Down Expand Up @@ -155,9 +168,9 @@ def write_to_sql(raw_data, col_list, sql_table, sql_query, tmp_dir, sql_cur, loc
raw_data['obs_valid_beg'] = raw_data['obs_valid_beg'].astype(str)
raw_data['obs_valid_end'] = raw_data['obs_valid_end'].astype(str)
elif sql_table in (CN.MODE_HEADER, CN.MTD_HEADER):
raw_data['fcst_valid'] = raw_data['fcst_valid_beg'].astype(str)
raw_data['fcst_init'] = raw_data['fcst_valid_end'].astype(str)
raw_data['obs_valid'] = raw_data['fcst_init_beg'].astype(str)
raw_data['fcst_valid'] = raw_data['fcst_valid'].astype(str)
raw_data['fcst_init'] = raw_data['fcst_valid'].astype(str)
raw_data['obs_valid'] = raw_data['fcst_init'].astype(str)
# make a copy of the dataframe that is a list of lists and write to database
dfile = raw_data[col_list].values.tolist()
sql_cur.executemany(sql_query, dfile)
Expand Down

0 comments on commit 49f7d37

Please sign in to comment.