Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature #318 test local infile #332

Merged
merged 3 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion METdbLoad/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import pymysql
from pathlib import Path
from unittest.mock import patch

from METdataio.METdbLoad.ush.run_sql import RunSql
from METdataio.METdbLoad.test.utils import (
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
VERSION MODEL DESC FCST_LEAD FCST_VALID_BEG FCST_VALID_END OBS_LEAD OBS_VALID_BEG OBS_VALID_END FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBTYPE VX_MASK INTERP_MTHD INTERP_PNTS FCST_THRESH OBS_THRESH COV_THRESH ALPHA LINE_TYPE
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA ECNT 1888451 10 6.1671 NA 4.10602 -10.99955 12.20766 4.80348 NA NA NA NA NA 11.5788 NA NA
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RHIST 1888451 11 4212 9438 10585 10737 12398 16378 22545 34502 58414 131190 1578052
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA PHIST 321894 0.05 20 4483 5225 3333 3377 5000 4844 5907 6645 9166 12171 15018 18114 22435 23127 26574 27849 29769 35125 34203 29529
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS FULL NEAREST 1 NA NA NA NA RELP 1888451 10 186572.76786 189042.8504 185906.64366 190353.03136 186699.12262 193114.35913 185968.12183 188071.31945 191924.05755 190798.7262
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA ECNT 942667 10 5.70001 NA 4.00375 -11.88084 12.68579 4.93033 NA NA NA NA NA 12.38987 NA NA
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RHIST 942667 11 2413 5389 5596 6592 8077 10938 15850 24123 35918 63123 764648
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA PHIST 183510 0.05 20 2370 2243 1986 1214 2293 2296 2623 3138 3989 5920 7913 10604 12837 14690 16127 16845 18292 22671 20813 14646
V10.1.1 RRFS_GEFS_GF.SPP.SPPT NA 360000 20220507_120000 20220507_120000 000000 20220507_120039 20220507_120039 RETOP m L0 EchoTop18 km_MSL Z500 MRMS CONUS NEAREST 1 NA NA NA NA RELP 942667 10 93297.89802 95576.19722 92626.90794 93687.40913 93422.46865 96683.925 92821.59802 94516.88452 96700.49008 93333.22143
81 changes: 80 additions & 1 deletion METdbLoad/test/test_met_db_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MODE_DATA_DIR,
TCDIAG_DATA_DIR,
VSDB_DATA_DIR,
RHIST_DATA_DIR,
)


Expand Down Expand Up @@ -103,6 +104,19 @@ def assert_count_rows(cur, table, expected_count):
"line_data_sl1l2": 5,
},
),
(
RHIST_DATA_DIR,
"ensemble_stat",
{
"line_data_rhist": 2,
"line_data_rhist_rank":22,
"line_data_phist": 2,
"line_data_phist_bin": 40,
"line_data_ecnt": 2,
"line_data_relp": 2,
"line_data_relp_ens": 20,
},
),
],
)
def test_met_db_table_counts(
Expand Down Expand Up @@ -133,7 +147,7 @@ def test_met_db_indexes(
testRunSql,
tmp_path,
):
# set up to only apply indexes
# set up to "apply_indexes"
test_args = dict_to_args(
{
"xmlfile": str(
Expand Down Expand Up @@ -168,3 +182,68 @@ def test_met_db_indexes(
with pytest.raises(SystemExit):
with patch.object(RunSql, "apply_indexes", side_effect=KeyError):
load_main(test_args)

@pytest.mark.parametrize(
    "met_data_dir, met_tool, expected_counts, local_infile",
    [
        # point_stat data, loaded without and then with local_infile
        (
            POINT_STAT_DATA_DIR,
            "point_stat",
            {
                "line_data_vcnt": 1,
                "line_data_fho": 24,
                "line_data_cts": 24,
                "line_data_ctc": 24,
                "line_data_cnt": 10,
                "line_data_vl1l2": 1,
            },
            'false',
        ),
        (
            POINT_STAT_DATA_DIR,
            "point_stat",
            {
                "line_data_vcnt": 1,
                "line_data_fho": 24,
                "line_data_cts": 24,
                "line_data_ctc": 24,
                "line_data_cnt": 10,
                "line_data_vl1l2": 1,
            },
            'true',
        ),
        # mtd data, loaded without and then with local_infile
        (
            MTD_DATA_DIR,
            "mtd",
            {
                "mtd_2d_obj": 278,
                "mtd_3d_obj_single": 8,
            },
            'false',
        ),
        (
            MTD_DATA_DIR,
            "mtd",
            {
                "mtd_2d_obj": 278,
                "mtd_3d_obj_single": 8,
            },
            'true',
        ),
    ],
)
def test_local_in_file(
    emptyDB,
    testRunSql,
    tmp_path,
    met_data_dir,
    met_tool,
    expected_counts,
    local_infile,
):
    """Check the loaded row counts are the same with local_infile on or off.

    Each data set appears twice in the parametrization with identical
    expected counts, once with ``local_infile`` set to 'false' and once
    with 'true'.

    Args:
        emptyDB: fixture requested for its setup; not referenced in the body.
        testRunSql: fixture whose ``cur`` cursor is used to count rows.
        tmp_path: pytest temporary directory, used for the XML file and
            as the loader's tmpdir.
        met_data_dir: directory holding the MET data to load.
        met_tool: name of the MET tool the data belongs to.
        expected_counts: mapping of table name to expected row count.
        local_infile: value passed through to the generated XML
            load specification ('true' or 'false').
    """
    # Generate the load specification carrying the requested local_infile value.
    xml_path = get_xml_test_file(
        tmp_path, met_data_dir, met_tool, local_infile=local_infile
    )
    load_options = {
        "xmlfile": str(xml_path),
        "index": "false",
        "tmpdir": [str(tmp_path)],
    }

    load_main(dict_to_args(load_options))

    # Every listed table must hold exactly the expected number of rows.
    for table in expected_counts:
        assert_count_rows(testRunSql.cur, table, expected_counts[table])
9 changes: 6 additions & 3 deletions METdbLoad/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ def abs_path(rel_path):
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")


# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
MODE_DATA_DIR = abs_path("METdbLoad/test/data/mode/")

# Very small data sample for testing
VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/")
RHIST_DATA_DIR = abs_path("METdbLoad/test/data/rhist")

DEFAULT_LOAD_FLAGS = {
"stat_header_db_check": "true",
Expand All @@ -42,7 +44,7 @@ def _dict_to_xml(flags_dict):
return "\n ".join(flags)


def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS):
def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS, local_infile=True):
"""Return the xml load specification with substitute values.

Args:
Expand All @@ -65,6 +67,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
<database>mv_test</database>
<user>root</user>
<password>root_password</password>
<local_infile>{local_infile}</local_infile>
</connection>

<folder_tmpl>{met_data_dir}</folder_tmpl>
Expand All @@ -81,7 +84,7 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS
</load_spec>"""


def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}, local_infile=True):
"""Write test_load_specification.xml and return path

Args:
Expand All @@ -95,7 +98,7 @@ def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""
xml_path = tmp_path / "test_load_specification.xml"
with open(xml_path, "w") as text_file:
text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags))
text_file.write(populate_xml_load_spec(met_data_dir, met_tool, load_flags, local_infile))
return xml_path


Expand Down
4 changes: 4 additions & 0 deletions METdbLoad/ush/read_load_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ def read_db_connect(self, root):
self.connection['db_management_system'] = \
root.xpath('connection')[0].xpath('management_system')[0].text

if root.xpath('connection')[0].xpath('local_infile'):
self.connection['db_local_infile'] = \
root.xpath('connection')[0].xpath('local_infile')[0].text

except (RuntimeError, TypeError, NameError, KeyError):
self.logger.error("*** %s in read_xml read_db_connect ***", sys.exc_info()[0])
sys.exit("*** Error(s) found while reading XML file connection tag!")
Expand Down
25 changes: 19 additions & 6 deletions METdbLoad/ush/run_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def sql_on(self, connection):
N/A
"""

if 'db_local_infile' in connection.keys() and connection['db_local_infile'].lower() == 'false':
local_infile = False
else:
# Default behaviour
local_infile = True

try:
if (not 'db_host' in connection) or (not 'db_user' in connection):
logging.error("XML Load file does not have enough connection tags")
Expand All @@ -54,7 +60,7 @@ def sql_on(self, connection):
user=connection['db_user'],
passwd=connection['db_password'],
db=connection['db_database'],
local_infile=True)
local_infile=local_infile)

except pymysql.OperationalError as pop_err:
logging.error("*** %s in run_sql ***", str(pop_err))
Expand All @@ -71,8 +77,15 @@ def sql_on(self, connection):
# look at database to see whether we can use the local infile method
self.cur.execute("SHOW GLOBAL VARIABLES LIKE 'local_infile';")
result = self.cur.fetchall()
self.local_infile = result[0][1]
logging.debug("local_infile is %s", result[0][1])
db_infile = result[0][1]

# Check that both the connection and the database support local_infile
if db_infile == 'ON' and self.conn._local_infile:
self.local_infile = 'ON'
else:
self.local_infile = 'OFF'
logging.debug("local_infile is %s", self.local_infile)


@staticmethod
def sql_off(conn, cur):
Expand Down Expand Up @@ -155,9 +168,9 @@ def write_to_sql(raw_data, col_list, sql_table, sql_query, tmp_dir, sql_cur, loc
raw_data['obs_valid_beg'] = raw_data['obs_valid_beg'].astype(str)
raw_data['obs_valid_end'] = raw_data['obs_valid_end'].astype(str)
elif sql_table in (CN.MODE_HEADER, CN.MTD_HEADER):
raw_data['fcst_valid'] = raw_data['fcst_valid_beg'].astype(str)
raw_data['fcst_init'] = raw_data['fcst_valid_end'].astype(str)
raw_data['obs_valid'] = raw_data['fcst_init_beg'].astype(str)
raw_data['fcst_valid'] = raw_data['fcst_valid'].astype(str)
raw_data['fcst_init'] = raw_data['fcst_valid'].astype(str)
raw_data['obs_valid'] = raw_data['fcst_init'].astype(str)
# make a copy of the dataframe that is a list of lists and write to database
dfile = raw_data[col_list].values.tolist()
sql_cur.executemany(sql_query, dfile)
Expand Down
Loading