Skip to content

Commit

Permalink
Feature 318 tests for read data files (#329)
Browse files Browse the repository at this point in the history
* 318: tests for read_data_files

* rework how test data dirs are handled

* add test for read_data_files for mtd revisions

* 318: add data and test for vsdb file
  • Loading branch information
John-Sharples authored Sep 11, 2024
1 parent e9b80b8 commit f31a64e
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 37 deletions.
16 changes: 6 additions & 10 deletions METdbLoad/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
TOP_DIR = str(Path(__file__).parents[1])
sys.path.insert(0, os.path.abspath(TOP_DIR))


def parse_sql(filename):
"""Parse a .sql file and return a list of SQL statements"""
data = open(filename, "r").readlines()
Expand Down Expand Up @@ -112,18 +113,13 @@ def testRunSql():


@pytest.fixture
def point_stat_xml_file(tmp_path):
"""Get xml load file for point_stat test data."""
data_path = Path(TOP_DIR) / POINT_STAT_DATA_DIR
return get_xml_test_file(tmp_path, data_path, "point_stat")


@pytest.fixture
def get_xml_loadfile(point_stat_xml_file):
def load_and_read_xml():
def get_xml_loadfile():
def load_and_read_xml(
tmp_path, data_dir=POINT_STAT_DATA_DIR, met_tool="point_stat"
):
from METdataio.METdbLoad.ush.read_load_xml import XmlLoadFile

XML_FILE = point_stat_xml_file
XML_FILE = get_xml_test_file(tmp_path, data_dir, met_tool)
XML_LOADFILE = XmlLoadFile(XML_FILE)
XML_LOADFILE.read_xml()
return XML_LOADFILE
Expand Down
24 changes: 24 additions & 0 deletions METdbLoad/test/data/vsdb/gfs_20140802.vsdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
V01 GFS 45 2014080221 STAGE2 G218/LMV FHO>.50 APCP/03 SFC = 5789 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/APL FHO>.50 APCP/03 SFC = 2814 0.00000 0.00000 0.00249
V01 GFS 45 2014080221 STAGE2 G218/NEC FHO>.50 APCP/03 SFC = 3052 0.00197 0.00000 0.00229
V01 GFS 45 2014080221 STAGE2 G218/SEC FHO>.50 APCP/03 SFC = 3524 0.03490 0.00028 0.00738
V01 GFS 45 2014080221 STAGE2 G218/GMC FHO>.50 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE4 G218/RFC FHO>.50 APCP/03 SFC = 60582 0.00213 0.00002 0.00271
V01 GFS 45 2014080221 STAGE2 G218/NWC FHO>.75 APCP/03 SFC = 2556 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SWC FHO>.75 APCP/03 SFC = 1422 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/NMT FHO>.75 APCP/03 SFC = 6251 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/GRB FHO>.75 APCP/03 SFC = 3400 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SMT FHO>.75 APCP/03 SFC = 3802 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SPL FHO>1.0 APCP/03 SFC = 5484 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/MDW FHO>1.0 APCP/03 SFC = 10240 0.00000 0.00000 0.00283
V01 GFS 45 2014080221 STAGE2 G218/LMV FHO>1.0 APCP/03 SFC = 5789 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/APL FHO>1.0 APCP/03 SFC = 2814 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/NEC FHO>1.0 APCP/03 SFC = 3052 0.00000 0.00000 0.00033
V01 GFS 45 2014080221 STAGE2 G218/SEC FHO>1.0 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/GMC FHO>1.0 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE4 G218/RFC FHO>1.0 APCP/03 SFC = 60582 0.00000 0.00000 0.00050
V01 GFS 45 2014080221 STAGE2 G218/NWC SL1L2 APCP/03 SFC= 2556 0.2401E-01 0.6913E-01 0.0000E+00 0.4112E-01 0.1017E+00
V01 GFS 45 2014080221 STAGE2 G218/SWC SL1L2 APCP/03 SFC= 1422 0.7749E-01 0.2632E+00 0.2126E-01 0.1783E+00 0.8898E+00
V01 GFS 45 2014080221 STAGE2 G218/NMT SL1L2 APCP/03 SFC= 6251 0.6989E-01 0.3756E-01 0.1510E-02 0.1398E+00 0.6695E-01
V01 GFS 45 2014080221 STAGE2 G218/GRB SL1L2 APCP/03 SFC= 3400 0.1266E-01 0.4206E-02 0.1994E-04 0.1205E-01 0.3085E-02
V01 GFS 45 2014080221 STAGE2 G218/SMT SL1L2 APCP/03 SFC= 3802 0.6522E+00 0.2165E+00 0.4879E+00 0.1883E+01 0.7141E+00
13 changes: 9 additions & 4 deletions METdbLoad/test/test_met_db_load.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import argparse
import pytest
from pathlib import Path
from METdbLoad.conftest import TOP_DIR
from METdbLoad.ush.met_db_load import main as load_main

from METdataio.METdbLoad.test.utils import (
Expand All @@ -13,6 +11,7 @@
MTD_DATA_DIR,
MODE_DATA_DIR,
TCDIAG_DATA_DIR,
VSDB_DATA_DIR,
)


Expand Down Expand Up @@ -94,6 +93,14 @@ def assert_count_rows(cur, table, expected_count):
"mode_obj_single": 6,
},
),
(
VSDB_DATA_DIR,
"vsdb",
{
"line_data_ctc": 19,
"line_data_sl1l2": 5,
},
),
],
)
def test_met_db_table_counts(
Expand All @@ -104,8 +111,6 @@ def test_met_db_table_counts(
met_tool,
expected_counts,
):

met_data_dir = str(Path(TOP_DIR) / met_data_dir)
test_data = {
"xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool)),
"index": "true",
Expand Down
68 changes: 62 additions & 6 deletions METdbLoad/test/test_read_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,79 @@
import pytest

from METdataio.METdbLoad.ush.read_data_files import ReadDataFiles
from METdataio.METdbLoad.test.utils import (
POINT_STAT_DATA_DIR,
MTD_DATA_DIR,
)


def test_counts(get_xml_loadfile):
def test_counts(tmp_path, get_xml_loadfile):
"""Count parts of the files loaded in."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path, POINT_STAT_DATA_DIR)

# Read all of the data from the data files into a dataframe
FILE_DATA = ReadDataFiles()

# read in the data files, with options specified by XML flags
FILE_DATA.read_data(XML_LOADFILE.flags,
XML_LOADFILE.load_files,
XML_LOADFILE.line_types)
FILE_DATA.read_data(
XML_LOADFILE.flags, XML_LOADFILE.load_files, XML_LOADFILE.line_types
)

# number of files
assert len(XML_LOADFILE.load_files) == 2
# number of lines of data
assert FILE_DATA.stat_data.shape[0] == 94
# number of line types
assert FILE_DATA.stat_data.line_type.unique().size == 7


def test_mtd_loads(tmp_path, get_xml_loadfile):
    """Read the data under MTD_DATA_DIR and check file count and table shapes."""
    load_spec = get_xml_loadfile(tmp_path, MTD_DATA_DIR)

    # Read every file named in the load spec, using the options from the XML.
    reader = ReadDataFiles()
    reader.read_data(load_spec.flags, load_spec.load_files, load_spec.line_types)

    # Number of files listed in the load spec.
    n_files = len(load_spec.load_files)
    assert n_files == 2

    # Shapes of the MTD 2D and 3D-single tables after reading.
    assert reader.mtd_2d_data.shape == (278, 43)
    assert reader.mtd_3d_single_data.shape == (8, 48)


def test_mtd_loads_revision(tmp_path, get_xml_loadfile):
    """Write a small MTD 2D revision file, read it, and check the REV_ labels."""
    # Contents of the test MTD 2D revision file: one header line, six data lines.
    data = (
        """VERSION MODEL DESC FCST_LEAD FCST_VALID OBS_LEAD OBS_VALID T_DELTA FCST_T_BEG FCST_T_END FCST_RAD FCST_THR OBS_T_BEG OBS_T_END OBS_RAD OBS_THR FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBJECT_ID OBJECT_CAT TIME_INDEX AREA CENTROID_X CENTROID_Y CENTROID_LAT CENTROID_LON AXIS_ANG INTENSITY_10 INTENSITY_25 INTENSITY_50 INTENSITY_75 INTENSITY_90 INTENSITY_99\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 F001 CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00"""
    )

    # Put the revision file in its own sub-directory of the pytest tmp dir.
    tmp_mtd_dir = tmp_path / "mtd_revision"
    tmp_mtd_dir.mkdir()
    (tmp_mtd_dir / "mtd_REVISION_TEST_2d.txt").write_text(data)

    load_spec = get_xml_loadfile(tmp_path, tmp_mtd_dir)
    reader = ReadDataFiles()
    reader.read_data(load_spec.flags, load_spec.load_files, load_spec.line_types)

    assert len(load_spec.load_files) == 1
    assert reader.mtd_2d_data.shape == (10, 43)
    assert reader.mtd_3d_single_data.shape == (0, 0)

    # Check revisions have been correctly labeled in both variable columns.
    fcst_revs = reader.mtd_2d_data["fcst_var"] == "REV_APCP_01"
    assert sum(fcst_revs) == 4
    obs_revs = reader.mtd_2d_data["obs_var"] == "REV_APCP_01"
    assert sum(obs_revs) == 4
16 changes: 8 additions & 8 deletions METdbLoad/test/test_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""Test reading XML file."""

def test_loadflags(get_xml_loadfile):
def test_loadflags(tmp_path, get_xml_loadfile):
"""Read various flags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.flags['load_stat']
assert XML_LOADFILE.flags['load_mode']
assert XML_LOADFILE.flags['load_mtd']
Expand All @@ -18,22 +18,22 @@ def test_loadflags(get_xml_loadfile):
assert XML_LOADFILE.flags['force_dup_file']
assert XML_LOADFILE.flags['load_xml']

def test_loadgroup(get_xml_loadfile):
def test_loadgroup(tmp_path, get_xml_loadfile):
"""Read group and description from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.group == "Testing"
assert XML_LOADFILE.description == "testing with pytest"

def test_connection(get_xml_loadfile):
def test_connection(tmp_path, get_xml_loadfile):
"""Read connection tags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.connection['db_host'] == "localhost"
assert XML_LOADFILE.connection['db_port'] == 3306
assert XML_LOADFILE.connection['db_database'] == "mv_test"
assert XML_LOADFILE.connection['db_user'] == "root"
assert XML_LOADFILE.connection['db_management_system'] == "mysql"

def test_insertsize(get_xml_loadfile):
def test_insertsize(tmp_path, get_xml_loadfile):
"""Read insert_size from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.insert_size == 1
28 changes: 19 additions & 9 deletions METdbLoad/test/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
from pathlib import Path


def abs_path(rel_path):
    """Join rel_path onto this file's ``parents[2]`` directory and return a str."""
    base_dir = Path(__file__).parents[2]
    return str(base_dir / rel_path)


# Use data from METreformat where available
ENSEMBLE_STAT_DATA_DIR = "METreformat/test/data/ensemble_stat"
GRID_STAT_DATA_DIR = "METreformat/test/data/grid_stat/mctc_mcts"
MPR_DATA_DIR = "METreformat/test/data/mpr/climo_data"
POINT_STAT_DATA_DIR = "METreformat/test/data/point_stat"
TCDIAG_DATA_DIR = "METreformat/test/data/tcdiag_tcmpr"
ENSEMBLE_STAT_DATA_DIR = abs_path("METreformat/test/data/ensemble_stat")
GRID_STAT_DATA_DIR = abs_path("METreformat/test/data/grid_stat/mctc_mcts")
MPR_DATA_DIR = abs_path("METreformat/test/data/mpr/climo_data")
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")

# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = "METdbLoad/test/data/mtd/"
MODE_DATA_DIR = "METdbLoad/test/data/mode/"
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
MODE_DATA_DIR = abs_path("METdbLoad/test/data/mode/")

# Very small data sample for testing
VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/")

DEFAULT_LOAD_FLAGS = {
"stat_header_db_check": "true",
Expand Down Expand Up @@ -72,9 +82,9 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS

def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""Write test_load_specification.xml and return path
Args:
tmp_path (Path): Path to write test file to.
tmp_path (Path): Path to write test file to.
met_data_dir (str): directory containing MET files to load
met_tool (str): Name of MET tool that generated files, e.g. "point_stat"
load_flags (dict): Optional.
Expand Down

0 comments on commit f31a64e

Please sign in to comment.