Skip to content

Commit

Permalink
Feature dtcenter#2253 diff_util example
Browse files Browse the repository at this point in the history
  • Loading branch information
John-Sharples committed Jul 18, 2023
1 parent 618103f commit 8f9e8e7
Showing 1 changed file with 343 additions and 0 deletions.
343 changes: 343 additions & 0 deletions internal/tests/pytests/util/diff_util/test_diff_util_bom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
# Tests for metplus/util/diff_util.py
# Requires pillow in Python env

import pytest
import numpy as np
from unittest import mock
from netCDF4 import Dataset
from metplus.util import diff_util as du

# Default contents for the dummy netCDF files built by make_nc. The pieces
# are named individually and then assembled in the positional order that
# make_nc takes them: lon, lat, z, data, variable.
_DEFAULT_LON = [359, 0, 1]
_DEFAULT_LAT = [-1, 0, 1]
_DEFAULT_Z = [0, 1]
# Nested as 3 x 3 x 2, matching the lengths of the lon, lat and z lists.
_DEFAULT_DATA = [
    [[1, 2], [3, 4], [5, 6]],
    [[2, 3], [4, 5], [6, 7]],
    [[30, 31], [33, 32], [34, 39]],
]
_DEFAULT_VARIABLE = "Temp"

DEFAULT_NC = [
    _DEFAULT_LON,
    _DEFAULT_LAT,
    _DEFAULT_Z,
    _DEFAULT_DATA,
    _DEFAULT_VARIABLE,
]


@pytest.fixture(scope="module")
def dummy_nc1(tmp_path_factory):
    """Build the reference netCDF file once per module and return its path.

    The file is written by make_nc into a fresh "data1" temporary directory
    using the DEFAULT_NC contents.
    """
    target_dir = tmp_path_factory.mktemp("data1")
    # DEFAULT_NC holds exactly (lon, lat, z, data, variable), in that order.
    return make_nc(target_dir, *DEFAULT_NC)


def _statment_in_capfd(capfd, check_print):
    """Assert that each expected statement appears in the captured stdout.

    Args:
        capfd: object exposing ``readouterr()`` that returns an
            ``(out, err)`` pair, e.g. pytest's ``capfd`` fixture.
        check_print: a single string, or an iterable of strings, that must
            each occur as a substring of the captured stdout.

    Raises:
        AssertionError: if any expected statement is missing from stdout.
    """
    out, _ = capfd.readouterr()
    # Echo the captured text so it is visible in pytest's failure report.
    print("out: ", out)

    # A bare string would otherwise be iterated character by character,
    # reducing the check to "every character occurs somewhere in the output".
    if isinstance(check_print, str):
        check_print = [check_print]

    for statement in check_print:
        assert statement in out, (
            "expected text not found in captured stdout: %r" % (statement,)
        )


def make_nc(tmp_path, lon, lat, z, data, variable="Temp"):
    """Write a small dummy netCDF file and return its path.

    We could do this with a lot less code if xarray were available.

    Args:
        tmp_path: directory (path object) in which "fake.nc4" is created.
        lon: values written to the "Longitude" variable.
        lat: values written to the "Latitude" variable.
        z: values written to the "Levels" variable.
        data: values written to the first "time" index of the data variable,
            dimensioned (lon, lat, z).
        variable: name of the data variable.

    Returns:
        Path of the file that was written.
    """
    # Note: "nc4" is not included in NETCDF_EXTENSIONS, hence we use it
    # here to specifically trigger the call to netCDF.Dataset in
    # get_file_type.
    file_name = tmp_path / "fake.nc4"

    with Dataset(file_name, "w", format="NETCDF4") as nc_file:
        # diff_util can't deal with groups, so dimensions and variables
        # are all attached to the root group.
        dimension_sizes = (
            ("lon", len(lon)),
            ("lat", len(lat)),
            ("z", len(z)),
            ("time", None),
        )
        for dim_name, dim_size in dimension_sizes:
            nc_file.createDimension(dim_name, dim_size)

        lon_var = nc_file.createVariable("Longitude", "f4", "lon")
        lat_var = nc_file.createVariable("Latitude", "f4", "lat")
        level_var = nc_file.createVariable("Levels", "i4", "z")
        data_var = nc_file.createVariable(
            variable, "f4", ("time", "lon", "lat", "z")
        )
        # "Time" is created but never written to.
        nc_file.createVariable("Time", "i4", "time")

        lon_var[:] = lon
        lat_var[:] = lat
        level_var[:] = z
        data_var[0, :, :, :] = data

    return file_name


@pytest.mark.parametrize(
    "path,expected",
    [
        ("/path/to/file.csv", "csv"),
        ("/path/to/file.jpeg", "image"),
        ("/path/to/file.jpg", "image"),
        ("/path/to/file.nc", "netcdf"),
        ("/path/to/file.cdf", "netcdf"),
        ("/path/to/file.pdf", "pdf"),
        ("/path/to/file.zip", "skip .zip"),
        ("/path/to/file.png", "skip .png"),
        ("/path/to/file.bigfoot", "unknown"),
    ],
)
@pytest.mark.util
def test_get_file_type(path, expected):
    """get_file_type returns the expected label for each file extension."""
    assert du.get_file_type(path) == expected


@pytest.mark.util
def test_get_file_type_unsupported():
    """An extension listed in UNSUPPORTED_EXTENSIONS is reported as unsupported."""
    # Temporarily replace the module-level list so ".foo" counts as unsupported.
    with mock.patch.object(du, "UNSUPPORTED_EXTENSIONS", [".foo"]):
        file_type = du.get_file_type("/path/to/file.foo")
    assert file_type == "unsupported .foo"


@pytest.mark.util
def test_get_file_type_extensions():
    """No extension may appear in more than one of the extension lists.

    A duplicate could give an unexpected result from get_file_type.
    """
    extension_groups = (
        du.IMAGE_EXTENSIONS,
        du.NETCDF_EXTENSIONS,
        du.SKIP_EXTENSIONS,
        du.PDF_EXTENSIONS,
        du.CSV_EXTENSIONS,
        du.UNSUPPORTED_EXTENSIONS,
    )
    all_extensions = []
    for group in extension_groups:
        all_extensions.extend(group)

    # Converting to a set drops duplicates, so the sizes match only if
    # every extension is unique.
    assert len(set(all_extensions)) == len(all_extensions)


@pytest.mark.util
def test_get_file_type_nc4(dummy_nc1):
    """A real netCDF file with the ".nc4" extension is reported as netcdf."""
    assert du.get_file_type(dummy_nc1) == "netcdf"


@pytest.mark.parametrize(
    "val1,val2,expected",
    [
        (1.22222, 1.222221, True),
        (190.22222, 190.22221, False),
        (0.999991, 1, True),
        (0.999990, 1, False),
    ],
)
@pytest.mark.util
def test_is_equal_rounded(val1, val2, expected):
    """is_equal_rounded treats nearly identical values as equal.

    The cases pair values that differ only slightly and check which
    differences are, and are not, small enough to count as equal.
    """
    result = du.is_equal_rounded(val1, val2)
    assert result == expected


@pytest.mark.parametrize(
    "nc_data,fields,expected,check_print",
    [
        # Compare exact same data
        (
            list(DEFAULT_NC),
            None,
            True,
            None,
        ),
        # Field names differ
        (
            [*DEFAULT_NC[:4], "Foo"],
            None,
            False,
            [
                "ERROR: Field list differs between files",
                "File_A: ['Latitude', 'Levels', 'Longitude', 'Temp', 'Time']",
                "File_B:['Foo', 'Latitude', 'Levels', 'Longitude', 'Time']",
            ],
        ),
        # One small value change
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, 39.001]],
                ],
                DEFAULT_NC[4],
            ],
            None,
            False,
            [
                "ERROR: Field (Temp) values differ",
                "Min diff: -0.00",
                "Max diff: 0.0",
                "1 / 18 points differ",
            ],
        ),
        # Value changed but not comparing that field
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, 39.001]],
                ],
                DEFAULT_NC[4],
            ],
            ["Longitude", "Latitude", "Levels"],
            True,
            None,
        ),
        # NaN in the data: a warning is printed and the result is still True
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, np.nan]],
                ],
                DEFAULT_NC[4],
            ],
            "Temp",
            True,
            [
                "WARNING: Variable Temp contains NaN values.",
                "Cannot perform comparison.",
            ],
        ),
        # Field doesn't exist
        (
            list(DEFAULT_NC),
            "Bar",
            False,
            ["ERROR: Field Bar not found"],
        ),
    ],
)
@pytest.mark.util
def test_nc_is_equal(
    capfd, tmp_path_factory, dummy_nc1, nc_data, fields, expected, check_print
):
    """Compare the reference netCDF file against a per-case second file.

    nc_data holds the positional arguments for make_nc (lon, lat, z, data,
    variable), fields is passed straight to nc_is_equal, expected is the
    result it should return, and check_print lists text that must appear
    on stdout (or None to skip that check).
    """
    # Build the second file that dummy_nc1 is compared to.
    second_file = make_nc(tmp_path_factory.mktemp("data2"), *nc_data)

    result = du.nc_is_equal(dummy_nc1, second_file, fields=fields, debug=True)
    assert result == expected

    if check_print:
        _statment_in_capfd(capfd, check_print)


@pytest.mark.util
def test_compare_files_not_exists(capfd, dummy_nc1):
    """compare_files reports a missing second file as a 4-item tuple."""
    missing_path = "/This/is/fake.nc4"
    expected_result = (
        missing_path,
        "",
        "file not found (in truth but missing now)",
        "",
    )

    result = du.compare_files(dummy_nc1, missing_path, debug=True)
    assert result == expected_result


# Plain-text contents used to populate files for the directory comparison
# tests; the two versions differ from each other.
file_content_1 = (
    "Some text about meteorology.\n"
    " And some data 1, 2, 3!"
)
file_content_2 = (
    "Some other text about science.\n"
    " And some data 1.1, 2.2, 3.3!"
)

# CSV contents, one source line per CSV row; the two versions differ in
# the header row and in the last data row.
csv_content_1 = (
    "Apples, Milk, Bread\n"
    "6, 18, 4\n"
    "9, six, 66\n"
)
csv_content_2 = (
    "Oranges, Milk, Bread\n"
    "6, 18, 4\n"
    "9, 6, 2\n"
)


@pytest.mark.parametrize(
    "files_to_make_a,files_to_make_b,expected,check_print",
    [
        # Simple check with 1 identical file
        (
            [("file1.txt", file_content_1)],
            [("file1.txt", file_content_1)],
            [],
            ["No differences found in any files"],
        ),
        # Different content
        (
            [("file1.txt", file_content_1)],
            [("file1.txt", file_content_2)],
            [("dir_a/file1.txt", "dir_b/file1.txt", "Text diff", "")],
            ["ERROR: Line differs"],
        ),
        # Check CSV
        ([("file1.csv", csv_content_1)], [("file1.csv", csv_content_1)], [], None),
        # Check .conf skipped: metplus_final.conf exists only in dir_a,
        # yet no difference is expected
        (
            [("file1.csv", csv_content_1), ("metplus_final.conf", file_content_1)],
            [("file1.csv", csv_content_1)],
            [],
            ["No differences found in any files"],
        ),
        # Check files in dir_b not in dir_a
        (
            [
                ("file1.csv", csv_content_1),
            ],
            [
                ("file1.csv", csv_content_1),
                ("file3.txt", file_content_1),
                ("metplus_final.conf", file_content_1),
            ],
            [("", "dir_b/file3.txt", "file not found (new output)", "")],
            ["ERROR: File does not exist:"],
        ),
        # TODO: add more combinations here
    ],
)
@pytest.mark.util
def test_compare_dir(
    capfd, tmp_path_factory, files_to_make_a, files_to_make_b, expected, check_print
):
    """Populate two temporary directories and check compare_dir's report.

    files_to_make_a / files_to_make_b are lists of (file name, content)
    pairs written into each directory. expected is the list of tuples that
    compare_dir should return, written with the placeholders "dir_a" and
    "dir_b" in place of the real temporary paths. check_print is a list of
    phrases that must appear on stdout, or None to skip that check. It is a
    list rather than a bare string so that each phrase is matched as a
    whole, not character by character.
    """
    # make two temp directories
    dir_a = tmp_path_factory.mktemp("dir_a")
    dir_b = tmp_path_factory.mktemp("dir_b")

    # update the expected value with the actual temp dir
    def _str_replace(t, str1, str2):
        return tuple(x.replace(str1, str2) for x in t)

    expected = [_str_replace(x, "dir_a", str(dir_a)) for x in expected]
    expected = [_str_replace(x, "dir_b", str(dir_b)) for x in expected]

    # make all the test files
    for directory, files_to_make in (
        (dir_a, files_to_make_a),
        (dir_b, files_to_make_b),
    ):
        for name, content in files_to_make:
            with open(directory / name, "w") as f:
                f.write(content)

    actual = du.compare_dir(str(dir_a), str(dir_b), debug=True)
    assert actual == expected
    if check_print:
        _statment_in_capfd(capfd, check_print)

0 comments on commit 8f9e8e7

Please sign in to comment.