forked from dtcenter/METplus
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature dtcenter#2253 diff_util example
- Loading branch information
1 parent
618103f
commit 8f9e8e7
Showing
1 changed file
with
343 additions
and
0 deletions.
There are no files selected for viewing
343 changes: 343 additions & 0 deletions
343
internal/tests/pytests/util/diff_util/test_diff_util_bom.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,343 @@ | ||
# Tests for metplus/util/diff_util.py | ||
# Requires pillow in Python env | ||
|
||
import pytest | ||
import numpy as np | ||
from unittest import mock | ||
from netCDF4 import Dataset | ||
from metplus.util import diff_util as du | ||
|
||
# Default contents of the dummy netCDF files. The elements are ordered to
# match the positional parameters of make_nc that follow tmp_path:
# lon, lat, z, data, variable.
DEFAULT_NC = [
    # lon
    [359, 0, 1],
    # lat
    [-1, 0, 1],
    # z
    [0, 1],
    # data: one [lat][z] table per lon entry
    [
        [[1, 2], [3, 4], [5, 6]],
        [[2, 3], [4, 5], [6, 7]],
        [[30, 31], [33, 32], [34, 39]],
    ],
    # variable
    "Temp",
]
|
||
|
||
@pytest.fixture(scope="module")
def dummy_nc1(tmp_path_factory):
    """Return the path of a temporary netCDF file built from DEFAULT_NC."""
    target_dir = tmp_path_factory.mktemp("data1")
    # DEFAULT_NC holds lon, lat, z, data, variable in make_nc's argument order
    return make_nc(target_dir, *DEFAULT_NC)
|
||
|
||
def _statment_in_capfd(capfd, check_print):
    """Assert that every expected statement appears in the captured stdout.

    Args:
        capfd: pytest ``capfd`` fixture, or any object whose
            ``readouterr()`` returns an ``(out, err)`` pair.
        check_print: a single expected substring, or an iterable of
            expected substrings.

    Raises:
        AssertionError: if an expected statement is missing from stdout.
    """
    out, _ = capfd.readouterr()
    # Echo the captured text so it is visible in pytest's failure report.
    print("out: ", out)
    # A bare string has to be checked as one statement. Iterating over it
    # would only verify that each individual character occurs somewhere in
    # the output, which passes for almost any non-trivial output.
    if isinstance(check_print, str):
        check_print = [check_print]
    for statement in check_print:
        assert statement in out, f"{statement!r} not found in captured output"
|
||
|
||
def make_nc(tmp_path, lon, lat, z, data, variable="Temp"):
    """Write a dummy netCDF4 file named "fake.nc4" and return its path.

    Args:
        tmp_path: directory (path object) in which the file is created.
        lon: values written to the "Longitude" variable.
        lat: values written to the "Latitude" variable.
        z: values written to the "Levels" variable.
        data: values written at time index 0 of the data variable, whose
            dimensions are ("time", "lon", "lat", "z").
        variable: name of the data variable.

    Returns:
        The path of the file that was written.
    """
    # We can do this with a lot less code if xarray is available.

    # Note: "nc4" is not included in NETCDF_EXTENSIONS, hence
    # we use it here to specifically trigger the call to
    # netCDF.Dataset in get_file_type.
    file_name = tmp_path / "fake.nc4"
    with Dataset(file_name, "w", format="NETCDF4") as root:
        # diff_util can't deal with groups, so dimensions and variables
        # are all attached to the root group.
        for dim_name, dim_size in (
            ("lon", len(lon)),
            ("lat", len(lat)),
            ("z", len(z)),
            ("time", None),
        ):
            root.createDimension(dim_name, dim_size)

        # create variables
        lon_var = root.createVariable("Longitude", "f4", "lon")
        lat_var = root.createVariable("Latitude", "f4", "lat")
        level_var = root.createVariable("Levels", "i4", "z")
        data_var = root.createVariable(variable, "f4", ("time", "lon", "lat", "z"))
        # "Time" is created so that it exists in the file, but it is never filled
        root.createVariable("Time", "i4", "time")

        lon_var[:] = lon
        lat_var[:] = lat
        level_var[:] = z
        data_var[0, :, :, :] = data

    return file_name
|
||
|
||
@pytest.mark.parametrize(
    "path,expected",
    [
        ("/path/to/file.csv", "csv"),
        ("/path/to/file.jpeg", "image"),
        ("/path/to/file.jpg", "image"),
        ("/path/to/file.nc", "netcdf"),
        ("/path/to/file.cdf", "netcdf"),
        ("/path/to/file.pdf", "pdf"),
        ("/path/to/file.zip", "skip .zip"),
        ("/path/to/file.png", "skip .png"),
        ("/path/to/file.bigfoot", "unknown"),
    ],
)
@pytest.mark.util
def test_get_file_type(path, expected):
    """get_file_type returns the expected file type for each sample path."""
    assert du.get_file_type(path) == expected
|
||
|
||
@pytest.mark.util
def test_get_file_type_unsupported():
    """An extension listed in UNSUPPORTED_EXTENSIONS is reported as unsupported."""
    # Temporarily replace the module-level list so ".foo" is the only entry
    with mock.patch.object(du, "UNSUPPORTED_EXTENSIONS", [".foo"]):
        file_type = du.get_file_type("/path/to/file.foo")
    assert file_type == "unsupported .foo"
|
||
|
||
@pytest.mark.util
def test_get_file_type_extensions():
    """No file extension may appear in more than one extension list."""
    # A duplicated extension could make get_file_type return an
    # unexpected result, so collect every entry and look for repeats.
    flat_list = []
    for group in (
        du.IMAGE_EXTENSIONS,
        du.NETCDF_EXTENSIONS,
        du.SKIP_EXTENSIONS,
        du.PDF_EXTENSIONS,
        du.CSV_EXTENSIONS,
        du.UNSUPPORTED_EXTENSIONS,
    ):
        flat_list.extend(group)
    assert len(flat_list) == len(set(flat_list))
|
||
|
||
@pytest.mark.util
def test_get_file_type_nc4(dummy_nc1):
    """The "fake.nc4" file written by make_nc is reported as netcdf."""
    assert du.get_file_type(dummy_nc1) == "netcdf"
|
||
|
||
@pytest.mark.parametrize(
    "val1,val2,expected",
    [
        (1.22222, 1.222221, True),
        (190.22222, 190.22221, False),
        (0.999991, 1, True),
        (0.999990, 1, False),
    ],
)
@pytest.mark.util
def test_is_equal_rounded(val1, val2, expected):
    """is_equal_rounded gives the expected verdict for each pair of values."""
    actual = du.is_equal_rounded(val1, val2)
    assert actual == expected
|
||
|
||
@pytest.mark.parametrize(
    "nc_data,fields,expected,check_print",
    [
        # Second file holds exactly the same data
        (list(DEFAULT_NC), None, True, None),
        # Data variable has a different name in the second file
        (
            [*DEFAULT_NC[:4], "Foo"],
            None,
            False,
            [
                "ERROR: Field list differs between files",
                "File_A: ['Latitude', 'Levels', 'Longitude', 'Temp', 'Time']",
                "File_B:['Foo', 'Latitude', 'Levels', 'Longitude', 'Time']",
            ],
        ),
        # A single data point is slightly different
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, 39.001]],
                ],
                DEFAULT_NC[4],
            ],
            None,
            False,
            [
                "ERROR: Field (Temp) values differ",
                "Min diff: -0.00",
                "Max diff: 0.0",
                "1 / 18 points differ",
            ],
        ),
        # Same changed data point, but only the coordinate fields are compared
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, 39.001]],
                ],
                DEFAULT_NC[4],
            ],
            ["Longitude", "Latitude", "Levels"],
            True,
            None,
        ),
        # NaN in the compared field: a warning is expected in the output
        # and the expected return value is still True
        (
            [
                *DEFAULT_NC[:3],
                [
                    [[1, 2], [3, 4], [5, 6]],
                    [[2, 3], [4, 5], [6, 7]],
                    [[30, 31], [33, 32], [34, np.nan]],
                ],
                DEFAULT_NC[4],
            ],
            "Temp",
            True,
            [
                "WARNING: Variable Temp contains NaN values.",
                "Cannot perform comparison.",
            ],
        ),
        # Requested field is not present in the files
        (
            list(DEFAULT_NC),
            "Bar",
            False,
            ["ERROR: Field Bar not found"],
        ),
    ],
)
@pytest.mark.util
def test_nc_is_equal(
    capfd, tmp_path_factory, dummy_nc1, nc_data, fields, expected, check_print
):
    """Compare dummy_nc1 with a second file built from nc_data.

    nc_data is unpacked into make_nc's lon, lat, z, data, variable
    arguments. check_print, when given, lists text that must appear in
    the captured stdout.
    """
    # build the second file that dummy_nc1 is compared against
    second_dir = tmp_path_factory.mktemp("data2")
    dummy_nc2 = make_nc(second_dir, *nc_data)

    result = du.nc_is_equal(dummy_nc1, dummy_nc2, fields=fields, debug=True)
    assert result == expected

    if not check_print:
        return
    _statment_in_capfd(capfd, check_print)
|
||
|
||
@pytest.mark.util
def test_compare_files_not_exists(capfd, dummy_nc1):
    """compare_files reports a second file that does not exist."""
    missing_file = "/This/is/fake.nc4"
    expected = (
        missing_file,
        "",
        "file not found (in truth but missing now)",
        "",
    )
    assert du.compare_files(dummy_nc1, missing_file, debug=True) == expected
|
||
|
||
# Contents for the plain-text test files (one source line per output line)
file_content_1 = (
    "Some text about meteorology.\n"
    " And some data 1, 2, 3!"
)
file_content_2 = (
    "Some other text about science.\n"
    " And some data 1.1, 2.2, 3.3!"
)

# Contents for the CSV test files: a header row followed by two data rows
csv_content_1 = (
    "Apples, Milk, Bread\n"
    "6, 18, 4\n"
    "9, six, 66\n"
)
csv_content_2 = (
    "Oranges, Milk, Bread\n"
    "6, 18, 4\n"
    "9, 6, 2\n"
)
|
||
|
||
@pytest.mark.parametrize(
    "files_to_make_a,files_to_make_b,expected,check_print",
    [
        # one identical text file in each directory
        (
            [("file1.txt", file_content_1)],
            [("file1.txt", file_content_1)],
            [],
            "No differences found in any files",
        ),
        # same file name, different text
        (
            [("file1.txt", file_content_1)],
            [("file1.txt", file_content_2)],
            [("dir_a/file1.txt", "dir_b/file1.txt", "Text diff", "")],
            "ERROR: Line differs",
        ),
        # identical CSV files
        ([("file1.csv", csv_content_1)], [("file1.csv", csv_content_1)], [], None),
        # a .conf file that exists only in dir_a is not reported
        (
            [("file1.csv", csv_content_1), ("metplus_final.conf", file_content_1)],
            [("file1.csv", csv_content_1)],
            [],
            "No differences found in any files",
        ),
        # files that exist only in dir_b
        (
            [
                ("file1.csv", csv_content_1),
            ],
            [
                ("file1.csv", csv_content_1),
                ("file3.txt", file_content_1),
                ("metplus_final.conf", file_content_1),
            ],
            [("", "dir_b/file3.txt", "file not found (new output)", "")],
            "ERROR: File does not exist:",
        ),
        # TODO: add more combinations here
    ],
)
@pytest.mark.util
def test_compare_dir(
    capfd, tmp_path_factory, files_to_make_a, files_to_make_b, expected, check_print
):
    """Populate two temporary directories and compare them with compare_dir.

    files_to_make_a and files_to_make_b are lists of (name, content)
    pairs. expected is the list of difference tuples, written with the
    placeholders "dir_a" and "dir_b" in place of the real directories.
    """
    # make two temp directories
    dir_a = tmp_path_factory.mktemp("dir_a")
    dir_b = tmp_path_factory.mktemp("dir_b")

    # swap the placeholders in the expected tuples for the actual temp dirs
    expected = [
        tuple(
            item.replace("dir_a", str(dir_a)).replace("dir_b", str(dir_b))
            for item in entry
        )
        for entry in expected
    ]

    # write the requested files into each directory
    for directory, files in ((dir_a, files_to_make_a), (dir_b, files_to_make_b)):
        for name, content in files:
            with open(directory / name, "w") as handle:
                handle.write(content)

    result = du.compare_dir(str(dir_a), str(dir_b), debug=True)
    assert result == expected
    if check_print:
        _statment_in_capfd(capfd, check_print)