Feature dtcenter#2253 diff_util example

John-Sharples · Jul 18, 2023 · 8f9e8e7 · 8f9e8e7
1 parent 618103f
commit 8f9e8e7
Showing 1 changed file with 343 additions and 0 deletions.
diff --git a/internal/tests/pytests/util/diff_util/test_diff_util_bom.py b/internal/tests/pytests/util/diff_util/test_diff_util_bom.py
@@ -0,0 +1,343 @@
+# Tests for metplus/util/diff_util.py
+# Requires pillow in Python env
+
+import pytest
+import numpy as np
+from unittest import mock
+from netCDF4 import Dataset
+from metplus.util import diff_util as du
+
+# Default arguments for make_nc, in positional order:
+# lon, lat, z, data, variable name.
+_DEFAULT_LON = [359, 0, 1]
+_DEFAULT_LAT = [-1, 0, 1]
+_DEFAULT_Z = [0, 1]
+# Nested as 3 x 3 x 2, matching len(lon) x len(lat) x len(z)
+_DEFAULT_DATA = [
+    [[1, 2], [3, 4], [5, 6]],
+    [[2, 3], [4, 5], [6, 7]],
+    [[30, 31], [33, 32], [34, 39]],
+]
+DEFAULT_NC = [_DEFAULT_LON, _DEFAULT_LAT, _DEFAULT_Z, _DEFAULT_DATA, "Temp"]
+
+
+@pytest.fixture(scope="module")
+def dummy_nc1(tmp_path_factory):
+    # Build a temporary netCDF file from the DEFAULT_NC values and
+    # return its path. DEFAULT_NC holds exactly the five positional
+    # arguments make_nc takes after the directory.
+    target_dir = tmp_path_factory.mktemp("data1")
+    return make_nc(target_dir, *DEFAULT_NC)
+
+
+def _statment_in_capfd(capfd, check_print):
+    # Assert that every expected statement occurs in the captured stdout.
+    #
+    # capfd: object whose readouterr() returns an (out, err) pair,
+    #     e.g. pytest's capfd fixture.
+    # check_print: a single string, or an iterable of strings, each of
+    #     which must be a substring of the captured stdout.
+    out, _ = capfd.readouterr()
+    # readouterr() consumes the captured text, so echo it back out.
+    print("out: ", out)
+    # A bare string would be iterated character by character, reducing
+    # the check to "every character occurs somewhere in out".
+    if isinstance(check_print, str):
+        check_print = [check_print]
+    for statement in check_print:
+        assert statement in out
+
+
+def make_nc(tmp_path, lon, lat, z, data, variable="Temp"):
+    # Make a dummy netCDF file and return its path. We can do this
+    # with a lot less code if xarray is available.
+    #
+    # tmp_path: directory (path object supporting "/") to write into
+    # lon, lat, z: coordinate values; their lengths size the dimensions
+    # data: values written to the first time step of the data variable
+    # variable: name of the data variable
+
+    # Note: "nc4" is not included in NETCDF_EXTENSIONS, hence
+    # we use it here to specifically trigger the call to
+    # netCDF.Dataset in get_file_type.
+    file_name = tmp_path / "fake.nc4"
+    with Dataset(file_name, "w", format="NETCDF4") as rootgrp:
+        # diff_util can't deal with groups, so attach dimensions
+        # and variables to the root group.
+        rootgrp.createDimension("lon", len(lon))
+        rootgrp.createDimension("lat", len(lat))
+        rootgrp.createDimension("z", len(z))
+        # size None requests an unlimited dimension
+        rootgrp.createDimension("time", None)
+
+        # create variables
+        longitude = rootgrp.createVariable("Longitude", "f4", "lon")
+        latitude = rootgrp.createVariable("Latitude", "f4", "lat")
+        levels = rootgrp.createVariable("Levels", "i4", "z")
+        temp = rootgrp.createVariable(variable, "f4", ("time", "lon", "lat", "z"))
+        # "Time" is created so it exists in the file, but it is never
+        # assigned values, so no handle to it is kept.
+        rootgrp.createVariable("Time", "i4", "time")
+
+        longitude[:] = lon
+        latitude[:] = lat
+        levels[:] = z
+        # fill only the first time step
+        temp[0, :, :, :] = data
+
+    return file_name
+
+
+@pytest.mark.parametrize(
+    "path,expected",
+    [
+        # extensions mapped to a file type
+        ("/path/to/file.csv", "csv"),
+        ("/path/to/file.jpeg", "image"),
+        ("/path/to/file.jpg", "image"),
+        ("/path/to/file.nc", "netcdf"),
+        ("/path/to/file.cdf", "netcdf"),
+        ("/path/to/file.pdf", "pdf"),
+        # extensions reported as skipped
+        ("/path/to/file.zip", "skip .zip"),
+        ("/path/to/file.png", "skip .png"),
+        # extension not matched by any group
+        ("/path/to/file.bigfoot", "unknown"),
+    ],
+)
+@pytest.mark.util
+def test_get_file_type(path, expected):
+    # The file type reported for each path must match the expected label
+    assert du.get_file_type(path) == expected
+
+
+@mock.patch.object(du, "UNSUPPORTED_EXTENSIONS", [".foo"])
+@pytest.mark.util
+def test_get_file_type_unsupported():
+    # UNSUPPORTED_EXTENSIONS is patched to [".foo"] for this test, so a
+    # ".foo" file is expected to be reported as unsupported.
+    assert du.get_file_type("/path/to/file.foo") == "unsupported .foo"
+
+
+@pytest.mark.util
+def test_get_file_type_extensions():
+    # No extension may appear more than once across the groups,
+    # otherwise we may get an unexpected result from get_file_type
+    groups = (
+        du.IMAGE_EXTENSIONS,
+        du.NETCDF_EXTENSIONS,
+        du.SKIP_EXTENSIONS,
+        du.PDF_EXTENSIONS,
+        du.CSV_EXTENSIONS,
+        du.UNSUPPORTED_EXTENSIONS,
+    )
+    all_extensions = []
+    for group in groups:
+        all_extensions.extend(group)
+    # a duplicate would make the set smaller than the list
+    assert len(set(all_extensions)) == len(all_extensions)
+
+
+@pytest.mark.util
+def test_get_file_type_nc4(dummy_nc1):
+    # The file built by the dummy_nc1 fixture is expected to be
+    # reported as netCDF.
+    assert du.get_file_type(dummy_nc1) == "netcdf"
+
+
+@pytest.mark.parametrize(
+    "val1,val2,expected",
+    [
+        # pairs expected to compare equal / not equal after rounding
+        (1.22222, 1.222221, True),
+        (190.22222, 190.22221, False),
+        (0.999991, 1, True),
+        (0.999990, 1, False),
+    ],
+)
+@pytest.mark.util
+def test_is_equal_rounded(val1, val2, expected):
+    # Check the rounded-equality result for each pair of values
+    actual = du.is_equal_rounded(val1, val2)
+    assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "nc_data,fields,expected,check_print",
+    [
+        # Compare exact same data
+        (list(DEFAULT_NC), None, True, None),
+        # Data variable is named differently in the second file
+        (
+            [*DEFAULT_NC[:4], "Foo"],
+            None,
+            False,
+            [
+                "ERROR: Field list differs between files",
+                "File_A: ['Latitude', 'Levels', 'Longitude', 'Temp', 'Time']",
+                "File_B:['Foo', 'Latitude', 'Levels', 'Longitude', 'Time']",
+            ],
+        ),
+        # One small value change (39 -> 39.001)
+        (
+            [
+                *DEFAULT_NC[:3],
+                [
+                    [[1, 2], [3, 4], [5, 6]],
+                    [[2, 3], [4, 5], [6, 7]],
+                    [[30, 31], [33, 32], [34, 39.001]],
+                ],
+                DEFAULT_NC[4],
+            ],
+            None,
+            False,
+            [
+                "ERROR: Field (Temp) values differ",
+                "Min diff: -0.00",
+                "Max diff: 0.0",
+                "1 / 18 points differ",
+            ],
+        ),
+        # Same value change, but only the coordinate fields are compared
+        (
+            [
+                *DEFAULT_NC[:3],
+                [
+                    [[1, 2], [3, 4], [5, 6]],
+                    [[2, 3], [4, 5], [6, 7]],
+                    [[30, 31], [33, 32], [34, 39.001]],
+                ],
+                DEFAULT_NC[4],
+            ],
+            ["Longitude", "Latitude", "Levels"],
+            True,
+            None,
+        ),
+        # NaN in the compared field: a warning is expected in the output
+        # and the result is still expected to be True
+        (
+            [
+                *DEFAULT_NC[:3],
+                [
+                    [[1, 2], [3, 4], [5, 6]],
+                    [[2, 3], [4, 5], [6, 7]],
+                    [[30, 31], [33, 32], [34, np.nan]],
+                ],
+                DEFAULT_NC[4],
+            ],
+            "Temp",
+            True,
+            [
+                "WARNING: Variable Temp contains NaN values.",
+                "Cannot perform comparison.",
+            ],
+        ),
+        # Requested field doesn't exist
+        (
+            list(DEFAULT_NC),
+            "Bar",
+            False,
+            ["ERROR: Field Bar not found"],
+        ),
+    ],
+)
+@pytest.mark.util
+def test_nc_is_equal(
+    capfd, tmp_path_factory, dummy_nc1, nc_data, fields, expected, check_print
+):
+    # Build a second dummy file from the parametrized data and compare
+    # it against dummy_nc1.
+    second_dir = tmp_path_factory.mktemp("data2")
+    dummy_nc2 = make_nc(second_dir, *nc_data)
+    actual = du.nc_is_equal(dummy_nc1, dummy_nc2, fields=fields, debug=True)
+    assert actual == expected
+
+    # Cases without expected messages only check the return value
+    if not check_print:
+        return
+    _statment_in_capfd(capfd, check_print)
+
+
+@pytest.mark.util
+def test_compare_files_not_exists(capfd, dummy_nc1):
+    # Comparing against a path that does not exist is expected to return
+    # a 4-tuple naming that path and a "file not found" reason.
+    missing = "/This/is/fake.nc4"
+    expected = (missing, "", "file not found (in truth but missing now)", "")
+    assert du.compare_files(dummy_nc1, missing, debug=True) == expected
+
+
+# Plain-text contents used to populate the temporary test files
+file_content_1 = (
+    "Some text about meteorology.\n"
+    " And some data 1, 2, 3!"
+)
+file_content_2 = (
+    "Some other text about science.\n"
+    " And some data 1.1, 2.2, 3.3!"
+)
+
+# CSV contents, one row per string fragment
+csv_content_1 = (
+    "Apples, Milk, Bread\n"
+    "6, 18, 4\n"
+    "9, six, 66\n"
+)
+csv_content_2 = (
+    "Oranges, Milk, Bread\n"
+    "6, 18, 4\n"
+    "9, 6, 2\n"
+)
+
+
+@pytest.mark.parametrize(
+    "files_to_make_a,files_to_make_b,expected,check_print",
+    [
+        # One identical text file on each side
+        (
+            [("file1.txt", file_content_1)],
+            [("file1.txt", file_content_1)],
+            [],
+            "No differences found in any files",
+        ),
+        # Same file name, different text content
+        (
+            [("file1.txt", file_content_1)],
+            [("file1.txt", file_content_2)],
+            [("dir_a/file1.txt", "dir_b/file1.txt", "Text diff", "")],
+            "ERROR: Line differs",
+        ),
+        # Identical CSV files
+        ([("file1.csv", csv_content_1)], [("file1.csv", csv_content_1)], [], None),
+        # metplus_final.conf present only in dir_a is not reported
+        (
+            [("file1.csv", csv_content_1), ("metplus_final.conf", file_content_1)],
+            [("file1.csv", csv_content_1)],
+            [],
+            "No differences found in any files",
+        ),
+        # Extra files in dir_b: file3.txt is reported as new output,
+        # metplus_final.conf is not
+        (
+            [
+                ("file1.csv", csv_content_1),
+            ],
+            [
+                ("file1.csv", csv_content_1),
+                ("file3.txt", file_content_1),
+                ("metplus_final.conf", file_content_1),
+            ],
+            [("", "dir_b/file3.txt", "file not found (new output)", "")],
+            "ERROR: File does not exist:",
+        ),
+        # TODO: add more combinations here
+    ],
+)
+@pytest.mark.util
+def test_compare_dir(
+    capfd, tmp_path_factory, files_to_make_a, files_to_make_b, expected, check_print
+):
+    # make two temp directories
+    dir_a = tmp_path_factory.mktemp("dir_a")
+    dir_b = tmp_path_factory.mktemp("dir_b")
+
+    # The parametrized expectations use "dir_a" / "dir_b" as placeholders;
+    # substitute the actual temp dir paths ("dir_a" first, then "dir_b").
+    expected = [
+        tuple(
+            item.replace("dir_a", str(dir_a)).replace("dir_b", str(dir_b))
+            for item in entry
+        )
+        for entry in expected
+    ]
+
+    # make all the test files, dir_a first
+    for directory, files in ((dir_a, files_to_make_a), (dir_b, files_to_make_b)):
+        for name, content in files:
+            with open(directory / name, "w") as handle:
+                handle.write(content)
+
+    actual = du.compare_dir(str(dir_a), str(dir_b), debug=True)
+    assert actual == expected
+    if check_print:
+        _statment_in_capfd(capfd, check_print)