dtcenter · georgemccabe · Jul 19, 2023 · Jul 19, 2023
diff --git a/docs/Contributors_Guide/testing.rst b/docs/Contributors_Guide/testing.rst
@@ -31,12 +31,18 @@ Running
 
 To run the unit tests, set the environment variable
 **METPLUS_TEST_OUTPUT_BASE** to a path where the user running has write
-permissions, nativate to the METplus directory, then call pytest::
+permissions, navigate to the METplus directory, then call pytest::
 
     export METPLUS_TEST_OUTPUT_BASE=/d1/personal/${USER}/pytest
     cd METplus
     pytest internal/tests/pytests
 
+Some of the tests use your default location for writing temporary files,
+typically ``/tmp``. If you do not have write permissions there, this may
+cause an error. To override this behaviour, you can specify the base
+temporary directory that pytest should use::
+
+    pytest --basetemp=${METPLUS_TEST_OUTPUT_BASE}/tmp internal/tests/pytests
+
 A report will be output showing which pytest categories failed.
 To view verbose test output, add the **-vv** argument::
 

diff --git a/internal/tests/pytests/util/diff_util/test_diff_util.py b/internal/tests/pytests/util/diff_util/test_diff_util.py
@@ -1,10 +1,12 @@
 import pytest
 
+from netCDF4 import Dataset
 import os
 import shutil
 import uuid
+from unittest import mock
 
-from metplus.util.diff_util import dirs_are_equal, ROUNDING_OVERRIDES
+from metplus.util import diff_util as du
 from metplus.util import mkdir_p
 
 test_output_dir = os.path.join(os.environ['METPLUS_TEST_OUTPUT_BASE'],
@@ -21,6 +23,69 @@
 csv_val_2 = 'Kenny-Smith, Ambrose, 0.8977'
 
 
+# Default arguments for make_nc in positional order: lon, lat, z, data and
+# variable name. The data values are nested as [lon][lat][z], which matches
+# the (lon, lat, z) part of the variable that make_nc writes them into.
+DEFAULT_NC = [
+    [359, 0, 1],  # lon
+    [-1, 0, 1],  # lat
+    [0, 1],  # z
+    [  # data
+        [[1, 2], [3, 4], [5, 6]],
+        [[2, 3], [4, 5], [6, 7]],
+        [[30, 31], [33, 32], [34, 39]],
+    ],
+    "Temp",  # variable
+]
+
+@pytest.fixture(scope="module")
+def dummy_nc1(tmp_path_factory):
+    """Create the reference netCDF file described by DEFAULT_NC.
+
+    The fixture is module scoped, so the file is written once and the same
+    path is handed to every test in this module that requests it.
+
+    Returns the path of the file written by make_nc.
+    """
+    # Construct a temporary netCDF file
+    return make_nc(
+        tmp_path_factory.mktemp("data1"),
+        DEFAULT_NC[0],
+        DEFAULT_NC[1],
+        DEFAULT_NC[2],
+        DEFAULT_NC[3],
+        DEFAULT_NC[4],
+    )
+
+
+def _statment_in_capfd(capfd, check_print):
+    """Assert that each expected statement appears in the captured stdout.
+
+    capfd is the pytest capture fixture to read from and check_print is an
+    iterable of strings, each of which must be a substring of stdout.
+    """
+    # readouterr() returns (stdout, stderr); stderr is not checked here
+    out, _ = capfd.readouterr()
+    # print the captured text again, presumably so that it is still
+    # visible in the pytest report when an assertion below fails
+    print("out: ", out)
+    for statement in check_print:
+        assert statement in out
+
+
+def make_nc(tmp_path, lon, lat, z, data, variable="Temp"):
+    """Write a small netCDF4 file named "fake.nc4" and return its path.
+
+    tmp_path is the directory to write into; it must support the "/"
+    operator (e.g. a pathlib.Path). lon, lat and z are the values stored in
+    the "Longitude", "Latitude" and "Levels" variables, and their lengths
+    set the sizes of the "lon", "lat" and "z" dimensions. data holds the
+    values for the first time step of the data variable, nested as
+    [lon][lat][z]. variable is the name given to the data variable.
+    """
+    # Make a dummy netCDF file. We can do this with a lot less
+    # code if xarray is available.
+
+    # Note: "nc4" is not included in NETCDF_EXTENSIONS, hence
+    # we use it here to specifically trigger the call to
+    # netCDF4.Dataset in get_file_type.
+    file_name = tmp_path / "fake.nc4"
+    with Dataset(file_name, "w", format="NETCDF4") as rootgrp:
+        # diff_util can't deal with groups, so attach dimensions
+        # and variables to the root group.
+        rootgrp.createDimension("lon", len(lon))
+        rootgrp.createDimension("lat", len(lat))
+        rootgrp.createDimension("z", len(z))
+        # a size of None creates an unlimited dimension
+        rootgrp.createDimension("time", None)
+
+        # create variables
+        longitude = rootgrp.createVariable("Longitude", "f4", "lon")
+        latitude = rootgrp.createVariable("Latitude", "f4", "lat")
+        levels = rootgrp.createVariable("Levels", "i4", "z")
+        temp = rootgrp.createVariable(variable, "f4", ("time", "lon", "lat", "z"))
+        # "Time" is only created; no values are assigned to it below
+        time = rootgrp.createVariable("Time", "i4", "time")
+
+        longitude[:] = lon
+        latitude[:] = lat
+        levels[:] = z
+        # only the first time step of the data variable is written
+        temp[0, :, :, :] = data
+
+    return file_name
+
+
 def create_diff_files(files_a, files_b):
     unique_id = str(uuid.uuid4())[0:8]
     dir_a = os.path.join(test_output_dir, f'diff_{unique_id}', 'a')
@@ -140,16 +205,171 @@ def write_test_files(dirname, files):
 def test_diff_dir_text_files(a_files, b_files, rounding_override, expected_is_equal):
     if rounding_override:
         for filename in a_files:
-            ROUNDING_OVERRIDES[filename] = rounding_override
+            du.ROUNDING_OVERRIDES[filename] = rounding_override
 
     a_dir, b_dir = create_diff_files(a_files, b_files)
-    assert dirs_are_equal(a_dir, b_dir) == expected_is_equal
+    assert du.dirs_are_equal(a_dir, b_dir) == expected_is_equal
 
     # pass individual files instead of entire directory
     for filename in a_files:
         if filename in b_files:
             a_path = os.path.join(a_dir, filename)
             b_path = os.path.join(b_dir, filename)
-            assert dirs_are_equal(a_path, b_path) == expected_is_equal
+            assert du.dirs_are_equal(a_path, b_path) == expected_is_equal
 
     shutil.rmtree(os.path.dirname(a_dir))
+
+
+@pytest.mark.parametrize(
+    "path,expected",
+    [
+        ("/path/to/file.csv", "csv"),
+        ("/path/to/file.jpeg", "image"),
+        ("/path/to/file.jpg", "image"),
+        ("/path/to/file.nc", "netcdf"),
+        ("/path/to/file.cdf", "netcdf"),
+        ("/path/to/file.pdf", "pdf"),
+        ("/path/to/file.zip", "skip .zip"),
+        ("/path/to/file.png", "image"),
+        ("/path/to/file.bigfoot", "unknown"),
+    ],
+)
+@pytest.mark.util
+def test_get_file_type(path, expected):
+    """Check get_file_type returns the expected type string for a path."""
+    actual = du.get_file_type(path)
+    assert actual == expected
+
+
+# UNSUPPORTED_EXTENSIONS is replaced by [".foo"] for this test only
+@mock.patch.object(du, "UNSUPPORTED_EXTENSIONS", [".foo"])
+@pytest.mark.util
+def test_get_file_type_unsupported():
+    """Check an extension listed as unsupported is reported as such."""
+    actual = du.get_file_type("/path/to/file.foo")
+    assert actual == "unsupported .foo"
+
+
+@pytest.mark.util
+def test_get_file_type_extensions():
+    """Check no extension appears more than once across the extension lists."""
+    # Check all extensions are unique, otherwise we may
+    # get unexpected result from get_file_type
+    extensions = [
+        du.IMAGE_EXTENSIONS,
+        du.NETCDF_EXTENSIONS,
+        du.SKIP_EXTENSIONS,
+        du.PDF_EXTENSIONS,
+        du.CSV_EXTENSIONS,
+        du.UNSUPPORTED_EXTENSIONS,
+    ]
+    # flatten the lists; a duplicate makes the set smaller than the list
+    flat_list = [ext for x in extensions for ext in x]
+    assert len(set(flat_list)) == len(flat_list)
+
+
+@pytest.mark.parametrize(
+    "nc_data,fields,expected,check_print",
+    [
+        (
+            # Compare exact same data
+            [
+                DEFAULT_NC[0],
+                DEFAULT_NC[1],
+                DEFAULT_NC[2],
+                DEFAULT_NC[3],
+                DEFAULT_NC[4],
+            ],
+            None,
+            True,
+            None,
+        ),
+        # Field name differ
+        (
+            [
+                DEFAULT_NC[0],
+                DEFAULT_NC[1],
+                DEFAULT_NC[2],
+                DEFAULT_NC[3],
+                "Foo",
+            ],
+            None,
+            False,
+            [
+                "ERROR: Field list differs between files",
+                "File_A: ['Latitude', 'Levels', 'Longitude', 'Temp', 'Time']",
+                "File_B:['Foo', 'Latitude', 'Levels', 'Longitude', 'Time']",
+            ],
+        ),
+        # One small value change
+        (
+            [
+                DEFAULT_NC[0],
+                DEFAULT_NC[1],
+                DEFAULT_NC[2],
+                [
+                    [[1, 2], [3, 4], [5, 6]],
+                    [[2, 3], [4, 5], [6, 7]],
+                    [[30, 31], [33, 32], [34, 39.1]],
+                ],
+                DEFAULT_NC[4],
+            ],
+            None,
+            False,
+            [
+                "ERROR: Field (Temp) values differ",
+            ],
+        ),
+        # Value changed but not comparing that field
+        (
+            [
+                DEFAULT_NC[0],
+                DEFAULT_NC[1],
+                DEFAULT_NC[2],
+                [
+                    [[1, 2], [3, 4], [5, 6]],
+                    [[2, 3], [4, 5], [6, 7]],
+                    [[30, 31], [33, 32], [34, 39.001]],
+                ],
+                DEFAULT_NC[4],
+            ],
+            ["Longitude", "Latitude", "Levels"],
+            True,
+            None,
+        ),
+        # Field doesn't exist
+        (
+            [
+                DEFAULT_NC[0],
+                DEFAULT_NC[1],
+                DEFAULT_NC[2],
+                DEFAULT_NC[3],
+                DEFAULT_NC[4],
+            ],
+            # note: the field name is passed as a plain str here, not a list
+            "Bar",
+            False,
+            ["ERROR: Field Bar not found"],
+        ),
+    ],
+)
+@pytest.mark.util
+def test_nc_is_equal(
+    capfd, tmp_path_factory, dummy_nc1, nc_data, fields, expected, check_print
+):
+    """Compare dummy_nc1 with a second netCDF file built from nc_data.
+
+    nc_data holds the make_nc arguments (lon, lat, z, data, variable) for
+    the second file and fields is passed through to nc_is_equal. expected
+    is the result nc_is_equal must return. check_print, when not None,
+    lists strings that must each appear in the captured stdout.
+    """
+    # make a dummy second file to compare to dummy_nc1
+    dummy_nc2 = make_nc(tmp_path_factory.mktemp("data2"), *nc_data)
+    assert du.nc_is_equal(dummy_nc1, dummy_nc2, fields=fields, debug=True) == expected
+
+    if check_print:
+        _statment_in_capfd(capfd, check_print)
+
+
+@pytest.mark.parametrize(
+    "val,expected",[
+    # Add (numpy.float32(44.54), True) if numpy available as this
+    # is what is actually tested when comparing netCDF4.Dataset
+    (-0.15, True),
+    ("-123,456.5409", False), # Check this is intended ?!
+    ("2345j", False),
+    ("-12345.244", True),
+    ("foo", False)
+    ]
+)
+@pytest.mark.util
+def test__is_number(val, expected):
+    """Check _is_number for a float value and for several strings."""
+    assert du._is_number(val) == expected
diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
@@ -5,6 +5,7 @@
 import netCDF4
 import filecmp
 import csv
+from numbers import Number
 from PIL import Image, ImageChops
 from pandas import isnull
 from numpy.ma import is_masked
@@ -94,7 +95,7 @@ def get_file_type(filepath):
         return 'pdf'
 
     if file_extension in UNSUPPORTED_EXTENSIONS:
-        return f'unsupported{file_extension}'
+        return f'unsupported {file_extension}'
 
     return 'unknown'
 
@@ -478,6 +479,8 @@ def _is_equal_rounded(value_a, value_b):
 
 
 def _is_number(value):
+    """Report whether value looks like a number.
+
+    Instances of numbers.Number are accepted as they are. Any other value
+    must provide the str methods used below; it is accepted if, after every
+    '.' and '-' is replaced by a digit and surrounding whitespace is
+    stripped, only digits remain.
+
+    NOTE(review): the number and position of '.' and '-' are not checked,
+    so strings such as '1.2.3', '1-2' or '-' are also reported as numbers.
+    """
+    if isinstance(value, Number):
+        return True
     return value.replace('.', '1').replace('-', '1').strip().isdigit()