From 2c0332abb56dae4c182053b873a14b0d88f4ce27 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 12:05:34 -0600
Subject: [PATCH 01/18] update diff util and test that changes properly diff
 failling diffs, ci-run-diff

---
 .github/parm/use_case_groups.json |   4 +-
 metplus/util/diff_util.py         | 422 +++++++++++++++++++-----------
 2 files changed, 265 insertions(+), 161 deletions(-)

diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json
index 0d5e34a9eb..a6fdaf3129 100644
--- a/.github/parm/use_case_groups.json
+++ b/.github/parm/use_case_groups.json
@@ -7,7 +7,7 @@
   {
     "category": "met_tool_wrapper",
     "index_list": "30-58",
-    "run": false
+    "run": true
   },
   {
     "category": "air_quality_and_comp",
@@ -227,7 +227,7 @@
   {
     "category": "tc_and_extra_tc",
     "index_list": "0-2",
-    "run": false
+    "run": true
   },
   {
     "category": "tc_and_extra_tc",
diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index 611732ab9b..a77f9df8c7 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -1,9 +1,13 @@
+#! /usr/bin/env python3
+
+import sys
 import os
 import netCDF4
 import filecmp
 import csv
 from PIL import Image, ImageChops
 import numpy
+from numpy.core._exceptions import UFuncTypeError
 
 IMAGE_EXTENSIONS = [
     '.jpg',
@@ -37,6 +41,7 @@
 # Note: Completing METplus issue #1873 could allow this to be set to 6
 ROUNDING_PRECISION = 5
 
+
 def get_file_type(filepath):
     _, file_extension = os.path.splitext(filepath)
 
@@ -55,7 +60,7 @@ def get_file_type(filepath):
     try:
         netCDF4.Dataset(filepath)
         return 'netcdf'
-    except:
+    except OSError:
         pass
 
     if file_extension in SKIP_EXTENSIONS:
@@ -81,63 +86,69 @@ def compare_dir(dir_a, dir_b, debug=False, save_diff=False):
         return [result]
 
     diff_files = []
-    for root, _, files in os.walk(dir_a):
-        # skip logs directories
-        if root.endswith('logs'):
+    for filepath_a in _get_files(dir_a):
+        filepath_b = filepath_a.replace(dir_a, dir_b)
+        print("\n# # # # # # # # # # # # # # # # # # # # # # # # # # "
+              "# # # #\n")
+        rel_path = filepath_a.replace(f'{dir_a}/', '')
+        print(f"COMPARING {rel_path}")
+        result = compare_files(filepath_a,
+                               filepath_b,
+                               debug=debug,
+                               dir_a=dir_a,
+                               dir_b=dir_b,
+                               save_diff=save_diff)
+
+        # no differences of skipped
+        if result is None or result is True:
             continue
 
-        for filename in files:
-            filepath_a = os.path.join(root, filename)
+        diff_files.append(result)
 
-            # skip directories
-            if not os.path.isfile(filepath_a):
-                continue
+    # loop through dir_b and report if any files are not found in dir_a
+    for filepath_b in _get_files(dir_b):
+        filepath_a = filepath_b.replace(dir_b, dir_a)
+        if os.path.exists(filepath_a):
+            continue
+        # check if missing file is actually diff file that was generated
+        diff_list = [item[3] for item in diff_files]
+        if filepath_b in diff_list:
+            continue
+        print(f"ERROR: File does not exist: {filepath_a}")
+        diff_files.append(('', filepath_b, 'file not found (new output)', ''))
 
-            # skip final conf file
-            if 'metplus_final.conf' in os.path.basename(filepath_a):
-                continue
+    print('::endgroup::')
 
-            filepath_b = filepath_a.replace(dir_a, dir_b)
-            print("\n# # # # # # # # # # # # # # # # # # # # # # # # # # "
-                  "# # # #\n")
-            rel_path = filepath_a.replace(f'{dir_a}/', '')
-            print(f"COMPARING {rel_path}")
-            result = compare_files(filepath_a,
-                                   filepath_b,
-                                   debug=debug,
-                                   dir_a=dir_a,
-                                   dir_b=dir_b,
-                                   save_diff=save_diff)
+    _print_dir_summary(diff_files)
+    return diff_files
 
-            # no differences of skipped
-            if result is None or result is True:
-                continue
 
-            diff_files.append(result)
+def _get_files(search_dir):
+    """!Generator to get all files in a directory.
+    Skips directories that end with 'logs' and files named metplus_final.conf
 
-    # loop through dir_b and report if any files are not found in dir_a
-    for root, _, files in os.walk(dir_b):
+    @param search_dir directory to search recursively
+    """
+    for root, _, files in os.walk(search_dir):
         # skip logs directories
         if root.endswith('logs'):
             continue
 
         for filename in files:
-            filepath_b = os.path.join(root, filename)
+            filepath = os.path.join(root, filename)
+
+            # skip directories
+            if not os.path.isfile(filepath):
+                continue
 
             # skip final conf file
-            if 'metplus_final.conf' in os.path.basename(filepath_b):
+            if 'metplus_final.conf' in os.path.basename(filepath):
                 continue
 
-            filepath_a = filepath_b.replace(dir_b, dir_a)
-            if not os.path.exists(filepath_a):
-                # check if missing file is actually diff file that was generated
-                diff_list = [item[3] for item in diff_files]
-                if filepath_b in diff_list:
-                    continue
-                print(f"ERROR: File does not exist: {filepath_a}")
-                diff_files.append(('', filepath_b, 'file not found (new output)', ''))
+            yield filepath
 
-    print('::endgroup::')
+
+def _print_dir_summary(diff_files):
     print("\n\n**************************************************\nSummary:\n")
     if diff_files:
         print("\nERROR: Some differences were found")
@@ -151,7 +162,6 @@ def compare_dir(dir_a, dir_b, debug=False, save_diff=False):
 
     print("Finished comparing directories\n"
           "**************************************************\n\n")
-    return diff_files
 
 
 def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None,
@@ -178,47 +188,16 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None,
         return filepath_a, filepath_b, file_type, ''
 
     if file_type == 'csv':
-        print('Comparing CSV')
-        if not compare_csv_files(filepath_a, filepath_b):
-            print(f'ERROR: CSV file differs: {filepath_b}')
-            return filepath_a, filepath_b, 'CSV diff', ''
-
-        print("No differences in CSV files")
-        return True
+        return _handle_csv_files(filepath_a, filepath_b)
 
     if file_type == 'netcdf':
-        print("Comparing NetCDF")
-        if not nc_is_equal(filepath_a, filepath_b):
-            return filepath_a, filepath_b, 'NetCDF diff', ''
-
-        print("No differences in NetCDF files")
-        return True
+        return _handle_netcdf_files(filepath_a, filepath_b)
 
     if file_type == 'pdf':
-        print("Comparing PDF as images")
-        diff_file = compare_pdf_as_images(filepath_a, filepath_b,
-                                          save_diff=save_diff)
-        if diff_file is True:
-            print("No differences in PDF files")
-            return True
-
-        if diff_file is False:
-            diff_file = ''
-
-        return filepath_a, filepath_b, 'PDF diff', diff_file
+        return _handle_pdf_files(filepath_a, filepath_b, save_diff)
 
     if file_type == 'image':
-        print("Comparing images")
-        diff_file = compare_image_files(filepath_a, filepath_b,
-                                        save_diff=save_diff)
-        if diff_file is True:
-            print("No differences in image files")
-            return True
-
-        if diff_file is False:
-            diff_file = ''
-
-        return filepath_a, filepath_b, 'Image diff', diff_file
+        return _handle_image_files(filepath_a, filepath_b, save_diff)
 
     # if not any of the above types, use diff to compare
     print("Comparing text files")
@@ -236,6 +215,53 @@ def compare_files(filepath_a, filepath_b, debug=False, dir_a=None, dir_b=None,
     return True
 
 
+def _handle_csv_files(filepath_a, filepath_b):
+    print('Comparing CSV')
+    if not compare_csv_files(filepath_a, filepath_b):
+        print(f'ERROR: CSV file differs: {filepath_b}')
+        return filepath_a, filepath_b, 'CSV diff', ''
+
+    print("No differences in CSV files")
+    return True
+
+
+def _handle_netcdf_files(filepath_a, filepath_b):
+    print("Comparing NetCDF")
+    if not nc_is_equal(filepath_a, filepath_b):
+        return filepath_a, filepath_b, 'NetCDF diff', ''
+
+    print("No differences in NetCDF files")
+    return True
+
+
+def _handle_pdf_files(filepath_a, filepath_b, save_diff):
+    print("Comparing PDF as images")
+    diff_file = compare_pdf_as_images(filepath_a, filepath_b,
+                                      save_diff=save_diff)
+    if diff_file is True:
+        print("No differences in PDF files")
+        return True
+
+    if diff_file is False:
+        diff_file = ''
+
+    return filepath_a, filepath_b, 'PDF diff', diff_file
+
+
+def _handle_image_files(filepath_a, filepath_b, save_diff):
+    print("Comparing images")
+    diff_file = compare_image_files(filepath_a, filepath_b,
+                                    save_diff=save_diff)
+    if diff_file is True:
+        print("No differences in image files")
+        return True
+
+    if diff_file is False:
+        diff_file = ''
+
+    return filepath_a, filepath_b, 'Image diff', diff_file
+
+
 def compare_pdf_as_images(filepath_a, filepath_b, save_diff=False):
     try:
         from pdf2image import convert_from_path
@@ -307,15 +333,20 @@ def compare_csv_files(filepath_a, filepath_b):
     lines_b = []
 
     with open(filepath_a, 'r') as file_handle:
-        csv_read = csv.DictReader(file_handle, delimiter=',')
-        for row in csv_read:
-            lines_a.append(row)
+        lines_a.extend(csv.DictReader(file_handle, delimiter=','))
 
     with open(filepath_b, 'r') as file_handle:
-        csv_read = csv.DictReader(file_handle, delimiter=',')
-        for row in csv_read:
-            lines_b.append(row)
+        lines_b.extend(csv.DictReader(file_handle, delimiter=','))
 
+    # compare header values and number of lines
+    if not _compare_csv_lengths(lines_a, lines_b):
+        return False
+
+    # compare each CSV column
+    return _compare_csv_columns(lines_a, lines_b)
+
+
+def _compare_csv_lengths(lines_a, lines_b):
     keys_a = lines_a[0].keys()
     keys_b = lines_b[0].keys()
     # compare header columns and report error if they differ
@@ -337,7 +368,11 @@ def compare_csv_files(filepath_a, filepath_b):
               f'than in OUTPUT ({len(lines_b)})')
         return False
 
-    # compare each CSV column
+    return True
+
+
+def _compare_csv_columns(lines_a, lines_b):
+    keys_a = lines_a[0].keys()
     status = True
     for num, (line_a, line_b) in enumerate(zip(lines_a, lines_b), start=1):
         for key in keys_a:
@@ -349,7 +384,7 @@ def compare_csv_files(filepath_a, filepath_b):
             # ROUNDING_PRECISION decimal places
             # METplus issue #1873 addresses the real problem
             try:
-                if is_equal_rounded(val_a, val_b):
+                if _is_equal_rounded(val_a, val_b):
                     continue
                 print(f"ERROR: Line {num} - {key} differs by "
                       f"less than {ROUNDING_PRECISION} decimals: "
@@ -364,7 +399,7 @@ def compare_csv_files(filepath_a, filepath_b):
     return status
 
 
-def is_equal_rounded(value_a, value_b):
+def _is_equal_rounded(value_a, value_b):
     if _truncate_float(value_a) == _truncate_float(value_b):
         return True
     if _round_float(value_a) == _round_float(value_b):
@@ -425,10 +460,8 @@ def compare_txt_files(filepath_a, filepath_b, dir_a=None, dir_b=None):
     if is_stat_file:
         print("Comparing stat file")
         header_a = lines_a.pop(0).split()[1:]
-        header_b = lines_b.pop(0).split()[1:]
     else:
         header_a = None
-        header_b = None
 
     if len(lines_a) != len(lines_b):
         print(f"ERROR: Different number of lines in {filepath_b}")
@@ -475,110 +508,181 @@ def diff_text_lines(lines_a, lines_b,
             compare_b = compare_b.replace(dir_b, dir_a)
 
         # check for differences
-        if compare_a != compare_b:
-            # if the diff is in a stat file, ignore the version number
-            if is_stat_file:
-                cols_a = compare_a.split()[1:]
-                cols_b = compare_b.split()[1:]
-                for col_a, col_b, label in zip(cols_a, cols_b, header_a):
-                    if col_a != col_b:
-                        if print_error:
-                            print(f"ERROR: {label} differs:\n"
-                                  f" A: {col_a}\n B: {col_b}")
-                        all_good = False
-            else:
-                if print_error:
-                    print(f"ERROR: Line differs\n"
-                          f" A: {compare_a}\n B: {compare_b}")
+        if compare_a == compare_b:
+            continue
+
+        # if the diff is in a stat file, ignore the version number
+        if is_stat_file:
+            if not _diff_stat_line(compare_a, compare_b, header_a, print_error=print_error):
                 all_good = False
+            continue
+
+        if print_error:
+            print(f"ERROR: Line differs\n"
+                  f" A: {compare_a}\n B: {compare_b}")
+        all_good = False
+
+    return all_good
+
+
+def _diff_stat_line(compare_a, compare_b, header_a, print_error=False):
+    """Compare values in .stat file. Ignore first column which contains MET
+    version number
+
+    @param compare_a list of values in line A
+    @param compare_b list of values in line B
+    @param header_a list of header values in file A excluding MET version
+    @param print_error If True, print an error message if any value differs
+    """
+    cols_a = compare_a.split()[1:]
+    cols_b = compare_b.split()[1:]
+    all_good = True
+    for col_a, col_b, label in zip(cols_a, cols_b, header_a):
+        if col_a == col_b:
+            continue
+        if print_error:
+            print(f"ERROR: {label} differs:\n"
+                  f" A: {col_a}\n B: {col_b}")
+        all_good = False
 
     return all_good
 
 
 def nc_is_equal(file_a, file_b, fields=None, debug=False):
     """! Check if two NetCDF files have the same data
-         @param file_a first file to compare
-         @param file_b second file to compare
-         @param fields (Optional) list of fields to compare. If unset, compare
-          all fields
-         @returns True if all values in fields are equivalent, False if not
+
+    @param file_a first file to compare
+    @param file_b second file to compare
+    @param fields (Optional) list of fields to compare. If unset, compare all
+    @param debug (optional) boolean to output more information about diff
+    @returns True if all values in fields are equivalent, False if not
     """
     nc_a = netCDF4.Dataset(file_a)
     nc_b = netCDF4.Dataset(file_b)
 
+    # keep track of any differences that are found
+    is_equal = True
+
     # if no fields are specified, get all of them
     if fields:
-        if not isinstance(fields, list):
-            field_list = [fields]
-        else:
-            field_list = fields
+        field_list = [fields] if not isinstance(fields, list) else fields
     else:
         a_fields = sorted(nc_a.variables.keys())
         b_fields = sorted(nc_b.variables.keys())
+        # fail if any fields exist in 1 file and not the other
         if a_fields != b_fields:
             print("ERROR: Field list differs between files\n"
                   f" File_A: {a_fields}\n File_B:{b_fields}\n"
                   f"Using File_A fields.")
+            is_equal = False
 
         field_list = a_fields
 
     # loop through fields, keeping track of any differences
-    is_equal = True
-    try:
-        for field in field_list:
-            var_a = nc_a.variables[field]
-            var_b = nc_b.variables[field]
-
-            if debug:
-                print(f"Field: {field}")
-                print(f"Var_A:{var_a}\nVar_B:{var_b}")
-                print(f"Instance type: {type(var_a[0])}")
-            try:
-                values_a = var_a[:]
-                values_b = var_b[:]
-                values_diff = values_a - values_b
-                if (numpy.isnan(values_diff.min()) and
-                        numpy.isnan(values_diff.max())):
-                    print(f"WARNING: Variable {field} contains NaN values. "
-                          "Cannot perform comparison.")
-                elif values_diff.min() != 0.0 or values_diff.max() != 0.0:
-                    print(f"ERROR: Field ({field}) values differ\n"
-                          f"Min diff: {values_diff.min()}, "
-                          f"Max diff: {values_diff.max()}")
-                    is_equal = False
-                    # print indices that are not zero and count of diffs
-                    if debug:
-                        count = 0
-                        values_list = [j for sub in values_diff.tolist()
-                                       for j in sub]
-                        for idx, val in enumerate(values_list):
-                            if val != 0.0:
-                                print(f"{idx}: {val}")
-                                count += 1
-                        print(f"{count} / {idx+1} points differ")
-
-            except:
-                # handle non-numeric fields
-                try:
-                    if any(var_a[:].flatten() != var_b[:].flatten()):
-                        print(f"ERROR: Field ({field}) values (non-numeric) "
-                              "differ\n"
-                              f" File_A: {var_a[:]}\n File_B: {var_b[:]}")
-                        is_equal = False
-                except:
-                    print("ERROR: Couldn't diff NetCDF files, need to update diff method")
-                    is_equal = False
+    for field in field_list:
+        if not _nc_fields_are_equal(field, nc_a, nc_b, debug=debug):
+            is_equal = False
 
+    return is_equal
+
+
+def _nc_fields_are_equal(field, nc_a, nc_b, debug=False):
+    """!Compare same field from 2 NetCDF files.
+
+    @param field name of field to compare
+    @param nc_a first netCDF4.Dataset
+    @param nc_b first netCDF4.Dataset
+    @param debug (optional) boolean to output more information about diff
+    @returns True is fields are equal, False if fields are not equal or if
+    field is not found in one of the files
+    """
+    try:
+        var_a = nc_a.variables[field]
+        var_b = nc_b.variables[field]
     except KeyError:
         print(f"ERROR: Field {field} not found")
         return False
 
-    return is_equal
+    if debug:
+        print(f"Field: {field}")
+        print(f"Var_A:{var_a}\nVar_B:{var_b}")
+        print(f"Instance type: {type(var_a[0])}")
+
+    values_a = var_a[:]
+    values_b = var_b[:]
+    try:
+        values_diff = values_a - values_b
+    except (UFuncTypeError, TypeError):
+        # handle non-numeric fields
+        if not _all_values_are_equal(var_a, var_b):
+            print(f"ERROR: Field ({field}) values (non-numeric) "
+                  "differ\n"
+                  f" File_A: {var_a[:]}\n File_B: {var_b[:]}")
+            return False
+
+        return True
+
+    # if any NaN values in either data set, min and max of diff will be NaN
+    # compare each value
+    if numpy.isnan(values_diff.min()) and numpy.isnan(values_diff.max()):
+        print(f"Variable {field} contains NaN. Comparing each value...")
+        if not _all_values_are_equal(var_a, var_b):
+            print(f'ERROR: Some values differ in {field}')
+            return False
+        return True
+
+    # consider all values equal is min and max diff are 0
+    if not values_diff.min() and not values_diff.max():
+        return True
+
+    print(f"ERROR: Field ({field}) values differ\n"
+          f"Min diff: {values_diff.min()}, "
+          f"Max diff: {values_diff.max()}")
+    if debug:
+        # print indices that are not zero and count of diffs
+        _print_nc_field_diff_summary(values_diff)
+
+    return False
+
+
+def _print_nc_field_diff_summary(values_diff):
+    """!Print summary of NetCDF fields that differ. Prints the index of each
+    point that differs with the numeric difference between the points.
+    Also print number of points that differ and the total number of points.
+
+    @param values_diff numpy array (possibly 2D) of differences
+    """
+    count = 0
+    values_list = [j for sub in values_diff.tolist() for j in sub]
+    for idx, val in enumerate(values_list):
+        if val != 0.0:
+            print(f"{idx}: {val}")
+            count += 1
+    print(f"{count} / {idx + 1} points differ")
+
+
+def _all_values_are_equal(var_a, var_b):
+    """!Compare each value to find differences. Handles case if both values
+    are NaN.
+
+    @param var_a Numpy array
+    @param var_b Numpy array
+    @returns True if all values are equal, False otherwise
+    """
+    for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
+        # continue to next value if both values are NaN
+        if numpy.isnan(val_a) and numpy.isnan(val_b):
+            continue
+        if val_a != val_b:
+            return False
+    return True
 
 
 if __name__ == '__main__':
+    if len(sys.argv) < 3:
+        print('ERROR: Must supply 2 directories to compare as arguments')
+        sys.exit(1)
     dir_a = sys.argv[1]
     dir_b = sys.argv[2]
-    if len(sys.argv) > 3:
-        save_diff = True
+    save_diff = len(sys.argv) > 3
     compare_dir(dir_a, dir_b, debug=True, save_diff=save_diff)

From 905b409bc16b5b8870b0202e8a820bb34e3f1735 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 13:44:38 -0600
Subject: [PATCH 02/18] turn off use case

---
 .github/parm/use_case_groups.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json
index a6fdaf3129..131e0eed72 100644
--- a/.github/parm/use_case_groups.json
+++ b/.github/parm/use_case_groups.json
@@ -137,7 +137,7 @@
   {
     "category": "s2s",
     "index_list": "3",
-    "run": true
+    "run": false
   },
   {
     "category": "s2s",

From c2b6e3a37dc9d07307dc11560dc1705da2c6ae63 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 13:48:25 -0600
Subject: [PATCH 03/18] catch any exception that occurs in NetCDF file diff,
 ci-skip-unit-tests

---
 metplus/util/diff_util.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index a77f9df8c7..680f85fa6b 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -669,12 +669,16 @@ def _all_values_are_equal(var_a, var_b):
     @param var_b Numpy array
     @returns True if all values are equal, False otherwise
     """
-    for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
-        # continue to next value if both values are NaN
-        if numpy.isnan(val_a) and numpy.isnan(val_b):
-            continue
-        if val_a != val_b:
-            return False
+    try:
+        for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
+            # continue to next value if both values are NaN
+            if numpy.isnan(val_a) and numpy.isnan(val_b):
+                continue
+            if val_a != val_b:
+                return False
+    except Exception as err:
+        print(f'ERROR: Exception occurred: {err}')
+        return False
     return True
 
 

From 5fdee80f510df17d7765521dec95cc2cd766765c Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 14:10:10 -0600
Subject: [PATCH 04/18] catch any exception from file diff instead of just in
 NetCDF diff, ci-skip-unit-tests

---
 metplus/util/diff_util.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index 680f85fa6b..be63dd034c 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -92,12 +92,16 @@ def compare_dir(dir_a, dir_b, debug=False, save_diff=False):
               "# # # #\n")
         rel_path = filepath_a.replace(f'{dir_a}/', '')
         print(f"COMPARING {rel_path}")
-        result = compare_files(filepath_a,
-                               filepath_b,
-                               debug=debug,
-                               dir_a=dir_a,
-                               dir_b=dir_b,
-                               save_diff=save_diff)
+        try:
+            result = compare_files(filepath_a,
+                                   filepath_b,
+                                   debug=debug,
+                                   dir_a=dir_a,
+                                   dir_b=dir_b,
+                                   save_diff=save_diff)
+        except Exception as err:
+            print(f"ERROR: Exception occurred in diff logic: {err}")
+            result = filepath_a, filepath_b, 'Exception in diff logic', ''
 
         # no differences of skipped
         if result is None or result is True:
@@ -669,16 +673,12 @@ def _all_values_are_equal(var_a, var_b):
     @param var_b Numpy array
     @returns True if all values are equal, False otherwise
     """
-    try:
-        for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
-            # continue to next value if both values are NaN
-            if numpy.isnan(val_a) and numpy.isnan(val_b):
-                continue
-            if val_a != val_b:
-                return False
-    except Exception as err:
-        print(f'ERROR: Exception occurred: {err}')
-        return False
+    for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
+        # continue to next value if both values are NaN
+        if numpy.isnan(val_a) and numpy.isnan(val_b):
+            continue
+        if val_a != val_b:
+            return False
     return True
 
 

From 21af4b24d45bc7c461d0892d6fc563411a825e9b Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 14:12:58 -0600
Subject: [PATCH 05/18] forgot to trigger diff, ci-run-diff, ci-skip-unit-tests

---
 metplus/util/diff_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index be63dd034c..437302eb13 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -627,7 +627,7 @@ def _nc_fields_are_equal(field, nc_a, nc_b, debug=False):
         return True
 
     # if any NaN values in either data set, min and max of diff will be NaN
-    # compare each value
+    # compare each value 
     if numpy.isnan(values_diff.min()) and numpy.isnan(values_diff.max()):
         print(f"Variable {field} contains NaN. Comparing each value...")
         if not _all_values_are_equal(var_a, var_b):

From 2172134b869ec6f3bf78be7f07b78db5e61fa9b5 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 14:16:06 -0600
Subject: [PATCH 06/18] use pandas.isnull instead of numpy.isnan to prevent
 exceptions from occurring, ci-run-diff, ci-skip-unit-tests

---
 metplus/util/diff_util.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index 437302eb13..f9ca1215f1 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -6,7 +6,7 @@
 import filecmp
 import csv
 from PIL import Image, ImageChops
-import numpy
+from pandas import isnull
 from numpy.core._exceptions import UFuncTypeError
 
 IMAGE_EXTENSIONS = [
@@ -627,8 +627,8 @@ def _nc_fields_are_equal(field, nc_a, nc_b, debug=False):
         return True
 
     # if any NaN values in either data set, min and max of diff will be NaN
-    # compare each value 
-    if numpy.isnan(values_diff.min()) and numpy.isnan(values_diff.max()):
+    # compare each value
+    if isnull(values_diff.min()) and isnull(values_diff.max()):
         print(f"Variable {field} contains NaN. Comparing each value...")
         if not _all_values_are_equal(var_a, var_b):
             print(f'ERROR: Some values differ in {field}')
@@ -675,7 +675,7 @@ def _all_values_are_equal(var_a, var_b):
     """
     for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
         # continue to next value if both values are NaN
-        if numpy.isnan(val_a) and numpy.isnan(val_b):
+        if isnull(val_a) and isnull(val_b):
             continue
         if val_a != val_b:
             return False

From eefaa0c90eb71d0361941d7677f0a0d4efcd5cce Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 14:41:49 -0600
Subject: [PATCH 07/18] capture failure in diff script, ci-run-diff,
 ci-skip-unit-tests

---
 .github/jobs/run_difference_tests.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/jobs/run_difference_tests.sh b/.github/jobs/run_difference_tests.sh
index 49e7242ec8..333e0372e7 100755
--- a/.github/jobs/run_difference_tests.sh
+++ b/.github/jobs/run_difference_tests.sh
@@ -12,6 +12,7 @@ matrix_categories=$1
 artifact_name=$2
 
 .github/jobs/setup_and_run_diff.py ${matrix_categories} $artifact_name
+ret=$?
 
 if [ "$( ls -A ${RUNNER_WORKSPACE}/diff)" ]; then
   echo "upload_diff=true" >> $GITHUB_OUTPUT
@@ -21,3 +22,4 @@ if [ "$( ls -A ${RUNNER_WORKSPACE}/diff)" ]; then
 fi
 
 echo "upload_diff=false" >> $GITHUB_OUTPUT
+return $ret
\ No newline at end of file

From 0201daa8df93dcfc75859ba62ce6728805cf40af Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 14:46:22 -0600
Subject: [PATCH 08/18] ignore .idea directory that was auto-generated

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index a065a4e761..f62ca72792 100644
--- a/.gitignore
+++ b/.gitignore
@@ -88,6 +88,8 @@ ENV/
 # pytest cache files
 *.pytest_cache*
 
+.idea
+
 # tilda files generated by emacs
 *~
 

From 194088d68f6e7bc4071e313619152903336648cc Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 15:03:31 -0600
Subject: [PATCH 09/18] fix incorrect syntax, ci-run-diff, ci-skip-unit-tests

---
 .github/jobs/run_difference_tests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/jobs/run_difference_tests.sh b/.github/jobs/run_difference_tests.sh
index 333e0372e7..8112ad0ecf 100755
--- a/.github/jobs/run_difference_tests.sh
+++ b/.github/jobs/run_difference_tests.sh
@@ -22,4 +22,4 @@ if [ "$( ls -A ${RUNNER_WORKSPACE}/diff)" ]; then
 fi
 
 echo "upload_diff=false" >> $GITHUB_OUTPUT
-return $ret
\ No newline at end of file
+exit $ret
\ No newline at end of file

From 8017d99c3d9c2471488300a225362490abaf7983 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 16:13:58 -0600
Subject: [PATCH 10/18] handle failing comparison when NetCDF values are stored
 as a string, ci-run-diff, ci-skip-unit-tests

---
 metplus/util/diff_util.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index f9ca1215f1..ccaac6846e 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -673,6 +673,11 @@ def _all_values_are_equal(var_a, var_b):
     @param var_b Numpy array
     @returns True if all values are equal, False otherwise
     """
+    # if the values are stored as a string, compare them with ==
+    if isinstance(var_a[:], str) or isinstance(var_b[:], str):
+        return var_a[:] == var_b[:]
+
+    # flatten the numpy.ndarray and compare each value
     for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
         # continue to next value if both values are NaN
         if isnull(val_a) and isnull(val_b):

From d7981f218aed398d66b81faac35f065a3a5146b0 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 16:15:16 -0600
Subject: [PATCH 11/18] Preserve backwards compatibility by forcing
 TC_PAIRS_RUN_ONCE=False if LOOP_ORDER is still used and warn that it should
 be removed. Add debug log message to alert user that only the first
 init/valid time is being processed and how to change that if desired

---
 metplus/wrappers/tc_pairs_wrapper.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/metplus/wrappers/tc_pairs_wrapper.py b/metplus/wrappers/tc_pairs_wrapper.py
index 61fad0a136..78377e4317 100755
--- a/metplus/wrappers/tc_pairs_wrapper.py
+++ b/metplus/wrappers/tc_pairs_wrapper.py
@@ -293,6 +293,22 @@ def create_c_dict(self):
                                 False)
         )
 
+        # check for settings that cause differences moving from v4.1 to v5.0
+        # warn and update run setting to preserve old behavior
+        if (self.config.has_option('config', 'LOOP_ORDER') and
+            self.config.getstr_nocheck('config', 'LOOP_ORDER') == 'times' and
+            not self.config.has_option('config', 'TC_PAIRS_RUN_ONCE')):
+            self.logger.warning(
+                'LOOP_ORDER has been deprecated. LOOP_ORDER has been set to '
+                '"times" and TC_PAIRS_RUN_ONCE is not set. '
+                'Forcing TC_PAIRS_RUN_ONCE=False to preserve behavior prior to '
+                'v5.0.0. Please remove LOOP_ORDER and set '
+                'TC_PAIRS_RUN_ONCE=False to preserve previous behavior and '
+                'remove this warning message.'
+            )
+            c_dict['RUN_ONCE'] = False
+            return c_dict
+
         # only run once if True
         c_dict['RUN_ONCE'] = self.config.getbool('config',
                                                  'TC_PAIRS_RUN_ONCE',
@@ -318,6 +334,8 @@ def run_all_times(self):
         if not self.c_dict['RUN_ONCE']:
             return super().run_all_times()
 
+        self.logger.debug('Only processing first run time. Set '
+                          'TC_PAIRS_RUN_ONCE=False to process all run times.')
         self.run_at_time(input_dict)
         return self.all_commands
 

From c8a80b2bdd4c6c30322280dcf4eeceedb66fd62d Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 16:17:38 -0600
Subject: [PATCH 12/18] add pandas to diff conda environment used for diff
 tests in GHA to properly check for null/NaN values

---
 internal/scripts/docker_env/scripts/diff_env.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/internal/scripts/docker_env/scripts/diff_env.sh b/internal/scripts/docker_env/scripts/diff_env.sh
index 92a59c1470..bb89248e0c 100755
--- a/internal/scripts/docker_env/scripts/diff_env.sh
+++ b/internal/scripts/docker_env/scripts/diff_env.sh
@@ -2,10 +2,11 @@
 
 ################################################################################
 # Environment: diff.v5.1
-# Last Updated: 2023-01-31 (mccabe@ucar.edu)
+# Last Updated: 2023-06-14 (mccabe@ucar.edu)
 # Notes: Adds packages needed to run differences tests to compare output to
 #   truth data.
 # Python Packages:
+#   pandas
 #   pillow==9.2.0
 #   pdf2image==1.16.0
 #
@@ -23,6 +24,7 @@ ENV_NAME=diff.${METPLUS_VERSION}
 BASE_ENV=netcdf4.${METPLUS_VERSION}
 
 conda create -y --clone ${BASE_ENV} --name ${ENV_NAME}
+conda install -y --name ${ENV_NAME} -c conda-forge pandas
 conda install -y --name ${ENV_NAME} -c conda-forge pillow==9.2.0
 
 apt install -y poppler-utils

From 353fd152d9cf5374860b3bd3a1044f8d99e71d79 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Wed, 14 Jun 2023 16:44:23 -0600
Subject: [PATCH 13/18] turn off use cases after tests succeeded

---
 .github/parm/use_case_groups.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json
index 131e0eed72..23ff852726 100644
--- a/.github/parm/use_case_groups.json
+++ b/.github/parm/use_case_groups.json
@@ -7,7 +7,7 @@
   {
     "category": "met_tool_wrapper",
     "index_list": "30-58",
-    "run": true
+    "run": false
   },
   {
     "category": "air_quality_and_comp",
@@ -227,7 +227,7 @@
   {
     "category": "tc_and_extra_tc",
     "index_list": "0-2",
-    "run": true
+    "run": false
   },
   {
     "category": "tc_and_extra_tc",

From 52f5cebef3b734986fd9a08195e740308d99f9f9 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Thu, 15 Jun 2023 10:55:24 -0600
Subject: [PATCH 14/18] update apt-get to fix poppler-utils install error, add
 specific version for pandas

---
 internal/scripts/docker_env/scripts/diff_env.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/internal/scripts/docker_env/scripts/diff_env.sh b/internal/scripts/docker_env/scripts/diff_env.sh
index bb89248e0c..54544a38e8 100755
--- a/internal/scripts/docker_env/scripts/diff_env.sh
+++ b/internal/scripts/docker_env/scripts/diff_env.sh
@@ -6,7 +6,7 @@
 # Notes: Adds packages needed to run differences tests to compare output to
 #   truth data.
 # Python Packages:
-#   pandas
+#   pandas==2.0.2
 #   pillow==9.2.0
 #   pdf2image==1.16.0
 #
@@ -24,9 +24,10 @@ ENV_NAME=diff.${METPLUS_VERSION}
 BASE_ENV=netcdf4.${METPLUS_VERSION}
 
 conda create -y --clone ${BASE_ENV} --name ${ENV_NAME}
-conda install -y --name ${ENV_NAME} -c conda-forge pandas
+conda install -y --name ${ENV_NAME} -c conda-forge pandas==2.0.2
 conda install -y --name ${ENV_NAME} -c conda-forge pillow==9.2.0
 
-apt install -y poppler-utils
+apt-get update
+apt-get install -y poppler-utils
 
 conda install -y --name ${ENV_NAME} -c conda-forge pdf2image==1.16.0

From 1421c18e5b01e37881c1a39fdfdbe0043327a762 Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Thu, 15 Jun 2023 11:07:20 -0600
Subject: [PATCH 15/18] run use case to test that diff.v5.1 conda env was
 created properly, ci-run-diff

---
 .github/parm/use_case_groups.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json
index 23ff852726..6bae411ea5 100644
--- a/.github/parm/use_case_groups.json
+++ b/.github/parm/use_case_groups.json
@@ -157,7 +157,7 @@
   {
     "category": "s2s_mid_lat",
     "index_list": "0-2",
-    "run": false
+    "run": true
   },
   {
     "category": "s2s_mid_lat",

From b29a7acc67d80c9107aca78e6641a154365cb23a Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Thu, 15 Jun 2023 11:23:04 -0600
Subject: [PATCH 16/18] check if values are completely equal inside
 _is_equal_rounded function, call that function in NC value diff to prevent
 differences that are smaller than 5 decimal places

---
 metplus/util/diff_util.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index ccaac6846e..14efccf47c 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -382,8 +382,6 @@ def _compare_csv_columns(lines_a, lines_b):
         for key in keys_a:
             val_a = line_a[key]
             val_b = line_b[key]
-            if val_a == val_b:
-                continue
             # prevent error if values are diffs are less than
             # ROUNDING_PRECISION decimal places
             # METplus issue #1873 addresses the real problem
@@ -404,6 +402,8 @@ def _compare_csv_columns(lines_a, lines_b):
 
 
 def _is_equal_rounded(value_a, value_b):
+    if value_a == value_b:
+        return True
     if _truncate_float(value_a) == _truncate_float(value_b):
         return True
     if _round_float(value_a) == _round_float(value_b):
@@ -682,7 +682,8 @@ def _all_values_are_equal(var_a, var_b):
         # continue to next value if both values are NaN
         if isnull(val_a) and isnull(val_b):
             continue
-        if val_a != val_b:
+        if not _is_equal_rounded(val_a, val_b):
+            print(f'val_a: {val_a}, val_b: {val_b}')
             return False
     return True
 

From 2b9bea52580fca1621a94958bf18fa91728980db Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Thu, 15 Jun 2023 13:35:26 -0600
Subject: [PATCH 17/18] turn off all use case groups before re-opening PR

---
 .github/parm/use_case_groups.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json
index 6bae411ea5..23ff852726 100644
--- a/.github/parm/use_case_groups.json
+++ b/.github/parm/use_case_groups.json
@@ -157,7 +157,7 @@
   {
     "category": "s2s_mid_lat",
     "index_list": "0-2",
-    "run": true
+    "run": false
   },
   {
     "category": "s2s_mid_lat",

From 0cbcff25c134e7379696a58e9d2e0693cd7d53db Mon Sep 17 00:00:00 2001
From: George McCabe <23407799+georgemccabe@users.noreply.github.com>
Date: Thu, 15 Jun 2023 17:16:08 -0600
Subject: [PATCH 18/18] if both netcdf values are masked, consider them equal

---
 metplus/util/diff_util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/metplus/util/diff_util.py b/metplus/util/diff_util.py
index 14efccf47c..f26ba790f2 100755
--- a/metplus/util/diff_util.py
+++ b/metplus/util/diff_util.py
@@ -7,6 +7,7 @@
 import csv
 from PIL import Image, ImageChops
 from pandas import isnull
+from numpy.ma import is_masked
 from numpy.core._exceptions import UFuncTypeError
 
 IMAGE_EXTENSIONS = [
@@ -680,7 +681,7 @@ def _all_values_are_equal(var_a, var_b):
     # flatten the numpy.ndarray and compare each value
     for val_a, val_b in zip(var_a[:].flatten(), var_b[:].flatten()):
         # continue to next value if both values are NaN
-        if isnull(val_a) and isnull(val_b):
+        if (isnull(val_a) and isnull(val_b)) or (is_masked(val_a) and is_masked(val_b)):
             continue
         if not _is_equal_rounded(val_a, val_b):
             print(f'val_a: {val_a}, val_b: {val_b}')