-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
CDAT Migration Phase 2: Refactor
enso_diags
set (#832)
- Loading branch information
1 parent
9053509
commit bc4709b
Showing
17 changed files
with
4,507 additions
and
959 deletions.
There are no files selected for viewing
370 changes: 370 additions & 0 deletions
370
...ression_testing/663-enso-diags/662-area-mean-time-series-rerun/regression-test-json.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,370 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# CDAT Migration Regression Testing Notebook (`.json` metrics)\n", | ||
"\n", | ||
"This notebook is used to perform regression testing between the development and\n", | ||
"production versions of a diagnostic set.\n", | ||
"\n", | ||
"## How it works\n", | ||
"\n", | ||
"It compares the relative differences (%) between two sets of `.json` files in two\n", | ||
"separate directories, one for the refactored code and the other for the `main` branch.\n", | ||
"\n", | ||
"It will display metrics values with relative differences >= 2%. Relative differences are used instead of absolute differences because:\n", | ||
"\n", | ||
"- Relative differences are in percentages, which shows the scale of the differences.\n", | ||
"- Absolute differences are just a raw number that doesn't factor in\n", | ||
" floating point size (e.g., 100.00 vs. 0.0001), which can be misleading.\n", | ||
"\n", | ||
"## How to use\n", | ||
"\n", | ||
"PREREQUISITE: The diagnostic set's metrics stored in `.json` files in two directories\n", | ||
"(dev and `main` branches).\n", | ||
"\n", | ||
"1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/<DIR_NAME>`.\n", | ||
"2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n", | ||
"3. Run `mamba activate cdat_regression_test`\n", | ||
"4. Update `DEV_PATH` and `MAIN_PATH` in the copy of your notebook.\n", | ||
"5. Run all cells IN ORDER.\n", | ||
"6. Review results for any outstanding differences (>= 2%).\n", | ||
" - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Setup Code\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import glob\n", | ||
"from typing import List\n", | ||
"\n", | ||
"import pandas as pd\n", | ||
"\n", | ||
"from auxiliary_tools.cdat_regression_testing.utils import (\n", | ||
" get_num_metrics_above_diff_thres,\n", | ||
" get_rel_diffs,\n", | ||
" update_diffs_to_pct,\n", | ||
" highlight_large_diffs,\n", | ||
")\n", | ||
"\n", | ||
"SET_NAME = \"area_mean_time_series\"\n", | ||
"SET_DIR = \"663-area-mean-time-series-rerun\"\n", | ||
"\n", | ||
"DEV_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/{SET_DIR}/{SET_NAME}/**\"\n", | ||
"MAIN_PATH = f\"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main-area-mean-time-series/{SET_NAME}/**\"\n", | ||
"\n", | ||
"DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", | ||
"MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))\n", | ||
"\n", | ||
"if len(DEV_GLOB) == 0 or len(MAIN_GLOB) == 0:\n", | ||
" raise IOError(\"No files found at DEV_PATH and/or MAIN_PATH.\")\n", | ||
"\n", | ||
"if len(DEV_GLOB) != len(MAIN_GLOB):\n", | ||
" raise IOError(\"Number of files do not match at DEV_PATH and MAIN_PATH.\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_metrics(filepaths: List[str]) -> pd.DataFrame:\n", | ||
" \"\"\"Get the metrics using a glob of `.json` metric files in a directory.\n", | ||
"\n", | ||
" Parameters\n", | ||
" ----------\n", | ||
" filepaths : List[str]\n", | ||
" The filepaths for metrics `.json` files.\n", | ||
"\n", | ||
" Returns\n", | ||
" -------\n", | ||
" pd.DataFrame\n", | ||
" The DataFrame containing the metrics for all of the variables in\n", | ||
" the results directory.\n", | ||
" \"\"\"\n", | ||
" metrics = []\n", | ||
"\n", | ||
" for filepath in filepaths:\n", | ||
" df = pd.read_json(filepath)\n", | ||
"\n", | ||
" filename = filepath.split(\"/\")[-1]\n", | ||
" var_key = filename.split(\"-\")[0]\n", | ||
" region = filename.split(\"-\")[-1]\n", | ||
"\n", | ||
" # Add the variable key to the MultiIndex and update the index\n", | ||
" # before stacking to make the DataFrame easier to parse.\n", | ||
" multiindex = pd.MultiIndex.from_product([[var_key], [region], [*df.index]])\n", | ||
" df = df.set_index(multiindex)\n", | ||
" df.stack()\n", | ||
"\n", | ||
" metrics.append(df)\n", | ||
"\n", | ||
" df_final = pd.concat(metrics)\n", | ||
"\n", | ||
" return df_final" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"metrics = []\n", | ||
"\n", | ||
"for filepath in DEV_GLOB:\n", | ||
" df = pd.read_json(filepath)\n", | ||
"\n", | ||
" filename = filepath.split(\"/\")[-1]\n", | ||
" var_key = filename.split(\"-\")[0]\n", | ||
"\n", | ||
" # Add the variable key to the MultiIndex and update the index\n", | ||
" # before stacking to make the DataFrame easier to parse.\n", | ||
" multiindex = pd.MultiIndex.from_product([[var_key], [filename], [*df.index]])\n", | ||
" df = df.set_index(multiindex)\n", | ||
" df.stack()\n", | ||
"\n", | ||
" metrics.append(df)\n", | ||
"\n", | ||
"df_final = pd.concat(metrics)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## 1. Get the metrics for the development and `main` branches and their differences.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_metrics_dev = get_metrics(DEV_GLOB)\n", | ||
"df_metrics_main = get_metrics(MAIN_GLOB)\n", | ||
"df_metrics_diffs = get_rel_diffs(df_metrics_dev, df_metrics_main)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## 2. Filter differences to those above maximum threshold (2%).\n", | ||
"\n", | ||
"All values below maximum threshold will be labeled as `NaN`.\n", | ||
"\n", | ||
"- **If all cells in a row are NaN (< 2%)**, the entire row is dropped to make the results easier to parse.\n", | ||
"- Any remaining NaN cells are below < 2% difference and **should be ignored**.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_metrics_diffs_thres = df_metrics_diffs[df_metrics_diffs >= 0.02]\n", | ||
"df_metrics_diffs_thres = df_metrics_diffs_thres.dropna(\n", | ||
" axis=0, how=\"all\", ignore_index=False\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## 3. Combine all DataFrames to get the final result.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"df_metrics_all = pd.concat(\n", | ||
" [df_metrics_dev.add_suffix(\"_dev\"), df_metrics_main.add_suffix(\"_main\")],\n", | ||
" axis=1,\n", | ||
" join=\"outer\",\n", | ||
")\n", | ||
"df_final = df_metrics_diffs_thres.join(df_metrics_all)\n", | ||
"df_final = update_diffs_to_pct(df_final, [\"e3sm_v2 (0051-0060) DIFF (%)\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th></th>\n", | ||
" <th>e3sm_v2 (0051-0060) DIFF (%)</th>\n", | ||
" <th>e3sm_v2 (0051-0060)_dev</th>\n", | ||
" <th>e3sm_v2 (0051-0060)_main</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
"Empty DataFrame\n", | ||
"Columns: [e3sm_v2 (0051-0060) DIFF (%), e3sm_v2 (0051-0060)_dev, e3sm_v2 (0051-0060)_main]\n", | ||
"Index: []" | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"df_final" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## 4. Review variables and metrics above difference threshold.\n", | ||
"\n", | ||
"- <span style=\"color:red\">Red</span> cells are differences >= 2%\n", | ||
"- `nan` cells are differences < 2% and **should be ignored**\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"* Related variables []\n", | ||
"* Number of metrics above 2% max threshold: 0 / 720\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"df_final_adj = df_final.reset_index(names=[\"var_key\", \"region\", \"metric\"])\n", | ||
"get_num_metrics_above_diff_thres(df_metrics_all, df_final_adj)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<style type=\"text/css\">\n", | ||
"</style>\n", | ||
"<table id=\"T_d3f2c\">\n", | ||
" <thead>\n", | ||
" <tr>\n", | ||
" <th class=\"blank level0\" > </th>\n", | ||
" <th id=\"T_d3f2c_level0_col0\" class=\"col_heading level0 col0\" >var_key</th>\n", | ||
" <th id=\"T_d3f2c_level0_col1\" class=\"col_heading level0 col1\" >region</th>\n", | ||
" <th id=\"T_d3f2c_level0_col2\" class=\"col_heading level0 col2\" >metric</th>\n", | ||
" <th id=\"T_d3f2c_level0_col3\" class=\"col_heading level0 col3\" >e3sm_v2 (0051-0060) DIFF (%)</th>\n", | ||
" <th id=\"T_d3f2c_level0_col4\" class=\"col_heading level0 col4\" >e3sm_v2 (0051-0060)_dev</th>\n", | ||
" <th id=\"T_d3f2c_level0_col5\" class=\"col_heading level0 col5\" >e3sm_v2 (0051-0060)_main</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" </tbody>\n", | ||
"</table>\n" | ||
], | ||
"text/plain": [ | ||
"<pandas.io.formats.style.Styler at 0x7f555503c610>" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"highlight_large_diffs(df_final_adj, [\"e3sm_v2 (0051-0060) DIFF (%)\"])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Results\n", | ||
"\n", | ||
"- 792 / 792 metrics are within diff tolerance\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "cdat_regression_test", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.14" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.