DataResponsibly · denysgerasymuk799 · Dec 18, 2023 · Sep 30, 2023 · Oct 1, 2023 · Oct 1, 2023
diff --git a/README.md b/README.md
@@ -28,7 +28,6 @@
 </p>
 
 
-
 ## 📜 Description
 
 **Virny** is a Python library for auditing model stability and fairness. The Virny library was

diff --git a/docs/examples/Multiple_Models_Interface_Vis.ipynb b/docs/examples/Multiple_Models_Interface_Vis.ipynb
diff --git a/docs/examples/Multiple_Models_Interface_Vis_Income.ipynb b/docs/examples/Multiple_Models_Interface_Vis_Income.ipynb
@@ -0,0 +1,299 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "248cbed8",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:44.370856Z",
+     "start_time": "2023-12-10T22:37:43.972175Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7ec6cd08",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:44.380242Z",
+     "start_time": "2023-12-10T22:37:44.371542Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "os.environ[\"PYTHONWARNINGS\"] = \"ignore\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b8cb69f2",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:44.391659Z",
+     "start_time": "2023-12-10T22:37:44.380644Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Current location:  /Users/denys_herasymuk/UCU/4course_2term/Bachelor_Thesis/Code/Virny\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Relative paths used later (docs/examples/...) assume the repo root is the working directory\n",
+    "cur_folder_name = os.path.basename(os.getcwd())\n",
+    "if cur_folder_name != \"Virny\":\n",
+    "    os.chdir(\"../..\")\n",
+    "print('Current location: ', os.getcwd())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a578f2ab",
+   "metadata": {},
+   "source": [
+    "# Multiple Models Interface Usage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "7a9241de",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:45.918385Z",
+     "start_time": "2023-12-10T22:37:44.390547Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "from virny.datasets import ACSIncomeDataset\n",
+    "from virny.custom_classes.metrics_composer import MetricsComposer\n",
+    "from virny.custom_classes.metrics_interactive_visualizer import MetricsInteractiveVisualizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "outputs": [],
+   "source": [
+    "data_loader = ACSIncomeDataset(state=['GA'], year=2018, with_nulls=False, subsample_size=15_000, subsample_seed=42)\n",
+    "sensitive_attributes_dct = {'SEX': '2', 'RAC1P': ['2', '3', '4', '5', '6', '7', '8', '9'], 'SEX&RAC1P': None}"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:47.214487Z",
+     "start_time": "2023-12-10T22:37:45.921391Z"
+    }
+   },
+   "id": "d3c53c7b72ecbcd0"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "outputs": [],
+   "source": [
+    "ROOT_DIR = os.path.join('docs', 'examples')\n",
+    "subgroup_metrics_df = pd.read_csv(os.path.join(ROOT_DIR, 'income_subgroup_metrics.csv'), header=0)\n",
+    "alpha_suffix = '__alpha=' + subgroup_metrics_df['Intervention_Param'].astype(str)  # e.g. '__alpha=0.7'\n",
+    "subgroup_metrics_df['Model_Name'] = subgroup_metrics_df['Model_Name'] + alpha_suffix"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:47.242581Z",
+     "start_time": "2023-12-10T22:37:47.214727Z"
+    }
+   },
+   "id": "2aab7c79ecdee914"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "                         Metric       SEX     RAC1P  SEX&RAC1P  \\\n0               Accuracy_Parity  0.047756  0.074977   0.065217   \n1  Aleatoric_Uncertainty_Parity -0.039005 -0.011947  -0.009222   \n2   Aleatoric_Uncertainty_Ratio  0.935159  0.979638   0.984220   \n3            Equalized_Odds_FNR  0.030793 -0.110745  -0.052498   \n4            Equalized_Odds_FPR -0.021317  0.000952  -0.007008   \n\n                  Model_Name  \n0  LGBMClassifier__alpha=0.7  \n1  LGBMClassifier__alpha=0.7  \n2  LGBMClassifier__alpha=0.7  \n3  LGBMClassifier__alpha=0.7  \n4  LGBMClassifier__alpha=0.7  ",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Metric</th>\n      <th>SEX</th>\n      <th>RAC1P</th>\n      <th>SEX&amp;RAC1P</th>\n      <th>Model_Name</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Accuracy_Parity</td>\n      <td>0.047756</td>\n      <td>0.074977</td>\n      <td>0.065217</td>\n      <td>LGBMClassifier__alpha=0.7</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Aleatoric_Uncertainty_Parity</td>\n      <td>-0.039005</td>\n      <td>-0.011947</td>\n      <td>-0.009222</td>\n      <td>LGBMClassifier__alpha=0.7</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Aleatoric_Uncertainty_Ratio</td>\n      <td>0.935159</td>\n      <td>0.979638</td>\n      <td>0.984220</td>\n      <td>LGBMClassifier__alpha=0.7</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Equalized_Odds_FNR</td>\n      <td>0.030793</td>\n      <td>-0.110745</td>\n      <td>-0.052498</td>\n      <td>LGBMClassifier__alpha=0.7</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Equalized_Odds_FPR</td>\n      <td>-0.021317</td>\n      <td>0.000952</td>\n      <td>-0.007008</td>\n      <td>LGBMClassifier__alpha=0.7</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_names = subgroup_metrics_df['Model_Name'].unique()\n",
+    "# One metrics frame per model, keyed by its (suffixed) model name\n",
+    "models_metrics_dct = {\n",
+    "    name: subgroup_metrics_df[subgroup_metrics_df['Model_Name'] == name] for name in model_names\n",
+    "}\n",
+    "metrics_composer = MetricsComposer(models_metrics_dct, sensitive_attributes_dct)\n",
+    "models_composed_metrics_df = metrics_composer.compose_metrics()\n",
+    "models_composed_metrics_df.head()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:47.297089Z",
+     "start_time": "2023-12-10T22:37:47.240439Z"
+    }
+   },
+   "id": "44ee5eff6054ce04"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "dict_keys(['LGBMClassifier__alpha=0.7', 'LGBMClassifier__alpha=0.0', 'LGBMClassifier__alpha=0.4', 'LogisticRegression__alpha=0.0', 'LogisticRegression__alpha=0.7', 'LogisticRegression__alpha=0.4', 'MLPClassifier__alpha=0.0', 'MLPClassifier__alpha=0.7', 'MLPClassifier__alpha=0.4', 'RandomForestClassifier__alpha=0.4', 'RandomForestClassifier__alpha=0.7', 'RandomForestClassifier__alpha=0.0'])"
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "models_metrics_dct.keys()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:47.328697Z",
+     "start_time": "2023-12-10T22:37:47.295950Z"
+    }
+   },
+   "id": "15ed7d1ba1f22317"
+  },
+  {
+   "cell_type": "markdown",
+   "id": "deb45226",
+   "metadata": {},
+   "source": [
+    "## Metrics Visualization and Reporting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "435b9d98",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-12-10T22:37:47.374721Z",
+     "start_time": "2023-12-10T22:37:47.317882Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "visualizer = MetricsInteractiveVisualizer(data_loader.X_data, data_loader.y_data,\n",
+    "                                          models_metrics_dct, models_composed_metrics_df,\n",
+    "                                          sensitive_attributes_dct=sensitive_attributes_dct)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running on local URL:  http://127.0.0.1:7860\n",
+      "\n",
+      "To create a public link, set `share=True` in `launch()`.\n",
+      "Keyboard interruption in main thread... closing server.\n"
+     ]
+    }
+   ],
+   "source": [
+    "visualizer.start_web_app()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-11T00:26:17.429094Z",
+     "start_time": "2023-12-10T22:37:47.343749Z"
+    }
+   },
+   "id": "678a9dc8d51243f4"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Closing server running on port: 7860\n"
+     ]
+    }
+   ],
+   "source": [
+    "visualizer.stop_web_app()"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-11T00:26:17.482944Z",
+     "start_time": "2023-12-11T00:26:17.438287Z"
+    }
+   },
+   "id": "277b6d1de837dab7"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-12-11T00:26:17.483195Z",
+     "start_time": "2023-12-11T00:26:17.479725Z"
+    }
+   },
+   "id": "21c0ad91536f0af5"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}