Skip to content

Commit

Permalink
Cleanup/lint with ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
dfsnow committed Nov 7, 2024
1 parent f478d7a commit 9af8f19
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 91 deletions.
8 changes: 4 additions & 4 deletions assesspy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@
from .formulas import (
cod,
cod_met,
ki,
mki,
mki_met,
prb,
prb_met,
prd,
prd_met,
ki,
mki,
mki_met,
)
from .load_data import ratios_sample
from .outliers import (
iqr_outlier,
is_outlier,
quantile_outlier,
)
from .load_data import ratios_sample
from .sales_chasing import detect_chasing
from .utils import check_inputs
10 changes: 6 additions & 4 deletions assesspy/ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ def boot_ci(fun, nboot=100, alpha=0.05, **kwargs):
kwargs.keys()
):
kwargs = (kwargs["assessed"], kwargs["sale_price"])
elif fun.__name__ == "prd" and not set(["assessed", "sale_price"]).issubset(
kwargs.keys()
):
raise Exception("PRD function expects arguments 'assessed' and 'sale_price'.")
elif fun.__name__ == "prd" and not set(
["assessed", "sale_price"]
).issubset(kwargs.keys()):
raise Exception(
    "PRD function expects arguments 'assessed' and 'sale_price'."
)
else:
kwargs = tuple(kwargs.values())

Expand Down
5 changes: 4 additions & 1 deletion assesspy/formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,10 @@ def prb(assessed, sale_price, round=None):
prb_ci = prb_model.conf_int(alpha=0.05)[0].tolist()

if round is not None:
out = {"prb": np.round(prb_val, round), "95% ci": np.round(prb_ci, round)}
out = {
"prb": np.round(prb_val, round),
"95% ci": np.round(prb_ci, round),
}

else:
out = {"prb": prb_val, "95% ci": prb_ci}
Expand Down
4 changes: 3 additions & 1 deletion assesspy/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,7 @@ def ratios_sample():
:rtype: DataFrame
"""

stream = pkg_resources.resource_stream(__name__, "data/ratios_sample.parquet")
stream = pkg_resources.resource_stream(
__name__, "data/ratios_sample.parquet"
)
return pd.read_parquet(stream)
4 changes: 3 additions & 1 deletion assesspy/outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ def is_outlier(x, method="iqr", probs=[0.05, 0.95]):
ap.is_outlier(ap.ratios_sample().ratio)
"""

out = {"iqr": iqr_outlier(x), "quantile": quantile_outlier(x, probs)}.get(method)
out = {"iqr": iqr_outlier(x), "quantile": quantile_outlier(x, probs)}.get(
method
)

# Warn about removing data from small samples, as it can severely distort
# ratio study outcomes
Expand Down
4 changes: 3 additions & 1 deletion assesspy/sales_chasing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def detect_chasing_cdf(ratio, bounds=[0.98, 1.02], cdf_gap=0.03):
# Check if the largest difference is greater than the threshold and make
# sure it's within the specified boundaries
diff_loc = sorted_ratio[np.argmax(diffs)]
out = (max(diffs) > cdf_gap) & ((diff_loc > bounds[0]) & (diff_loc < bounds[1]))
out = (max(diffs) > cdf_gap) & (
(diff_loc > bounds[0]) & (diff_loc < bounds[1])
)

return out

Expand Down
4 changes: 3 additions & 1 deletion assesspy/tests/test_formulas.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def test_prb_met(self): # Standard met function
gini_data_sale.append(first_column)
gini_data_assessed.append(second_column)

gini_data_assessed = [int(value.replace('"', "")) for value in gini_data_assessed]
gini_data_assessed = [
int(value.replace('"', "")) for value in gini_data_assessed
]
gini_data_sale = [int(value.replace('"', "")) for value in gini_data_sale]

mki_out = assesspy.mki(gini_data_assessed, gini_data_sale)
Expand Down
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys

from sphinx_pyproject import SphinxConfig

sys.path.append(os.path.abspath("../.."))
Expand Down
201 changes: 123 additions & 78 deletions docs/source/notebooks/example-ratio-study.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,18 @@
"outputs": [],
"source": [
"%%capture\n",
"import json\n",
"import pandas as pd\n",
"\n",
"# Load 100k rows of 2020 residential (major class 2) assessment data\n",
"assessments = pd.read_json(\n",
" \"https://datacatalog.cookcountyil.gov/resource/uzyt-m557.json\" +\n",
" \"?$where=starts_with(class,'2')&tax_year=2020&$limit=100000\"\n",
" \"https://datacatalog.cookcountyil.gov/resource/uzyt-m557.json\"\n",
" + \"?$where=starts_with(class,'2')&tax_year=2020&$limit=100000\"\n",
")\n",
"\n",
"# Load 100k rows of 2020 sales data\n",
"sales = pd.read_json(\n",
" \"https://datacatalog.cookcountyil.gov/resource/wvhk-k5uv.json\" +\n",
" \"?$where=sale_price>10000&year=2020&$limit=100000\"\n",
" \"https://datacatalog.cookcountyil.gov/resource/wvhk-k5uv.json\"\n",
" + \"?$where=sale_price>10000&year=2020&$limit=100000\"\n",
")\n",
"\n",
"# read_json removes leading zeroes, add them back\n",
Expand Down Expand Up @@ -139,12 +138,12 @@
" assessments.rename(columns={\"tax_year\": \"year\"}),\n",
" id_vars=[\"pin\", \"year\", \"township_name\"],\n",
" value_vars=[\"mailed_tot\", \"certified_tot\", \"board_tot\"],\n",
" var_name = \"stage\",\n",
" value_name='assessed'\n",
" var_name=\"stage\",\n",
" value_name=\"assessed\",\n",
" ),\n",
" sales[[\"pin\", \"year\", \"sale_price\", \"is_multisale\"]],\n",
" on = [\"pin\", \"year\"],\n",
" how = \"inner\"\n",
" on=[\"pin\", \"year\"],\n",
" how=\"inner\",\n",
")\n",
"\n",
"# Remove multisales, then calculate the ratio for each property\n",
Expand Down Expand Up @@ -482,33 +481,53 @@
}
],
"source": [
"import warnings\n",
"\n",
"import numpy as np\n",
"\n",
"import assesspy as ap\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"# For each town and stage, calculate COD, PRD, and PRB, and their respective\n",
"# confidence intervals then arrange by town name and stage of assessment\n",
"town_stats = combined[combined.assessed > 0].copy(deep=True)\n",
"town_stats['stage'] = town_stats.stage.astype('category').cat.reorder_categories(['mailed_tot', 'certified_tot', 'board_tot'])\n",
"town_stats[\"stage\"] = town_stats.stage.astype(\n",
" \"category\"\n",
").cat.reorder_categories([\"mailed_tot\", \"certified_tot\", \"board_tot\"])\n",
"town_stats = town_stats.groupby([\"township_name\", \"stage\"]).apply(\n",
" lambda x: pd.Series({\n",
" 'n':np.size(x['pin']),\n",
" 'cod':np.round(ap.cod(ratio = x['ratio']), 2),\n",
" 'cod_ci':np.round(ap.cod_ci(ratio = x['ratio']), 2),\n",
" 'prd':np.round(ap.prd(x['assessed'], x['sale_price']), 2),\n",
" 'prd_ci':np.round(ap.prd_ci(x['assessed'], x['sale_price']), 2),\n",
" 'prb':ap.prb(x['assessed'], x['sale_price'], 3)\n",
" })\n",
" lambda x: pd.Series(\n",
" {\n",
" \"n\": np.size(x[\"pin\"]),\n",
" \"cod\": np.round(ap.cod(ratio=x[\"ratio\"]), 2),\n",
" \"cod_ci\": np.round(ap.cod_ci(ratio=x[\"ratio\"]), 2),\n",
" \"prd\": np.round(ap.prd(x[\"assessed\"], x[\"sale_price\"]), 2),\n",
" \"prd_ci\": np.round(ap.prd_ci(x[\"assessed\"], x[\"sale_price\"]), 2),\n",
" \"prb\": ap.prb(x[\"assessed\"], x[\"sale_price\"], 3),\n",
" }\n",
" )\n",
")\n",
"\n",
"town_stats['prb_ci'] = town_stats.prb.str['95% ci']\n",
"town_stats['prb'] = town_stats.prb.str['prb']\n",
"town_stats['cod_met'] = town_stats.cod.apply(ap.cod_met)\n",
"town_stats['prd_met'] = town_stats.prd.apply(ap.prd_met)\n",
"town_stats['prb_met'] = town_stats.prb.apply(ap.prb_met)\n",
"town_stats = town_stats[['n', 'cod', 'cod_ci', 'cod_met', 'prd', 'prd_ci', 'prd_met', 'prb', 'prb_ci', 'prb_met']]\n",
"town_stats = town_stats[town_stats['n'] >= 70]\n",
"town_stats[\"prb_ci\"] = town_stats.prb.str[\"95% ci\"]\n",
"town_stats[\"prb\"] = town_stats.prb.str[\"prb\"]\n",
"town_stats[\"cod_met\"] = town_stats.cod.apply(ap.cod_met)\n",
"town_stats[\"prd_met\"] = town_stats.prd.apply(ap.prd_met)\n",
"town_stats[\"prb_met\"] = town_stats.prb.apply(ap.prb_met)\n",
"town_stats = town_stats[\n",
" [\n",
" \"n\",\n",
" \"cod\",\n",
" \"cod_ci\",\n",
" \"cod_met\",\n",
" \"prd\",\n",
" \"prd_ci\",\n",
" \"prd_met\",\n",
" \"prb\",\n",
" \"prb_ci\",\n",
" \"prb_met\",\n",
" ]\n",
"]\n",
"town_stats = town_stats[town_stats[\"n\"] >= 70]\n",
"\n",
"town_stats"
]
Expand Down Expand Up @@ -621,12 +640,16 @@
}
],
"source": [
"deciles = np.linspace(.1, .9, 9).round(1)\n",
"\n",
"median_ratios = pd.DataFrame(deciles, columns = ['Decile'])\n",
"median_ratios['Decile'] = (median_ratios.Decile * 100).astype(int).astype(str) + '%'\n",
"median_ratios['Sale Price'] = np.quantile(combined.sale_price, deciles)\n",
"median_ratios['Sale Price'] = median_ratios['Sale Price'].apply(lambda x: \"${:,.0f}\".format(x))\n",
"deciles = np.linspace(0.1, 0.9, 9).round(1)\n",
"\n",
"median_ratios = pd.DataFrame(deciles, columns=[\"Decile\"])\n",
"median_ratios[\"Decile\"] = (median_ratios.Decile * 100).astype(int).astype(\n",
" str\n",
") + \"%\"\n",
"median_ratios[\"Sale Price\"] = np.quantile(combined.sale_price, deciles)\n",
"median_ratios[\"Sale Price\"] = median_ratios[\"Sale Price\"].apply(\n",
" lambda x: \"${:,.0f}\".format(x)\n",
")\n",
"\n",
"median_ratios"
]
Expand Down Expand Up @@ -656,32 +679,39 @@
],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.style.use('default')\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"plt.style.use(\"default\")\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"graph_data = combined\n",
"graph_data['rank'] = pd.qcut(graph_data['sale_price'], 10, labels=False)\n",
"graph_data['rank'] = graph_data['rank'] + 1\n",
"graph_data['decile'] = pd.qcut(graph_data['sale_price'] / 1000, 10, precision=0)\n",
"graph_data['decile'] = graph_data['decile'].astype(str).str.replace('(', '\\$')\n",
"graph_data['decile'] = graph_data['decile'].str.replace(', ', ' - \\$')\n",
"graph_data['decile'] = graph_data['decile'].str.replace('.0', 'K', regex=False)\n",
"graph_data['decile'] = graph_data['decile'].str.replace(']', '')\n",
"graph_data['decile'] = graph_data['decile'].str.replace(' - \\$9050K', '+', regex=False)\n",
"graph_data = graph_data.groupby(['rank', 'decile']).apply(\n",
" lambda x: pd.Series({\n",
" 'Median Sales Ratio':np.median(x['ratio']),\n",
" })\n",
" )\n",
"graph_data[\"rank\"] = pd.qcut(graph_data[\"sale_price\"], 10, labels=False)\n",
"graph_data[\"rank\"] = graph_data[\"rank\"] + 1\n",
"graph_data[\"decile\"] = pd.qcut(\n",
" graph_data[\"sale_price\"] / 1000, 10, precision=0\n",
")\n",
"graph_data[\"decile\"] = graph_data[\"decile\"].astype(str).str.replace(\"(\", \"\\$\")\n",
"graph_data[\"decile\"] = graph_data[\"decile\"].str.replace(\", \", \" - \\$\")\n",
"graph_data[\"decile\"] = graph_data[\"decile\"].str.replace(\".0\", \"K\", regex=False)\n",
"graph_data[\"decile\"] = graph_data[\"decile\"].str.replace(\"]\", \"\")\n",
"graph_data[\"decile\"] = graph_data[\"decile\"].str.replace(\n",
" \" - \\$9050K\", \"+\", regex=False\n",
")\n",
"graph_data = graph_data.groupby([\"rank\", \"decile\"]).apply(\n",
" lambda x: pd.Series(\n",
" {\n",
" \"Median Sales Ratio\": np.median(x[\"ratio\"]),\n",
" }\n",
" )\n",
")\n",
"\n",
"graph_data = graph_data.reset_index()\n",
"plt.scatter(graph_data['decile'], graph_data['Median Sales Ratio'])\n",
"plt.scatter(graph_data[\"decile\"], graph_data[\"Median Sales Ratio\"])\n",
"plt.xticks(rotation=45)\n",
"plt.xlabel('Decile')\n",
"plt.ylabel('Ratio')\n",
"plt.xlabel(\"Decile\")\n",
"plt.ylabel(\"Ratio\")\n",
"plt.suptitle(\"Median Sale Ratios: Open Data Sample\", fontsize=14)\n",
"plt.title(\"By decile of sale price in 2020\")\n",
"plt.gca().set_yticklabels([f'{x:.0%}' for x in plt.gca().get_yticks()])\n",
"plt.gca().set_yticklabels([f\"{x:.0%}\" for x in plt.gca().get_yticks()])\n",
"plt.grid()\n",
"plt.show()"
]
Expand Down Expand Up @@ -731,14 +761,17 @@
"# Plot to view discontinuity\n",
"ecdf_normal = ECDF(normal_ratios)\n",
"ecdf_chased = ECDF(chased_ratios)\n",
"plt.plot(ecdf_normal.x, ecdf_normal.y, color = 'blue')\n",
"plt.plot(ecdf_chased.x, ecdf_chased.y, color = 'red')\n",
"plt.xlabel('Ratio')\n",
"plt.ylabel('F(x)')\n",
"plt.plot(ecdf_normal.x, ecdf_normal.y, color=\"blue\")\n",
"plt.plot(ecdf_chased.x, ecdf_chased.y, color=\"red\")\n",
"plt.xlabel(\"Ratio\")\n",
"plt.ylabel(\"F(x)\")\n",
"plt.grid()\n",
"plt.show()\n",
"\n",
"{'Blue Chased?': ap.detect_chasing(normal_ratios), 'Red Chased?': ap.detect_chasing(chased_ratios)}"
"{\n",
" \"Blue Chased?\": ap.detect_chasing(normal_ratios),\n",
" \"Red Chased?\": ap.detect_chasing(chased_ratios),\n",
"}"
]
},
{
Expand Down Expand Up @@ -779,34 +812,46 @@
],
"source": [
"# Combine sale price and assessed value, calculate cumulative sums\n",
"gini_data = combined[['sale_price', 'assessed']].sort_values(by='sale_price')\n",
"\n",
"sale_price = gini_data['sale_price']\n",
"assessed = gini_data['assessed']\n",
"\n",
"lorenz_data_price = pd.DataFrame({\n",
" 'pct': np.concatenate(([0], np.cumsum(sale_price) / np.sum(sale_price))),\n",
" 'cum_pct': np.concatenate(([0], np.arange(1, len(sale_price) + 1) / len(sale_price)))\n",
"})\n",
"gini_data = combined[[\"sale_price\", \"assessed\"]].sort_values(by=\"sale_price\")\n",
"\n",
"sale_price = gini_data[\"sale_price\"]\n",
"assessed = gini_data[\"assessed\"]\n",
"\n",
"lorenz_data_price = pd.DataFrame(\n",
" {\n",
" \"pct\": np.concatenate(\n",
" ([0], np.cumsum(sale_price) / np.sum(sale_price))\n",
" ),\n",
" \"cum_pct\": np.concatenate(\n",
" ([0], np.arange(1, len(sale_price) + 1) / len(sale_price))\n",
" ),\n",
" }\n",
")\n",
"\n",
"lorenz_data_assessed = pd.DataFrame({\n",
" 'pct': np.concatenate(([0], np.cumsum(assessed) / np.sum(assessed))),\n",
" 'cum_pct': np.concatenate(([0], np.arange(1, len(assessed) + 1) / len(assessed)))\n",
"})\n",
"lorenz_data_assessed = pd.DataFrame(\n",
" {\n",
" \"pct\": np.concatenate(([0], np.cumsum(assessed) / np.sum(assessed))),\n",
" \"cum_pct\": np.concatenate(\n",
" ([0], np.arange(1, len(assessed) + 1) / len(assessed))\n",
" ),\n",
" }\n",
")\n",
"\n",
"# Plot Lorenz curves\n",
"fig, ax = plt.subplots()\n",
"\n",
"ax.plot(lorenz_data_price['cum_pct'], lorenz_data_price['pct'], color='blue')\n",
"ax.plot(lorenz_data_assessed['cum_pct'], lorenz_data_assessed['pct'], color='red')\n",
"ax.plot([0, 1], [0, 1], linestyle='dashed', color='green')\n",
"ax.plot(lorenz_data_price[\"cum_pct\"], lorenz_data_price[\"pct\"], color=\"blue\")\n",
"ax.plot(\n",
" lorenz_data_assessed[\"cum_pct\"], lorenz_data_assessed[\"pct\"], color=\"red\"\n",
")\n",
"ax.plot([0, 1], [0, 1], linestyle=\"dashed\", color=\"green\")\n",
"\n",
"ax.text(0.785, 0.1, 'Sale Price', color='blue', va='center')\n",
"ax.text(.9, 0.15, 'Assessed Price', color='red', ha='center', va='center')\n",
"ax.text(0.785, 0.1, \"Sale Price\", color=\"blue\", va=\"center\")\n",
"ax.text(0.9, 0.15, \"Assessed Price\", color=\"red\", ha=\"center\", va=\"center\")\n",
"\n",
"ax.set_title('Lorenz Curve for Sale and Assessed Values')\n",
"ax.set_xlabel('Percent of Properties')\n",
"ax.set_ylabel('Percent of Value')\n",
"ax.set_title(\"Lorenz Curve for Sale and Assessed Values\")\n",
"ax.set_xlabel(\"Percent of Properties\")\n",
"ax.set_ylabel(\"Percent of Value\")\n",
"\n",
"plt.show()"
]
Expand Down

0 comments on commit 9af8f19

Please sign in to comment.