From d611de4584dd9c9c31d02f888e5a9b45969cf466 Mon Sep 17 00:00:00 2001 From: Dan Snow <31494343+dfsnow@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:16:23 -0600 Subject: [PATCH] Refactor AssessPy for consistency, stability (#24) * Add type stubs to dev deps * Refactor all assesspy formulas * Add full test suite for metrics * Add and clean testing data/samples * Update doc refs for data sets * Update metric docs types * Cleanup doc groupings * Bump package version * Add metrics _met docs * Update all CI functions * Don't export check_inputs * Update outlier functions * Cleanup test fixtures * Refactor outlier functions * Rename expect output vars * Add outlier function tests * Add option to disable lt 0 check for check_inputs * Update outliers docs heading * Remove scipy dependency * Refactor sales chasing functions * Fixup outlier functions * Finalize sales chasing rewrite * Add tests for all code paths * Update README * Update docs * Lint with ruff * Add Python 3.13 support * Update tests and types * Add Python 3.8 support * Add more python versions, update docstrings * Revert .python-versions setup * Remove pandas stubs dep * Set 3.9 to min python version * Fix indexing bug for MKI * Add more tests of MKI index issues * Update example ratio study notebook * Fix rst table format * Set notebook execution timeout * Update doc references * Fix minor doc issues * Remove 3.8 from tox env list * Bump release date * Remove unnecessary typing * Deduplicate PRD code * Scope test to session * Join check_inputs messages with newlines * Fix ruff errors * Scope seed fixture to each test --- .github/workflows/python-package.yaml | 2 +- .github/workflows/test-coverage.yaml | 4 +- CITATION.cff | 4 +- README.md | 2 +- assesspy/__init__.py | 14 +- assesspy/ci.py | 156 +++---- assesspy/data/ccao_sample.parquet | Bin 0 -> 22430 bytes assesspy/data/mki_ki.csv | 30 -- assesspy/data/quintos_sample.csv | 31 ++ assesspy/data/ratios_sample.parquet | Bin 31633 -> 0 bytes 
assesspy/formulas.py | 299 -------------- assesspy/load_data.py | 61 ++- assesspy/metrics.py | 382 ++++++++++++++++++ assesspy/outliers.py | 124 +++--- assesspy/sales_chasing.py | 149 ++++--- assesspy/tests/conftest.py | 83 ++++ assesspy/tests/test_ci.py | 160 +++----- assesspy/tests/test_formulas.py | 254 ------------ assesspy/tests/test_metrics.py | 48 +++ assesspy/tests/test_outliers.py | 148 ++++--- assesspy/tests/test_sales_chasing.py | 130 +++--- assesspy/utils.py | 37 +- docs/source/ccao_sample.rst | 5 + docs/source/ci.rst | 14 +- docs/source/cod.rst | 2 + docs/source/conf.py | 1 + docs/source/index.rst | 6 +- docs/source/ki.rst | 4 +- docs/source/mki.rst | 5 +- docs/source/mki_ki.rst | 5 - .../notebooks/example-ratio-study.ipynb | 354 ++++++++-------- docs/source/outliers.rst | 6 +- docs/source/prb.rst | 2 + docs/source/prd.rst | 2 + docs/source/quintos_sample.rst | 5 + docs/source/ratios_sample.rst | 5 - docs/source/reference.rst | 35 +- docs/source/sales_chasing.rst | 2 +- pyproject.toml | 12 +- 39 files changed, 1285 insertions(+), 1298 deletions(-) create mode 100644 assesspy/data/ccao_sample.parquet delete mode 100644 assesspy/data/mki_ki.csv create mode 100644 assesspy/data/quintos_sample.csv delete mode 100644 assesspy/data/ratios_sample.parquet delete mode 100644 assesspy/formulas.py create mode 100644 assesspy/metrics.py create mode 100644 assesspy/tests/conftest.py delete mode 100644 assesspy/tests/test_formulas.py create mode 100644 assesspy/tests/test_metrics.py create mode 100644 docs/source/ccao_sample.rst delete mode 100644 docs/source/mki_ki.rst create mode 100644 docs/source/quintos_sample.rst delete mode 100644 docs/source/ratios_sample.rst diff --git a/.github/workflows/python-package.yaml b/.github/workflows/python-package.yaml index bef08f1..8cc14f1 100644 --- a/.github/workflows/python-package.yaml +++ b/.github/workflows/python-package.yaml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", 
"3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 5b33b33..e4de8cf 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout @@ -35,7 +35,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies - run: uv pip install .[test] + run: uv pip install .[dev] - name: Run pytest run: | diff --git a/CITATION.cff b/CITATION.cff index 7040773..59e3278 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,6 +2,6 @@ message: "If you use this software, please cite it as below." authors: - family-names: "Cook County Assessor's Office" title: "AssessPy" -version: 1.2.0 -date-released: 2022-11-14 +version: 2.0.0 +date-released: 2024-11-25 url: https://github.com/ccao-data/assesspy diff --git a/README.md b/README.md index f2cb8ea..1c8a038 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![test-coverage](https://github.com/ccao-data/assesspy/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/ccao-data/assesspy/actions/workflows/test-coverage.yaml) [![pre-commit](https://github.com/ccao-data/assesspy/actions/workflows/pre-commit.yaml/badge.svg)](https://github.com/ccao-data/assesspy/actions/workflows/pre-commit.yaml) -Assesspy is a software package for Python developed by the Cook County Assessor’s (CCAO) +AssessPy is a software package for Python developed by the Cook County Assessor’s (CCAO) Data Department. It contains many of the functions necessary to perform a standard [sales ratio study](https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf). 
diff --git a/assesspy/__init__.py b/assesspy/__init__.py index 785ebc6..0284e20 100644 --- a/assesspy/__init__.py +++ b/assesspy/__init__.py @@ -1,9 +1,11 @@ from .ci import ( boot_ci, cod_ci, + prb_ci, prd_ci, ) -from .formulas import ( +from .load_data import ccao_sample, quintos_sample +from .metrics import ( cod, cod_met, ki, @@ -14,11 +16,5 @@ prd, prd_met, ) -from .load_data import ratios_sample -from .outliers import ( - iqr_outlier, - is_outlier, - quantile_outlier, -) -from .sales_chasing import detect_chasing -from .utils import check_inputs +from .outliers import is_outlier +from .sales_chasing import is_sales_chased diff --git a/assesspy/ci.py b/assesspy/ci.py index b2394c6..6c81938 100644 --- a/assesspy/ci.py +++ b/assesspy/ci.py @@ -1,38 +1,46 @@ -# Import necessary libraries +from typing import Union + import pandas as pd -from pandas.api.types import is_numeric_dtype -from .formulas import cod, prd +from .metrics import _calculate_prb, cod, prd from .utils import check_inputs -def boot_ci(fun, nboot=100, alpha=0.05, **kwargs): +def boot_ci( + fun, + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], + nboot: int = 1000, + alpha: float = 0.05, +) -> tuple[float, float]: """ Calculate the non-parametric bootstrap confidence interval - for a given numeric input and a chosen function. + for a given set of numeric values and a chosen function. :param fun: - Function to bootstrap. Must return a single value. + Function to bootstrap. Must return a single float value. + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. :param nboot: - Default 100. Number of iterations to use to estimate + Default 1000. Number of iterations to use to estimate the output statistic confidence interval. :param alpha: - Default 0.05. 
Numeric value indicating the confidence + Default 0.05. Float value indicating the confidence interval to return. 0.05 will return the 95% confidence interval. - :param kwargs: - Arguments passed on to ``fun``. :type fun: function + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values :type nboot: int :type alpha: float - :type kwargs: numeric - - .. note:: - Input function should require 1 argument or be ``assesspy.prd()``. :return: - A two-long list of floats containing the bootstrapped confidence - interval of the input vector(s). - :rtype: list[float] + A tuple of floats containing the bootstrapped confidence + interval of the input values. + :rtype: tuple[float, float] :Example: @@ -43,69 +51,77 @@ def boot_ci(fun, nboot=100, alpha=0.05, **kwargs): ap.boot_ci( ap.prd, - assessed = ap.ratios_sample().assessed, - sale_price = ap.ratios_sample().sale_price, - nboot = 100 - ) + estimate = ap.ccao_sample().estimate, + sale_price = ap.ccao_sample().sale_price, + nboot = 1000 + ) """ + if nboot <= 0: + raise ValueError("'nboot' must be a positive integer greater than 0.") + check_inputs(estimate, sale_price) + df = pd.DataFrame({"estimate": estimate, "sale_price": sale_price}) + n: int = df.size - # Make sure prd is passed arguments in correct order - if fun.__name__ == "prd" and set(["assessed", "sale_price"]).issubset( - kwargs.keys() - ): - kwargs = (kwargs["assessed"], kwargs["sale_price"]) - elif fun.__name__ == "prd" and not set( - ["assessed", "sale_price"] - ).issubset(kwargs.keys()): - raise Exception( - "PRD function expects argurments 'assessed' and 'sale_price'." 
- ) - else: - kwargs = tuple(kwargs.values()) + # Take a random sample of input, with the same number of rows as input, + # with replacement + ests = pd.Series(index=range(nboot), dtype=float) + for i in range(nboot): + sample = df.sample(n=n, replace=True) + ests[i] = fun(sample.iloc[:, 0], sample.iloc[:, 1]) - check_inputs(kwargs) # Input checking and error handling + ci = (ests.quantile(alpha / 2), ests.quantile(1 - alpha / 2)) - num_kwargs = len(kwargs) - kwargs = pd.DataFrame(kwargs).T - n = len(kwargs) + return ci - # Check that the input function returns a numeric vector - out = ( - fun(kwargs.iloc[:, 0]) - if num_kwargs < 2 - else fun(kwargs.iloc[:, 0], kwargs.iloc[:, 1]) + +def cod_ci( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], + nboot: int = 1000, + alpha: float = 0.05, +) -> tuple[float, float]: + """ + Calculate the non-parametric bootstrap confidence interval for COD. + + See also: + :func:`boot_ci` + """ + return boot_ci( + cod, estimate=estimate, sale_price=sale_price, nboot=nboot, alpha=alpha ) - if not is_numeric_dtype(out): - raise Exception("Input function outputs non-numeric datatype.") - ests = [] - # Take a random sample of input, with the same number of rows as input, - # with replacement. - for i in list(range(1, nboot)): - sample = kwargs.sample(n=n, replace=True) - if fun.__name__ == "cod" or num_kwargs == 1: - ests.append(fun(sample.iloc[:, 0])) - elif fun.__name__ == "prd": - ests.append(fun(sample.iloc[:, 0], sample.iloc[:, 1])) - else: - raise Exception( - "Input function should require 1 argument or be assesspy.prd." 
- ) - - ests = pd.Series(ests) - - ci = [ests.quantile(alpha / 2), ests.quantile(1 - alpha / 2)] +def prd_ci( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], + nboot: int = 1000, + alpha: float = 0.05, +) -> tuple[float, float]: + """ + Calculate the non-parametric bootstrap confidence interval for PRD. - return ci + See also: + :func:`boot_ci` + """ + return boot_ci( + prd, estimate=estimate, sale_price=sale_price, nboot=nboot, alpha=alpha + ) -# Formula specific bootstrapping functions -def cod_ci(ratio, nboot=100, alpha=0.05): - return boot_ci(cod, ratio=ratio, nboot=nboot, alpha=alpha) +def prb_ci( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], + nboot: int = 1000, + alpha: float = 0.05, +) -> tuple[float, float]: + """ + Calculate the closed-form confidence interval for PRB. Unlike COD and PRB, + this does not use bootstrapping. + See also: + :func:`boot_ci` + """ + prb_model = _calculate_prb(estimate, sale_price) + prb_ci = prb_model.conf_int(alpha=alpha)[0].tolist() -def prd_ci(assessed, sale_price, nboot=100, alpha=0.05): - return boot_ci( - prd, assessed=assessed, sale_price=sale_price, nboot=nboot, alpha=alpha - ) + return prb_ci[0], prb_ci[1] diff --git a/assesspy/data/ccao_sample.parquet b/assesspy/data/ccao_sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4152fe6eac2df14a52d3736650d6f9b075649524 GIT binary patch literal 22430 zcmch9d013s|M#G0^vtjg!!`)WfDFti$ZWviIyW#fqiiZFZaFRlxPdE38e@t=Ze%WG zi)d+3nwXWen7E*rl~|^hnWJW6Ws+KICFlLzhjeCqp5GtudtL9(t2uM-<-4!h~ z8KLoK{HquFzcVA9OR;PdF~IPaM&&| zu%0g=XR|o2d!JbH!krGT=qXQ<(b2(iwKC#X$Z=b1q{I#UT0{NF?W5p#{9WUgBOKTK znvQ%ql;h^9qDeL2CY|>nZq;qIUjBrnaGZ7c5c2jHVB>Esnfn9BrDaExT?rsNiXn^l z;`>A|(l3|e?i`9HAD-{v_AQSQDl4}Zg_2%NIPSq64KahM^5k}7p%<9U^B|$SI4WK&U8v+7*hL_EnL66y%e09OpjQKt3A8ai>m8$unAxdv$0c zuYp0q{fGeAH%|*BXP1Jhfn5kuaon=EqsY8V99Ps5Ncw)jak^b{at#(Je>#$=GC6MP 
zEHz;aVE(=zZ<#&20?4gDI8O77hAi5{ai`sNJYjPk6B{8C8ntmn9{52A#P1p3gTQ|5n8N+bg>pS`ppSc_tJtj~g zX+M1Dj+RIUbKFa%YVv&rELkrh+0fF~w;viS;4zcF{>Ml?B)`Fr9P#0}H!t=eM#yuo zDu`%D!ZaJB$g?)c7amFaRz2j(-@j#~`svkQMP^Sg!6K|c_D(NP3_<| z{~gTZ(RIY~CckqjhOGPGKO@4hwUlpOTs?|KTCXxES2L9 zb`kS>x>X)xYGTKvON6b;{Nw#e0UTl73U^a0dudEplgxZ|xSZ^L4H5C75BaJFe9lmt zO4(D{!9oMO`P+D5F`M?1pHOKY`AjrPp2%@$UP~c6^EmGBZsCe-_HK+uQCzusc^L87 z4xJ?YW&GF!-(CCjyvMnDlg|TS!}1`Zn%(tEl%kY9_(vC^q(HtRlx!N$aTi{TBIn?u z6EEt??@z&7zGH>*x}hiZq;>;*p~;(^+S$P!n36ztZ|&etZt2h4{CZ0SdAB>(`*{Ix z! zC6mu^+>-cs67(m>rCd%R`?exK{G}u}Cm^r}1(GrF(zvtTh1u;RtTBFM!$A-nBPM?? zMEDL9lXp&W+;Kq=ul%c01L=;map$Uz>2V2sfFMw+flH}>Ee zKiFHLsARM5#+$O)zu(d2wX)s)q7+Tc-+x+Kl(vFE61xmqOY0%5WF>AP3LAU>n~|8H4o00k~2_B`H9|qKH0h`)YQm&UyKkc*$KDhCL_D*W`t0t9NZd1 zqJlWCWL{5GHR~NK+hI`Vm-vw6b(mY+JFh`GY*;Vy_hyb8&qXK%1+y-wNr?q|s|qHf zuQ@Jonw$*RA#J!Rg=S^i7&RG<#O;2(S6=JPt}SX({}T2D$jA$*Qr>OuM0gtC4IX4$ z0h{8hHQiTUJ=&Y75#UcV$qEU(?qOnHowC3gm^d@UYbKJgR;C|YSu@NL5$#ZZ`S91674sPW7B=XEvgl~gIV16Y;LCh~A z;YCWxA_P<1Q_;;zcG+LaLaSMzy=y#n2XUI}PF}Ne+>4rM#C!EtcWEi>abZw2+19Fz zJ1-!Xz7XK0Xj67U;BQ*ubr#lpHi}gI+QAK56hSf@K)SY{&}e?wS4)09g!=ziIN68H zvS(1RsmXll??Gg66UUva_alj8JGk;&UGfSdAAX%gerksZ1%wX5+kHdlz``Hz_f=UbACX&iHjw|8Gt&} zt-@YVRL77kNHpW;2$I?d9=}dW-aw$POz4?csUP!BAbI#Hs`s3D^6v4aMnQl=&7|F` zvMbMUF=Pg?|FSJYn9UArPPlfSoixL}S)#nO%!`!nfpFi-6y^eZu#B937IAu~Ct2_e zDv(S@sy+nu1O67)S|3jezeR?W`I0U0mPMnIi4uM}tln~8sUF})mX3hCydNUWR=$&M zByWBR{rntdvKP$y-9Y@%iH-j%hP;Qen|4(}(oqld_xhUd7X)^MlQowSIe}6#0IU0- zPbBIvguv56sanJud-N3MvX$@n30X!msNT%lb$(L0*zB$LCimfOugA)X*BpS2k_eT` z2cdfArzuKG`v3)SZ8 z&3@!8`k=vc|2B5{s)LJL=B+TZ-Tw+l3eTS)Q&`!q-}b7gR^Bu8AadyCR1;HC$qt_4 zv7@mvn$-|p4&46117kD7a@g`HzIuC4i6=vjcW{IEdkBluOCyzJ=mS80kVFpeg>;*4 z8@;R|_SU;@LOFA0+X*|{G`d?wExS235XtV+O_`82zkE$a`XItp&kQ1OqY)Z2Uvf=u zp0P24yz&yqT~5}L=RzSuWURu#3~mT$6Pve+gGg-z$1R8%K#p8M0hla9K<#P2$j<6hWvI;lxC>eT7BcpcaI)ef*e{_cd2uy7;DRT~MXOZQrV!f9 z>wXU+-H^=&uMH(Xo#(iuqIi=`nbeX%=KX+_Fw&4$8|f|eCoA^D5N}kqcvzeJ5U*Ab 
z<+>>$WM3lyTY5_cBJ-`r9^|9f(G&a@PTUW0-0fe;4gpi57S+gBD`LnLF=uLWYiFA~ zw9*yxb%ZN)4XMOSOL)HI>lz zzCH?@hjry0qgRuAWNTy?->VOvrK%~ImZ&1rK>Xah67s@WgxpDQGJQTAZ+kCf$Ac9; zh3)LyHobyXUKh#u);~=lF`1Qdnl9uEIA~gLiOIl@$?)6J$Q-P{(e821TShLon#V+i zl8lobT#7P^+~0^KHeV>L@v9y5kFnprZH@9jjA2Nd>ylViBa`c|Xk)vsiZa!)`-6H2 zZRzwduoEp=pR!o;(k8TyBkveVbxg&LM6wyVD#g7wS$bcY__{y&2958%8?n;!V&$oi zgk&MSFm{|lSUNL1s4pqEAwT|(s9MZ%o?rAM;ZybZ9VI<%? z;PAn5sMcfiU=?_e>Ap)(vXE{jJ<^&QSG+q@&qwqll@5?V$}N?f8u3;7hjO0;s(})3TxR^NpJLOO+i|8mvQ3-(mGF% zWCZqzjMLCL=NXK-=37$=D}U`AYN)Oc27j4uvj@f z%RoYgAf@Dsb{JW%LLy~7nZWTUtTFBFLF7yuj+`-G#7p4AW{JcKnMZ9CQcsfyyQe5Z zXo#G2r8{vS09P{yG_o;q)iFc~GjF}|yK!=H%f87mWKw^SeOp2L4#x@ROdPQx_AY#? zBJu4V-1T8GWa3O%=I`#L6mokH=~ZDu1PMf<{bPrYP!rE7!p}$!=GG!pL>xn25fn0fMRUcEM^59*T*bj7YTUUE2 z)XLRsdXYclkVk`M@F6zqfmm3|uKS^DMN@mq)#XX#EE2(oC)FerT}xVmFDXH4iW}UE zxEH}}=N}sB@!;V`4VeJD>qNtKWGZ-#_K7B@7dUQ*E|FaR2>N_KoQ#1>_IgcDw)RCu z`#KC+;!>fT z5*_sF+ALtBS4f0SY?@iJgJl<-SLfBSB@*8}BOBMmC>s67FVzv-PXP@+UNSFbpXZ~= zuM#-mCsD#mV&1lU?NCkn}? z{Jt-VL7vXLaPMKvfI-0UTpK&L>zwhLILI?~ zzU;hR;llGw(LGh0S+_>Rx9%^;^d*hBdMaNOrf6rw`ibC|${D`VM6E*qwIN1o6ti!~ zMxa;nep@Q7^J2%$3x-R0H-$IXv5D%i9cn}LKe&|p7|t?BN47lZkV?c8`sH%miDotV z8tG=wUo3y>Pdua`m)qHQ`;)v%_R=Z#T3coJ=1AgMjObq_CpCv*o5>22f}DD=(uW*| zTOH}AsHlnLHhK|HWYj(J0R`@?R-9BJGmov+lX+!V)8 ztJ>46$usx(J7^d#hmnUm^j81$B-21Aupn64=IxQzD*`3U&pb6hoXkKR-&)3!ZBXGP zUoUB~ggsu^Q`o@XYLH25Ma<^=0i`~SY(;=t%${HAD{W_4t_}X_ck3?|VT1uq5k#<)`yv*JO(W842T{t4tPejVxkkz8Kq#d38y#POxfb|ZKQwSK{ z-T=FQjaaLP0{Oj6e|aRC?PU* zoBee!TYL<1Bq9$FB^9{#AIbJ zct_@`**%~3+EJ(6db0~rzlJ>bV%G{Qd(cB5v?`7L-H90?7_?W@T+0?U1V{@4m2d8e zB2jzc$^Mafb~d=LVn<_S(PT0CX)T)ZuTRwJxho-L6|xW4LrVtyW@(ij@B{XUy`v>d5jfT}b~Mxc2KFV3)BQyu+n+B32jJS6E$Vt>cVWdckkL6A^k-dg^W`<3q`zQugmJ!*|rO-8U=pjLPVf0p!#YY*;VL z%iSiCTHPasQ6$d;PPf<#%8Q=lr%b=)ONdb!Es!An! 
zcoI>o3TTrm?LH5Uf4m5Onj|>Ru<=_oQkj@NI44M0?6Kz)Z{n3Jj!g3pAU)wblYFX* z+4=6#(rkC-p3l8VAwjsV?p7gSgKtGhi#^!#SN^J~VI|wcg*63Ve;7@cO3+do`;c!k zLGE24@wIW>z(TD;&Du196}fD8gG8F^&z3(36t*buEsY@u&OxvOp|p`@qkBdoc!y7j zZ*FIc5^&YSp3?U;NtESnekAMPaGo!au?Y>kdlg`5>$b0d^I;MNS!WMRaU5v0)X)6+v z!h;;=yR@H3&ODGxTP5t^=|KuXr27vFQmugYIs&X>_TArfLOEN$+qa^wpnQQJe`hy1 zBb0oHMrFJ-l0^R0!QDCCXGaTr{WE{iV_$koBfQUEpP@kceQvQDI*Hp8rD*f;9_C5B zYW&RQwc+GdI8<2nKSp|iAGpKcbf3NRtV(fSx%mwl5uy6Lx>zffh?uyGqHMO-93pIB z2KTSBo7+z-$WZPfmvxjSJ+I(SXt-EmEJ7b zs2A3Hw7lMjglC(5GsDT|d$@s-dy{=rQ2d$hkSyXkwX~R3hJD_Pw5>)%lM-HGW7DQ1 z4YBK1L@Bb(DYs%s+F$69_r;LRO(;2w<)#Ajr9?G3j;u1NdpLhb81{R&3M&(sUSGG@b4F|;O5|bn*Zmg7CZ^uDpKsfQT`!ikV2DFNq_B2a@^2OZ-qMiU3JS~y7 zvFyBU5j*T2oST+hZt}Qv^0v{f+GA3BAn|JQV$UHAY;0uzK?8{O49bCffYOh(UyP)Y zzV)F`p2Ym-C$VJqCFH$pVdVCDG)Yzd*CgzC4~4XnWj{U=g`$3LpMDwIk zT}k98P~(eUFp*i7clZB0pn+Fa$_TFo4R=D z?M+=Jsq#aNUyk*VP&TdhP`8lz_Cwv}%jJjJ$PL!RBIO&khs81P*$=y`kIIjD=+9V> zcp84JJ>oUsuKkF&kqJL4N%fn2)W;mT?WiK=X}GdFB{S)#SC(b=`+3)q%>KW9(lu+q?ax!P2LAc| zvaCUW{dy#8aK}GgvyELmQ?p68(B;|5;@G3vDP9Aa;i=M$)ZwOpiOYu%=`ri*@U)&w zm=WfPHK`*kQLio^F|_yYqa%j(J;01i*M5;YG9%&Q@{yVSZXO+(HRvIelb!5o${9W+ zv?6E3(Aegjk(mR9qjE-Mm`05nJ+Wfc=y9`}M~!)EiEwo8lr^T&W1oJtV)VG_yPHRk zpM5|$X2RSrOk*Z4yjU^jsj{2RVeqH8ae^enqkzrW`*H$S}BbMfyd62q1(`s3S@uxB6K*dJE@@WH(>o154m zUn=ZTDt}JYyHUQ(v)_IBa-X4w@CyHNrQs`rrZ)6YYactU0pli&$g@UjNMc}R;LCO{`i79s{Y6I=?OP}d@*~7 z>e_~!(S_GG=0072ZPSE>H?D2Au2B6{Gi_7hPcIehtp91t%nxq-^m6er)%92ApDn!p zYUxk)*I!%w=Z))IZQXje)h-{BUAJvz#Fo14tF+(My}mBFNBtWwju==L}I;fQqnuD!=2Uf%WYN$rJQ`%WbX?|$#=5#x8ie{RalyFa)v_rmV|m!1oLr}66g z{u4g@>9tj}Kl=Hdle0hm?Nj%nPi}uXtmwd>KddVH^snDe79H&9a4&A^;+N&@H6n#GDc+YZ=xh)av(&wIt+PZq~XTA4)KKEqb zgC6rf*PcqB_eH{`)$_jWckA2Zo$B1uxNmW$AN3|8++Hhx#+ahpoAD zCTGm7KfcM$-~Gqg2_-lGIA>iM`{(&-FK2)7ZT{=8e(>FleHTBtP;?}8|M&C0p1A*F z$<(>E#t+YfG;@ANOVH)wP4Y7X7$BD|6AcO_SCx`e{qi zmy52yR_;~SwtZb@*^QlB*OvYK)}AlRetGAh*E7Gqdn)spn;%?S`^;}2-TLyGTc2`X zi`x%-XD$BySlGG&^d~ccAZoqMEyQ0O6ffu|36imS^X3=Mnmj+xze~VQ?|>_7+yb7r 
zxka6y5yk%B{FHb$O=0ld(2*!)J#5cEG`gwT+7}{466T#i%-4R^7f<|I0d9nhks>+U z@^`FI!Zrlb`!Cy9ZgfkG98Y7U9?zx;{+_Hrh_~u~F?6=X9Z&hL9wL@G>kAx$N>7#v zZO4yb*`Flfrw(~uI5ya_q*$x<66LaWC(XbwKdZxpy+veW+HZ*P+vraEyNE1EjoEfw zX|PR!oY2bJd_z!@tXD+^Ol`ZUjj8RT5-<%rMU)-<*b8>z0noS<7>wP5a9J*RAgX3; z8jJ;%y?`AaDK9lQOUFf~Vk46rK%hgR#Y$gDtkK2w;*7 z5@j>So}yx=X^_gs2rfXvhG5ZoMzY7VP4Gpmoq4aD(8v_Oq-tUW(=EXEmly=w_w@rF z^)*owtK6#v9{D6yF=J~|30T|q2e{&{ovbnd8=$IlRRydyMJ~U9MWSV|a$(iaIDmx4P!xY)Wz;kQbVCYbAz#Dq2@-pIif2{?y8jb=vM1j4R zdqEer1{Z%wbNWK6Lhrw3y<>}yXJH8Ivw z3k!7=Dw)!*a4nm!$jr|FG8w<8)wWyeU=B+G({c~mM_y{#$GIV7q@QjoyMYP}RRg1-4%(a7BY*>WF<>yZPdgSb4c-o~hH zkWI1&im+**b|qu6GSv@6m5lupSg*myVn&?aL7*3Sv!Y_b7E2A&zOUZQ)QyAYWq(`j zjQR_xt>9C*w!9H<4I_bCGROkqZq+NPDwRxoByiQ`SR_nCJa}wfQiu#!-d$*8%p;(q zruRknA?vJ0LC+fors)e2G_ff*TM$xZWduS?D`Ol0oab*s+17JVlShrHmie_01kEjh zJ>ZLAd@rnERKqg%V3n0okEV=9LXL)bn6_Aaf=UV4vs);w>!My=@jbV>Lth>=_H(p}q?FWi8T) z;O0HWd8Ss56lCio0^1*|VBJgLPJl$h-0dbZGx95ib&TPaLIKnM99+=251MZpeZxqD z5EmfA6xgZddr>wM{wf55hqfVEgp1??Uu1;4dnrq8&`;AZP=Vk9ysUT&^~;2+0>)Nd zSVN`HW!9cjA$bX_jEvx>1wP#jWmHarFOB};1nklbA#BT`zNQIaOK`a!%J7L*3$8;I z*1488>Iwk8psHjld(UbYd?;#UY~ulL+zmUxqR>Xd3E((Zg>=wORKUnXMKz333R_BE z7s;9S({KcN8aP4JAZ<*B&fCUAElqM5xOD!_Cc&S`m&T3QR{TsrwuoUrfF7*f5#8A> zh2X+1pp<3adtj-gNwJi2mCLl}z?|);GwgyQ1ZArfHk7A{)J(BI>|tMxOwv$^9qMnZ zTB!F*7&Zwiw55OpyEpa;(r*}8!G*s}wM?S~i9leihbcZP%w@7a5fvljW#A@W{WX5e z#cwW1y0(ujYDWD#gtsR_$Hsk@V#fN0rJDM_nXxILo|Z%~F|)FeW!nC%vNQI3g>c>O zP=3=rtlCnj8V>|iidfsgKj7{5STMf)2CQqJ4NW$j#JX}eas!H@8p+UNhH?t!sM-B- zIcm$gS;{=rpnAMG9NH0tTC7a#SU9(>2`bN~X2_ljG^x8FrnwmQY)C^LM4oMAlo5sC z{b5xb)0$U*z7xN34oq0Nq^go>`XIpI&eZr@1dMGv)Kgaq^9?#`X+h3X!F+R}M*C_+ zszi=3JP6OTbq{C~G3En>_usKE zl}u|S1h%~wAYrMe*D`jK{}3Ai5lSa9=%nQ7bu_G_+q$_0ZgcE;wW zLPJuH#o7_bA+nRIYGztOp_!@Od_n+f0cH^+cnZNS9|^xxU#eOpR4Oq*b2s=v@u@Zp>^XLjtO>+_9#yJ732WwC2 zC9Gx)=PeNMslo=PHoqRy)Ki3C0T-V`kc#az14`gbo{a)GWFoVrP{|nI1D}#E&_Q;M zsuEpey0DbV{_chi^&8Y1=fUZXrK%Rjm>_ClCJeOH(zXkp7X_!1d~7i=i}$FYoU=$o zg4`PgY-2|^)cS_c0Nk)k)yOocfI*!~JyKQ$gZzd-fca~f`;CyoHc%vI%xg~|k1SNF 
z8T;T9)S4$dLoC|bFjBn0F|JZgU6PM3YaD>kf=of zxPKI>yLFibQt7H#DxZ~6FTzEtE#>YG=yRWEfnYV1m>nBnb%&z?Sg_Rsx^s~Q><_DI zV$>@l_`eypZ4dhI7cK()Lp~ugmc9u>+l}N8d`z-AM@9VA%TV>{<-S=)p9;So>BQtedcsmuz_^)* zOo2}=c3Z(r{!#wK{Yy8WqBcQVh_zMz=o03W>f@5KUk{~)J^PQFS^k_yvN?Q3$aCh1 z=VK0=yY$3O471L-y{qhnyWx+oW4?MWYVOR#mgv_O-|nj1Rw+G%JDGQvs@|+!|9tP9 zU5AJEc_+JF9J9AY>mK|5`P(hAKDMR9`hHZ`;2w9N?()gFrlzH->cbu>>6&99%hI)< z#rV4GzG$+4u4`Q^^$hw-zP+1xu37CF|4pv$Wc>O2>(?Y)DE@j);-w8;G7Vvsy0!Wr zH~4mmT~IU6tKW^feQUe_`rg$qd)#XBJ=OhojoN!)z?rX?54>mh{rB$X)BC*+4HgKO z+l*b~NjB*to0cK#9=@SE*np#y^!iVJm&?|f(bc-Ar~53(}(m}u-C^kmCpjv2#e}{zmXX`USBsdi&Xge z`V{zn<2~ZC#(&hPvHr(K<>u}9X0(sKY1Ej_tv`M<#+)J8)P3@jKbGcB{U#ZALEBn0 zW%=9Q&lxwC?AX+2#bK5$~fnY?oo_t`fF#->(pKc{`> z_ix6yOCS6ZXdQi1Qez$G8I`TMBpF>Z`TB&p=SP;NUpr~8RP2-mtdVaAH};JTKDb*j z!TXTOT3vi(xI8)V=&K`NnR7h;3NtskdHsaQ&mO#SeD3GF6+`EJ8UNM!dC4U3)%gd~ z(s_{&yxNncKES~v>yAu55H2mxw_Q8RIx>NvdVh?T9@Zl>uaY3_gQONC+hOa z;VG*IR$ZAA8h2Qk7OMH|yv8!|AtdfnKErBywQ-Rigc_N@(%j6LA_z&vG6v7LRvv~$Wd-<$!(ZwxYD z*$`P8d(^aX{jhPbuMz2$Cvf3G|3%$xcDis8&zn3mZ&LBx!m0mWxom$bU_)xuM|<6( z>`UEn=R!eT6XgGpHvj9Li`G9OonFP@f7}sd6#-RdUWq(sZeiYBirBPp^6Yu@i)Q1N zW_=g`EnS>0$P)xg(c`@`_R&rNRk^iS{(BkjW;wh3SlF2XT@v11IO*xD{wszYO}A_A ze^|6pdq*`z=JDL7G`C(wQbE{_@$s!0Vp5cIqnwn zKZ*6kMMOUaXH7CnSm!d?KVXsI_8)Mk6+8)%fV~F^CO1yz4OsS_XXr zKG>&E5*vWmkcf5gX-L9)683{`k_Lao0sKjjDM_yuCxP#zc+69+tJPF_Y6I0E)TV~6 zG-@bIqlSj@pbJ!|YA~cxC#l664K;&Ci=l?DYUqL{5nFh@YoH7bZ!C=gm^C~E6rjLu%pb*qGsSP#Ma2f+;+(1{8 zU{g&J9VSsiNfb*G`~eR@BvFS?g8sB>4SZG&W8*=GFtt_<`)R2?@PG%a6Zr>S0CA{U z3y+7Wcwis3mKKVE@wC7U$A=Bzn_`+#=Rq(qM8kofr5roGBWxbc)9`up4rGttp*hF9 z(;isz@jSxu@jLYK?>hvhOm-s00BUs%;Bk(D10LIGTt9;01Kcrq%oa35L;mrKgL$m* zNr0D@rk=;_)4^jt>A;B{K2bcz=1`Z8PaTix&>Z44c&q_zgX=n0;6;uBV$x~gBm&Jm zjuoUJr-74jG$}b&kewb6Xqs{isN*~mfhI1;02$0_;DnmyG{=gwV z5gnQU9RtLdV}Oj{G;ktHljP$Snk^jzxUFM=+~pV`a2x}K*W&?At&Rb5pkv@nhh|;J zinBa4yd5hJuSTXtR6ky!DcUhWVsRQcp{7Z|v4S-27|;-Zq&J$_od!-yq^aI%#Ys|{ z{2ePuMUH{9htRsU?(WR6EvaQFB^Dj&~ZlV4?NS zY0Z(LC>B&er!_~qpldXzJLtLc;)n_Cq~+4Flh#eA5elf&h;~Ykanh3NG;(+a#fc*8 
zwC3<;x<;EJ2Np-@(=`-gr!`b$$B33@r;#Jk0ge`Hr!`bN=aI7}XdQQ2LqT^MIr1{a z>B1+i^^Tpi=sS%ZaSS-x1~{#uA8?FlW8ge;#zI>Kr!_}t11I03KGqrSBAj+Q(j9iv zzQVCa8w{tBi|n-Ja9Ts};WTnYGH}vv#IZ)(5~q=i@6txaY0brxY3t&&=13fr0_`Ur z%SXE!rx9&49BYp1063@ANJoyOM@KGRKzktv7TOd!ja+upHpywtMF!e9Ijy0CavC`r z22kgFsz+RRHdl_3BLzdTe7ohe=46M?2F$VM%u8oW=2)Y>nbXJxi?en++qTC$omwLx z;2Sx|nxjXfYyYLSn>usyyw29nLA+>I;e0ed&;jiO<7xZp)D9-09qc=K>ioi@+59Hz z*@#_ifAnz%dmPmU@uNR4c|-{9s^bFW@uW`da*+QIJ|4Rgz$ee2Keuqog867p|J7Wg zcxuti1+!*j0t^Xc9%RIbtTFvM5Aw2Km2vc+IzO8bG8`ZJZx|tZlAKS55Ry+)^7*;s zWS8|4I%#^mo;EzIbBrgCgLQ*rZKRG#Ndd9Gf>0`?>a>mA| zOgF`6mgVcSNRcLG=IBu)3f20Q>7&Xrh8HdR_q#M@`cyr}rCF(?hGpbT#b*ZK@m`ic zEHiaDKBIsnf!RH7Gpgf^EAtd8a$U~4le@FpkHPIZyAfQUWRv!Gk`BclUY(!mO0&o zF_kMrpOIfwmX%6*r2Gt5V_lt@171_7mSL;|eRWnU_)eVxdZW}CB}JOd9MiDOoUvFR zr3L?*%rZKj0scqDW4{)(AnS0@AFIKBZRYeDRCe$`R*!XEMk?0JEL3M@Sl4BgK|f`q z=vYg~R8Lu{1n~K#(1$)_I>Fci`SKUR2KtOLIv$Sw`IH}hX6jfv*3mKW>oapQFrEr| z^QnCLEbyC^I)lPf{^B!oEU*EkKcfum@sJ(kQQ&`uhOSe7VV@Fen~YQfJf@Ni=!Dt~ z>zK1pTa{3|VonX7FgF(bVvhPv4vEj8b2%pPWGc`JSO6Nv(?#rQmj5*j4b3D(PIm0Q>yx$HM2>|J4_T;q$LE9X=O3({Z&4VE7#EEDxWropsQe zyPf6XW7GwP#(^sg%_S}{)Ms2^Xx?{$;cI{k46O$)Ff@0%z|cD3OsBJEIK%Mu!x@IJ zC9W{EuDHO^8sh>(>y0Z6jaz3Jz79FV@HNQ=hR?GuFtk>=!k})s!q6J#0z>PCGYntb zTwwS*=M2NwJQokQLbcb#GQ8te?i*JD>0TAN*9Xq|S2LCtoBq4nDZhSqWy7+Tk#fT20lDdzZk?+Qcf zpp&jU`v4c1C;gi54_skruiygnq#y7-gbNJcPdLNyy@fMxe4pV8LwgPv7}|fh!q8si zu{^Z?@_otUJ+vlv_9&0>(AwJBueieTz2u|()CouX7*{ygywB64eajQ{Xs`1GTxY-h zZ+_(AXbIpd7SGm%2&B>HczTbKR zj`m(pz|lU;g&yCNxx%5gJptF*tGUAQ=LQ!$@I9Ohz0Q8l6^{0HPtc=%o+}*Rqq)M- z{_hDm+6z7b$Ddao)lsS!zDIP0bFD?x4xPQD3q8J%d;*U4Y%cUV`%70i+G{=mNBd4! 
zxc`)o??+wWI(t(WxXwP+g&yCtJ^@GjR~NX>Ue*PU?`vJ*_#XEOINI;J!u^NcJNsZ) zIC_?Hh2#5US9*M}>;l)>H@nc|d*~GA!!D;(c5JI7Hb-`~5yb@uu$aD3nILa*}-;0pJj^!T%Z zD;$4LaDnSQGq}*>&ks+)(X)gr9DlBGg>&h3X}-?tJa0JJ@6p-ADQ^EghdAj4XAOE* zbEfy`eBuJfpH*Dpp0wYiGmHxyf1YuLd(zJyopW5^p0poo2Y>!?mha!QkTYE8xyXgy z|9nQ8J|%5P*`qU(iwC9YR@}kLOGF{h7iq%te(-utC78Fk%lEk0+{s&slHJRcpm}VXUd_y(U(ndS;p-Y*S zxp0bk^!zEa`LmB2_h|gp*Hr#KEd!TeDfA``e>X_~(Mz+Gsq`uiFUiRx=p30rA3P9! zAtaMNrllq6rp(SPnw*>b^psgc=8xm=+lJG9xQ`=5oed=P`fYdyzhb77kFWMXh0-@? z7EDPS{q$7x(BiS$QG{fq1IT~{3q^tnFq=;R%HQMh$SD?(Nacxivd+JD&hz(sGpFiC z6i*p5L```DJnsF*((gxG1`1^I;>mO8EXbQ5JGHntcJkc0MJ4iud2{FC=2Wgp?5pmp W4jO>J5$gO;umFD}mW}^s-v0xQT!`}k literal 0 HcmV?d00001 diff --git a/assesspy/data/mki_ki.csv b/assesspy/data/mki_ki.csv deleted file mode 100644 index b09de43..0000000 --- a/assesspy/data/mki_ki.csv +++ /dev/null @@ -1,30 +0,0 @@ -"32900","37299" -"36000","40166" -"54000","56317" -"64500","66184" -"68000","69487" -"70000","71515" -"74000","75338" -"80000","81036" -"84900","85673" -"89000","85021" -"94250","90046" -"99000","94089" -"105900","100227" -"109000","103157" -"115000","108290" -"124500","117099" -"129900","115347" -"135000","119678" -"149000","131631" -"155800","137321" -"163500","143974" -"175000","153572" -"179000","148457" -"185600","153488" -"199900","165040" -"215000","176940" -"235000","192959" -"250000","180046" -"279000","200240" -"295000","211445" diff --git a/assesspy/data/quintos_sample.csv b/assesspy/data/quintos_sample.csv new file mode 100644 index 0000000..6c370c4 --- /dev/null +++ b/assesspy/data/quintos_sample.csv @@ -0,0 +1,31 @@ +estimate,sale_price +37299,32900 +40166,36000 +56317,54000 +66184,64500 +69487,68000 +71515,70000 +75338,74000 +81036,80000 +85673,84900 +85021,89000 +90046,94250 +94089,99000 +100227,105900 +103157,109000 +108290,115000 +117099,124500 +115347,129900 +119678,135000 +131631,149000 +137321,155800 +143974,163500 +153572,175000 +148457,179000 +153488,185600 +165040,199900 +176940,215000 +192959,235000 
+180046,250000 +200240,279000 +211445,295000 diff --git a/assesspy/data/ratios_sample.parquet b/assesspy/data/ratios_sample.parquet deleted file mode 100644 index 9defc519c91232b8b31b88e5db90c0063e21b459..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 31633 zcmbTdcT`hf*De}lM|LP7gx*65BmxquSXUM`2nq@!_E-?Gfnvde1O)*_#Eum#DA+); zV2=d^D|Res0Fh2Yk)pyr6W{N<h96hCD=4t`8Y+8aWzF9l<_bPUa_tMEDNP56k~|T+E7%lyCqi4r6_6v9k1I@ zQ5J!&_~dVjO4Jw-ew|8DNA}oa_cn@h{A_@=zYbEVuSa4fkzz-%!)Tm5g`%djxcJ)` ziV9ldfPdVfs8M!~*fo`+=6$ANGhNtswJ~nqNl{I=U2yq&xUQ6qN6X>&NEZGOL{Tbs z4tRIhAQdxL15X+pq^Mj2tQ1XAIoWKi1onzeb#UinuykSb2Lr}NTsVob4B0!8(AIbjhPmFTGuMl-?42z6|FfuhW--RUI?uaq!XTy}Ag z3M$sb_FG_a4=p_9DMe`}8RD=baN9gRoOG3<<_|jKv9O6Q#|`h=PEokQ8CQVsy*I$O zB3_A54-dIwr{Hq46+5i$xcJUTic(wUfp1Nqs8_Gqc%w5#9h*4n9|9AM$6y+)esGaK z{*VMljUR?FpQ5&&x507WDJrI1ACJ6FQ7#uaxE>;qxY!!=CsS0?asehh!1z9$e~~G? zpoitZD9UN06JB3PQLoip@S~p;RX9NV2T4q!3RY5hGpt)U_jxZH2@Gs8!;61J;WXl1BMl@8;LFMfjtS@*ua~j?hjM_hm$&Whq}GsOpHQPh{?HuxhDbWXJ^?wAW)`ApI|`4cN# zaqd1~f>aZ~Ix|S!UEq!{<_uEL4v+m8@rH6sd_@I*AH#%oi(abJNs<*wi;(epJp9hYhEz;-0mTyaB5C(kqI3LNojaeI45a ztAJ>9)w$qe3yNCt)eFCx2qB4JV^s!(!n98~UrfgDH=>u5_eXHKX=G5hn@=$L=aNfA zC8?rg!<7<$di=|Ik|=%buoWQlGoq)FEF}}JglxF0O=pp`48w?W#W>q9T%0io76%Hj z{Y$uTW{1!blJAH;U^X8Rv{%QMhf~y!@(CEYLfua{$2UPp5>u4%nIs-(Ef)v$18H>m z(x~9$cK8I~tl6ZA?+PhOzS;o4Pl9XtnknIgob7L*9`#j#lf+RsZUF$Dz^zOh9A67L znDJNmtp)ILXQ#yS3QLXg^>MKGGi7>_df{$Ue1at9G=g4U9==BxpLU`s|G(q03M7Nd zD>IA%;6$gt!Xep|uQkRQAjjs{wD8GtimH3c=fb_EY`TyatT_@d2ejqouJkOj;U+t; zkwlvr++5z1XdRp|323&+&?kz_L0WVVo~&{hUQ;wkt-Hwwv`Af6X}kzUvhh0rC9z_} zKPkysZ|YM*YF1m)dFrz}IT#Dc9c`9$10Jfh$2Nu(wJy%gCyUf{Fv#=ZMXuArp1a_? 
zO(PfoQTmo;6MFPUwI`Sld_{_J?HeFk{BSQ#6B zrKnG)&iJA`RE;uod<4jW+|(gauFP~Qm6G!oyK~cc?-g$NoiSj0K|R8N)SqBLmynL{ zm_95rF^o)F|8+BHA^mL%tHA`F*MLzM?qKBug)fkTtJ|3h-oih5TSm^s- z;ZSWn+u?}&p`OIdGvVfv`dP-XDQ~<1Jzn&-!yWfR6`p4@0`Cl?sNw)Hf6>wzme>uH z6J;yo?mvT6$ez)7V;y8~kv~mz)P##gdqM52*?2wVlH*+4QXaYWwF-7flvW0Zd%28v-lj>h4*y0nFJjW^;_8$oz9J2KZj?!;;#>@{8zRfl`rFD=B zSZ|3Z7s1xKW9UNB6>VqS`T&amZwp)u!csWF*heb*{$~P4Qi}Rmpo2%v7^D*A!y=-r z2i}gxKNa9YRJ6qUU?K{c?ghKKbP_FO{%%9B50DIewxIwwxoe z5PTiQ0*e>HN`(Yo`3y*$BnQzyPP;gx&Az`jg@UHyr&5&;swQfyB{g<8koojqRhbz#KAQF`X@7kU}FfdN`SONrz4ntO*4ob(xC} zW^z)gR%oH-nVpR!y#mRQYWI8Xh^@fGZ}I zKUt+@@p?UsFb9BnB_&eQNVGsbuO!Wubiyv7!214vVJW0#K(fuh%B{J;4f{SBq|gm@ zdTe04H4jhhhdZy2#t$!pdj~p&OfiF$Us0lSh<7I{WTZ4~cuKDNK?i;OfJJ_9GoX{A zBlUc22Pw|qI02uBE@=8{Ry{|wbiXA&dWfQGJe~0tQ}Dssf$Kq_BE3dcQI4u1&b6ed zHTL81qc2bb<{Lml6&};#Hi|}Fw!>GBgO|<5_yA;fkex9-D=ql;Fudm+i1s53yzLf* z(A^C0%>WX7(ZJ!*D8)2#=@QYd4nsT~^bBR1;-6I%H9E%4$ACAw+#Sb#15F6=h{(0p zWb5K>Hy{+J)63PxrFNKEsm|NAzyueUz>Vc2*ffSnUSfoAor0dA-2$uKrKrvpoJS+p z2^iT18C-k3K$Y^5Dk;d)L1%V0Y5nnl12zKNc9rNU7@>6&E%6R0BTJ!|`3TX9e8Ivm z^lPLvI@?2o_dWNw&?yb@F1Dj9yv32XKw@p8D2*3muihj}p+!kx z;X?vX7y*YMwmH`O3~Ok?VJMe8i1_uD_9_=#aS#a3yzP{b$zqVtCR?&4O!9KJE6~R7 zy=4T8402?r6!D%6>SEg~Q0@-9M9@U$^L4Q!WU1yxT|Cqx$iHx)=IUpg(sM~~)(B|T zq=wGWS~||6vGX<5pU5FeIZV?0z7|x@@98AY1W`+M)@E0#k%bLxXgtl+jc{rTfH#|u zA7lc-G+gLnvMt)39xhU37~-S7VB|Lo?7tV{=RT62&vSfFV$IVOb>7sme1fEp)#4zQkPuq5i*0- z$nr9$lyXw@BHf2)|NSEeN9Iu!cbN-j=96huoB?(Ufr@l-B;MaiQ4~)bU%L<4V5@}d zP6C>d*3gPH)UlvNS4wckb8-O7A`-v+1GOQ=-A5+cpXP#YWT{=}1`BO2AI zYIV#MFv&toYkDkiN{|OO@dZ(dWaJ4+Duu-+H3 zr@;B;ogZ*lE&qFkPV^Q zT(L+~;ekb^z>rWYydePE-@O6}^(&>dGw@ zkeXV0Kw*eoEkP{5@38YJ;oa{h zvFz?3m6O5b3V0bit#FSc2&u6FJD((j`c>)i1A`I+z!m}YTg~nRg&jC7$I2}Cy1X!cN!puH0Ds>Z#D0Tqb zY-xBS6ce?5e7xNr0(Qh8FO9tM*pMsEXH0Ry3&B=fEnDofo1*euM&X)Ufaf&}JRLY` zb)189MnX}0YYsZ`-9eQbYag_ZhK=HZ@z-o%A5C((z=VtIOCr9I&EbF=o?!O9un~WClW{!8krxrp4{n_j+ z4N@Lz1Z~mxCVi}lz>0IG=`><%C%cMCmd+RjhoC}RL%gB@?BtH33wSw)xma}tK)BJ` zN64eRlZzMtw}z!#Y4b%@}oD4YfJsgF9{j(aY)U$m5aH 
zi6mD_zV00br%qOho>0}g^UWxHa$u11>+mm+iqSN|Hi@7_oo&L33y>QkOZ+1pSj#kF zi?zwP3l{V$BBqz$sN=HJ2^x526pJ$wm%#KXalJWLL7I@6jf0w zz;8iw3V)Mq1Gf682V7jEAg_$Y5ozT2S7d!-no6lP)`*4t-@(Dz4q%KU6ue%g zGX(0niME)T#!~m|Ho#1gW|VE16?UiUrr(@-fBQ&9(%i(f)t9L~Ua5#vRxsEL^y$64bdV~+E zL0Zljz(hxn!=7K_@ocu$zxm|6Wh|_rM)?FA+_9l3rMK=rLek6CH7}NjZnCfebev;d zTm8zJP`KGVam5`YDJ3?jl$u$TgE z-gLSRj!*~6ZDIm;w)1qjJTh^#HM^Wa?9kVslhUx5G|s16A_=N<(_Adf^>S%nVnx_S4nFWITIsRstJQ!u z!D_t1=S&=pAz3qqr_e}axg|SRT|M#WZ>Z`Fl6BI8o*nh}rY%llL1QVj!|x}Bdku-M_cWMaQQy`0yZWRLHD1m~jY>=Kf+HM54~oif|4R6)kL z!^nqx|70!8p&pt z>!|1K=@K$!dc1jSSy*!=Tsdh3TcDO?V8D)pQOH#{dOo2tCc2!wk#EoFvyP$T==&6< zoiv8QA^O?uN*0Nh7;WNj!EA1|H4w-mSJ0(9VagENuu^pOZia$fA zGK+1Et$z+uU9au(%E^YOy09I2Xs#2zk8D`Vg#x@~lK>ELEVO|ZUNb<0ovmY^m}`NL z0iVo+{|cE!EK)yD*Qbx{+RW!x*&jS@fEiGGj%|XW1dBm9Rx^UhT#*UAh(Kf0Wg^9E zs29`#74(?IX0O%Pj(SEXgx}ZiO2&pri4!NRg0!OF)CjUE6H9!6GlY zJ@)$zO>(h4o_ql6%_fddl>PTn0{jFdWuA%!W|I1(dB^Y+F`*w=5KqQ?XfqT9O}{jX z*L&9b-^6+OCueqvI%%uTf!Zr(bMdUDklTLNr7WfV6FkB%oo^s2dTfG8sK1Hh&GEZ> zI3F%y0d*W_uyKO|&MD(8*fL#0d6izJDxvW6kK*mU=z#^ZfqSo6>_(D|J7Jk8Q>T=i zag9{{`?F4=QWiNcP(LM${HSIN9XxOBgmGB>4$6R<9zY_iwjN6HoB^!}mgwvq2fX4t z$X&fT?%V_YQMztDi=3s-Wv7wk?MF6H%s;N>u%)_5SdjtVQ>rgcwkR`Lv~PZyVbtjp zWk$=c)|45?KDKybvijZpC#LZ~PCPN&)Lrw$TtZlubCPuylv|`(pDee`aIG!3%JR0X zu-+HCpn{vd=wyY>(bcsTwzJEqKb$KYjA)hznP1pV}2YwtQxP_1%JJ4kbT4 zlSkfGyxHM+pQfE5c*F|HaC*Yoo8kOS@HWHcl~8-9>)YUvoo*lVlP0-;UiN6xsBfE; zCwtUpcugMt^XS&eW125Knmo4sj&jhr&gWi1<9oht4Vv(~^-&NS{Hq)+9H!wNjE9>h z2YaeIJP!6^jwhygvnP2^@zI-;JjK^&`Qs^mW(mYpk>yVBss1*{lBZ4_aq;ogfRT5J zkU-}b-XW9RtCK?}k7;`x5;S3e2o3hs@ClvbYnl=|b)rLQXvpO8^l70}C;3d97B(kk zTG-6xrPHR*O`wN`FWBi5He>Ozl(3mgE|!MPT5*>?efFvsKGWx{txlOfcSBq0^m&^H z=;7im8ouH4x0!AWU$EVwEPP?+_~A1a?V02|BjUiEZ8H`hUS2jM@_54VnNcTq`p%3# zb8OqpCFd@d&0Kov?(kX5uDtM_wfuVZwplA~wUy0^xi>J}MC$=#l*!6c!*wRBD)=`{ z;$DpIHCg@If0XH(_cPX+uKg5!!*pHs`d-ucn)Fd->+27#GuzNqaKmh4+s$6HO&t}Z z%oEoC`n1k`bAQtf^TdJvUUP|(ss|^DZWPbi!WdD)*{U(7kCUu5(ZeD|cV@iBHp3+) 
z7OAEi`Yh5cn6|ZP)(4_$(|Kp|Yq#59Z>-%RDC5;-xPFML+v)K$zb$o9r8$eaOC3kFRQ*> zyzzaD@uiZwJ!5Cz{CRxGid)T>o~^jueormtPUp*jn7cjScEsHK-To}*{@|cmtaO-W zVC;k8<{7aMRUMzlK4K#Em5@`+iZ`l@ovU4g4AY;rPIJsnZ_ zeATm&_toQ`JHHByd*S{)BktuG`SZ9}6Dal7m7bd8Oz^V|9lQQ{X>s-SFENiyZ+wk=JLg9A zy1HXGzHRKPzVSU#*{npiwRmED%{KGQ_}cA`FXQVn5p(^IJwcP#*B_Xdx&G(jn3wAt zjwdoVG@jfwc|+5goXic)=L%nLXt{Ksxv}-itH~SNu7A(m*nUg?a-;kn#oVNLs2Q}W zqs)BQI1n^=*=&<2ndqt-x)HSDh6d#PxVQ-TZ;|dWy)&A6wL6vc1`}9?E^w30hznon!zG2>BINAZ$`p9{CfhJ1_`Yh9J2h61D!<{=?~4m zByW_4F0tppyU^Md-lem3HAosAK4p!uANqEk8oYBZ7%@oVgEp)|OVc0;Qw2SX48G$I zk2yFI7I1+ihz-BxG8y4y{xcD*&-vg2U&zWC5<=0$fXAX|hMtKS7z*H91q?GtsXj#) zlM-zcx&a}sV?+_T3PvN5JB&{wiq0^G%z>x8V2&HyP?7{I2!|WOq@32z$RZ?8@V{wk zRQa+88oa~43cJ6$>2doBnWrg#jNO341YOb)+KbR1^`wNL~Hf&4N z=VudH6+rzo6P&}*B$0N64-RqQlW-j)oZvj=i-@eb{CJ3wFYKOs3``Vcz?)c!!GatA za%rUC!FW24khbtSMAlNij0F546u|*tbEO{w3~6^Y0k?Ff5Z?PT{b8@7$FMlK0`X2} z0v3|wL3$LCMQ6x}SRR8%&;p0vsAwr6co3aNyz^t^6H%L^#YELzzL?0Zg4hBD1w!G_`rUnQK zM*yXB-@rXZ{lFO3Zhj*Xtm$7hw1meHzFCCq93!3(?()xqIKwAe8u$pAkUwO9@DSS| zw$ag95H(mdb`UbPvO5}pH2@cLj{z#I9ef!f-M}aU3*fLQ+Xlc%kMRwNSXVeT2#3RU z3QvZV5XbwIfJZcsh*t(`NwgUva`j73c$${0@Xse{fspA$c^L#n*j)g_l?l<6Lly#% zzaZDLHZcIm)!=ySA#Y%MxDCWH*Z~}1Jz(U5DzE^kTqbuYh2H<+mJof-{(Z1=0f@j7 z*q!57ARz=2aE?_7NJyLjcpl*|CbIe&X@u+!#IH!pUj&KT1(_bDNit$-hyAk&MR9?M z$e#(&8~pK?5rP+hThu+^Hm3wW6@|dktO@?$oxFfIge8wqSi@TRq5dqQ$PH|)Oo#@F zOH_d&hG;5)B)!Jy1MV#^p_wYiV6fZ~8dFwG=gOwj25m4t8{tXb6tXp27rPU_i= zT%y$u91ULw0Rc9G;g=x{LIH$EX3Q58g0LY&t-+5XH;8SlYQ>P3aPNo1LwhwaVj;GA zBp?x3z<~wRA&_&T1w_^uy(|zo&n6lXtbm;4@cl~&nSFXVAua{TWiCVeo$%)nViAB( zzVin@6r_W|WP+a1+IqQFL@oz3NMgqTvv2Ytyx+k(8Yl_TJ)9vTIJMFFgvZfn8ll(% z926D<^3t#-;ZPRB7E`%NV1}{hrACbGGFmQz3Ayeg4X5_q%H+8(&qU`5ni~pU1+qN2JQ~bU(u*gFtNs3IL$Vk@WL7w#2!(hmUQd$IVjwLm*$_mIA45RI z>Ow$d86YM_X>f($Q+oLjeOUxK8ZeZ2fdw*6xQ-Us39`XaHGQ>>o>rPy1&LkzYhexPVXjC@^AK zS~N*C_N2=QS#LCOR|U|Q_QJ0cuOS@o>ZLJA$@pJDcbNkip4#DS`7{{L5mK4D)FHyNrYc>r-F4ow9azg0NC?{r*_s<<_ zynuN&Kq}U4$oox&e1PTyC=o5ZDT*u^91hjK=qcP>bb((&6!BpNf%gz24bmYV?T`o% 
z{%oSJ1e}nJXK)BnW(5ezTE2jgp^7vJCLbc|mmW-HPXVv98vH?3hrxGIV}OfL|FVf> zQS?874u?uupdhMD^x`!LgJlc_m=3g$Uja+YY{6jKCb(Ld-b50W>p&Q?8X&-unDlZG zRUlt(iiQX66;MT-3@hi_1JGMG@goYkhnfJ?R1B3O;nH+~qj9;6W(FD} z3x#VdX#fdybHLaQj3`3t42uXDa6{i?(C*5u{@|5MIyvN@m=LUo8LGsqI}h+I)`+5I z0Eod3dSW%88Qe_E@rQj^feOe5(zERa+bsY6b79Fz!~c2WLW39R2{s^yKG94S!l8-F z(8vG%TlnJtZsNk0u(ptz8A4Bn%~6S`>Im;F>Hj=-Sys7K69tQhu`uaGX!#hde9ZVa zdzmPBova-s)JoAY3dBCTVY8Eb^ky!5;G;ixlhQU~{twRoX8~tq_H2sQ07V!|7`A_?IS`jJe+8EDOjOuf4SUQ&EZ;AXSsuxBq?Czt^6W2 z$GiD8&m5)FByYh(b+16DGLx-=&QI;N)m&akWzStIH?cJg-*8S2S6w9%Xt=!(cX{Sk z)wgG-`yLff!yMLR$1%q=%nN#_3ui}%tlk9bXvj5SkPD4|4a=?0lqC z1J;*xvjd$fTtkdqowR)aGvv8?HFqy#>|JR{P)Of9Z!@H%8%keEy6t1WV3h{K%x4W_ zsz3jlPROLZQ@cWfaEgw$R+RR8&8am`y3?l3&@G!59+CGxOv_a|ZTi8=AMdA&CeaS4 z%uo206u$7iCk%s5R8BUCJaH{_<_w&7z;0>%!!WzPcdx_3)~{Hp(3`!o!cBjU`@4vb zbBbmA^&PykPJVRW*ztb48oU3OzBsInl`WpBVH521ofVcnzhU;Os*w1=`e&jv?imBa z9Zny@^|p9~JzbdPWAll%cZ!YYf6b3B79G+a5&Pj#WKPn7kSM9QA}LC=)AzI5!+mQH zMV~!!VCKwoy2VE77p|41E?s)}r{S{87j3>Q8(kT8WO?A%{|uJCOyf7b{-QH^cHI%; zd{c3V@%@Xm*_sc0#96VArf@v1uzGh*^*R|t_nS(sh1+B0dl`OnjSTijnoUgz zjWgR2dpdIGh8Dhb?)ra|sZa-2&ZfJY1m+3)$%6gbzm{*E&Fra8okxc`m2~qM#mAgP zN-2N)1aqR~z?{@HOOA#zyHS)h{AWi+5@}QN&Qx_|BV+3+-Mzm08e`HPZgmtI)VR+m zXMdV~(?9%Svg?#S&y*QEBWhC2O-fYwljiqT1{yxKNZn@KEK^O5^6;y)H*I&@pXyk? 
zGT5j;xa$OG$vfg?utk}rpHcBwi`ukEf#TVcYq8au*oeW%}%547>Ai z!H)6iwF^ugAM*T6ot{=nm@enmrB4fe9bV_1I!AxoPSxD^4|jUl=z3;OZsg6>3vDZ} ziyexp^}GkOnagGLS}7pthrC?2OwAv^2w0a(|H=$)nfX%1pY@x_kMN37+k!>rjYp6|V&q ze~Vc)Zl)Zaj;)Y5nYE(d%kOHY?CC@PI`-OuX$tf%ZsWO^#vRDm_xRKW8Ewe@$GO9= zs5aET3AxSPpg?=~M=Kw5??wUVqh;%lwV>U{r0MOGexa4Amu_acHlcdPF0&OcT9MAw z=3`%vx1)#LyOj6n9#nYU)oK2kcC<3ceLVj{2Re2rzJDcKftq&WY!u##PF9>68Ew{v zl0N14hUK-P8r^D(?%Og{wZA_vf20ic+FfRN^~#a<;OdlWy(VOAmhRi4`V;jtEs5yB z2DIc#=8{y;Zq(2Dn)CdD0txi*zcwi8Le>+{9pvQ65z~kfy-L=MMg~lOTYp53CNE%x zk9Jj{7pL1-7I+V$T@QcpW>4=xp}*b+>}&jm8l*owPTXol8=ft`KcC-*<_voMZUbD- zWpLZg9Q)B9>*52eQ}pWaxqa6<<=nYs7klI#|gl00sP zw{0uZeL2^>ah@FQIXI0QFiU}^Z+LWGbf+Dy`8^|{RJa#jEr@mX zU3Yc69I2JgNP33l=6Wi903dRy3ioxHB^!^~lf$)pE1o zo6RWAJAd30I|Wjna>rObPJ!f~ua9_owgcUszv$eJ*)=GBV!TuE`*!57v8cGgw*^H6 zT08Fl)_}fT)J~{c(t(QBcq|(zRG_Gd`!Dz2XhVB;*XUe~`GH)^U%XzCSC3{$G`!C( zX+bYv-0k@2^b;j4jyO3g6nK_?asM2^WqQIGhr9i)=<{36Ss|x8P-^+HL6t>6k#Kt~ zHU4=sit@Q~<%hHjrS}S^4qMZYPV0P7wzVu9i7TvZP$b`5R*N#pY^ln3s z7M&A^PL(4^PlFR3O06hF-Ei9mSsU^^8P&R3qYb^W(jTZB)r^$ScxgVnszCamUiOBZ zYC}&RKaRY(umR2H;EsjQo6u>85v{53JCObJcauhk*CS=s#fO=pt;js3WlG6}7Ib%4 zRqkyi1*&`fH*~|~pJ;dZ`b({THR%1dWyRUb3iRXLO6xwr^~-Yo!!54uXqiPhX1H{q zk7;8!e9QWUGIoz%p8NJEqAUh_*2r4X8XFVBYp@-?`!)I9{oTE29S1-ich3EcKjP73lsL zNt;Sl3)0ES+`Qm+D@wb)&6lNUMIM*-zW#Eh6D@DC&oLd-jv{s)-f6Gjg3^|TY_SzJ zqv;{*Qr|Z={g*D$gch|2pI`5pi*H03(we^>k{(oIHUu18zoIR3eo_NE=tZY(8;~P< z_Tp&zR~a&PSvzoMMk6|Em1!4qz5{JB%_*oo--3KJ&dl!vUE6Ev{4j=+qqy`Hh9$o` zko0YiWPqVSZ^m=Bxa%oU(6;yU%f57=PAh{G{434K#-hyhk9!Al3i^{=X48lUOm-TL zE29aC%qx%H)pw%#-KaJ|sz4??dvB00y3x@SuBu_6vjuz=kG)eA$XYe-NJO&&C0x3` zAn{WtS{I}ucWC;BQgp577~X70yJbygnduE^CVRGU!`4#(3QQ2K3zI z&GAnrs~b^Jfg($z8R9CtxBvGk1^RhGz4kqyP;O+S&{fhV}GK+w^d6nXLg{xchd{AV-@Jwo6+lR zD%#QVupvwP(M0F>>6K#}(X&h3)$tR$(87o&ZH`6{s{iEM73H&E|{s`)+rlc^eM}U#U@`b(ed#j5^YZ8r!rbKEUtxBj>NFz;)76U+u50ooKlD zwf3?87IY_c_G0zaE_B3?OQf&)h2l4D4E+39j@G_p8b50PiM+bXkJ|$dYeV814pRy= z@4}naxm((iOI+ebm;82ACTsf=o?VUBRNwjO>D+=siSRpLZ_Cl*wBdhNaTVy%8HvVk 
zkpk(ez8#qUq7z*(nYn1}_eL~x+7cuGna!x?$yohuMxY@_Ki&r-MwCF_n7OUwy=gU#KwN>Q&pgMG(_*P-^ zx(lhUo@OZ@t3VaaDRpB+t!ViP#WiQc7Bot2<+xDW8np8G`jd%X^=Ryv60f3zUFa}r ze0(m{OAfgkkW#kT@xLS^gp5GVFZ*E7h*fZ9XHz@2Uaop#93;Mcv>1^x!b*So3ueY4p zi97|>r=EOkMB#n`Yp#bXP=ChchzS}UC~$gDbpN$Z)S7pRcGHh0+!yyes;sdFVRf^% zsehW$Y_qVCREJ+EuX(KP!K1CnVZ`ZSwRc;Q;liXoQ_zhsBU(78j1?&IR$s0C@*e1e zv>NI=+R@W6mFC63_gN>Cc0`|&qlK-L)vj2yA<-POt_skZ@RSjw7DIorux;m8=N)xO zm3B6wR@#8{mQ3o(4CzA-dQ)e%)XR~Z+`>$a*@3p6{MJ-q)`-?VtWzp@LID=ii154$*N@~l@V=lsR0d28j!D(hEVH@6X0iK{+t_H9KwDs@YKDAl1I zAMJGqTjl8f;6|drr2*X-fAU7e#db9A+!op?(;j4)_t{}n1oQ_JuM}z)H2$X_yw&xe zdS=#+is$HTKScwb+B9X&CXO6&Phb5l<#nOoZ7tH#k2;X!Mw5&iC*;WKZBO9^*I($N zzD)@IR6El88^ayzUW@!Rr@YK9Z$pOk(I=y?_M+U^qfQ-4m7`}Te6DvT^q`l9-RrK* z>_lbDhdZPA@2Ee#hw&Zqeg49tzwyR3OyL4TZ6t=Txrt1+=QrC^Y7iEwV=jD zgQxrcb|PKs!hWT{GDJ6CI(Mjk{4MBsGiiG#QXe+5CnZmTUg2dy9`$`Fa@x(CYa^6} zTdbEia8Ea(KS>EjR^$6o)$EG_eVLu;NY3PbgJ;!f#CMB$!WjChzptivE^9zTlO_u(4NiXU-?YKK~y&PTgzBKR7T;N&F z2AxZOEojY0D^kO#7o}O5oqP}Wncqw4ce<%Sx$c#Rt##Vap^+~eDl8$NHrxKW0Cj8g z%CyVpSIN<*0wZU&&vI1y$M>^aKp!f;FgJ1(yA9#qbff<4UUcho+z0;NX7qQT&F94q zZ75W|AS?PK^hKvnnxAa!Kq|X$FIH$bAv;gRSQ-lFiHB95SzFuCJ^OXR-6eQ1B)0e$>k=KJ|?MD6HPNRiuzhixc6K+#a6(}W5F zwp?q3KHGC(!7|m0^=QYkhCc-z&1h*(<`Z?u>!3$5)YnHpP^jrDm#>lrRA8pucDGB8 zqNnxwzcZvzqweuS>1zeL@G-4Hp8gZLRo`$F`N+}CrUxN0Jcw8M4#@|@4zy^;mj#xw zZHPJIuIl^q-RPa-3+fg0p^&Y+)c%wxkk)k#_2cwTWP8IxrRsMFI*rzr9h}*SR4bOo zUVhMqo<*)V`+BSm6{UO>tZ=D8Yi{{ZU;5}Z^5s^?zRw>-vQ=^LjN|8{Rh zPw0+Br>6o9_yvyPjPF2mGaNFEzr98NV`=S6&UGQ5ce=EyvR2epbl}Vr(2YsCuNT!$ z??eSf=>_Ybwxf1~q~r(t+R>OfdfG>JQRvRb`uMrPs~Nt-noeF+pme+TQQe#CP>z@L zn~FP)Xt}t3<AP?t8Xmc4*}Z1KNWJNgos5^p`X9U15;B`ySBzFLfO6(Nl%nP)yHazZEi zGCzM;w^)v*P9MDf81OB+FcB9#SD;S^XLW7(BZofc(I~n?fkvcSemg(s7nf+JRy=AL}%<{E0%uYW^W36sRD&bTM!AALJS5dTbe-ch@V~?%HkHfi^;R z)m(2u18uVo`Alm<`)(MJ$f8Z<7@F|Tu|9Hz0$KCOR3D{9= z+#367N)6h({*-sz!Dck4`^|%wu5IY)7xl!)h6=PlEPQW|Z3mi^{yE_MQaQR9%9J*} z=|H;O#mjcE6iCrJ`;hIpW^~oog12#A7xH>A_W3ioZw~h=tLsS%Dsx_t+9L;E(d->B 
zaaxcvantb7kq$JWxToXYqDJ%}jG>q6(t_g6sEU_yt*EfGKdchYu?g#ydR`aQp%2=h zw3f~IfzDSgyiz#16FvF+CGZiPQ>G0zTWxyYimtVlJ0AGkjO_ckj{87wY2_%rVXguN ze0slS4CGB={jW1O-F~CP8v2C+5XX7nk0kV|ccEeC(d9;gf6>j*;Ha}^3RIG?pn-d~ z6^-=s$gK0|L5@MkW!H+JZgd>``txoR@-*zucN&FqbUb;x|M~1LG+_sOr25%5^o5$*wvE$>T4Syr$yn5hZkj!_ zN&E%JP{r7dUc;R(10pF_I8I} zfb-PsR`!>XaujfG(?(OM8&l@LOi|@@A||;@*nAxN0kW-q(bEpp(Y>`|&Kw1D4YDu$ zdlYnaPBm5Zp#}X;FsNe}w4s0P9-%<&uro?taSceF9-}k^E0EsHV||N2kKau`9})=r zKfUaBz<&ZEoV_=pW#V8L;yAq@f1*l(!W?9q)L+Wc`|$Bo+*)hVo80YV@ln|SdG~`y zGvr9?#^LmRDKK}K8L;9^O%s~CfB{ ze@09_T9%p~c@FF{Q^ume(VZyqsBOCWx-PVDvTqoQSD;#h&t>PJ|DtKFSDTmIh6XFv z*S>z-hK_{ZTCrhRH=1j@wJbqfj{dOra{TpSYLK@Tewq?asb9 z1^Ukmnp?aK?dp)xHOT>Q(51WMc{^AOno!29>ek>_U}s+dG(?>!t@*S0n5s4uG-#JJ z=}#N_`RC-Mjc@*-z4~@$C3hN-tJUSfQ>DGqt{;fqWvg=<@d}~9ec}AD^-H{{f zNrBW@lEd`-#3EGXAL0>O_$a`o(XLG@%!H`LEu3bRz1+%DU+z z;oP{_al9U!qt1qg9C>)|2kP0EzQnh;3B~2QCFcO&b6;K2w(y7kd7j}dOD3FKFFjyX zOQ6nux?%ckMjiUKYOP^Ps|-aaZ}>aTupPZ|+Z%KJW<65#<*pevMvhun7Hug5y`7@z zads}$gL(Hi&0tKcN6eYmj^A_ZLR;^Zm*qINp^~x>Tb7zCkVlVz6AkeSJ+zkWS=Eis z`xv#pTm*glYwLx}y&I9=x88tj33B9hZ+2Oxr~}PlF4?;7!Y_39x~0N26y}kAp_Lry z6YAvW6CTZkxrN!uz1@3SP}qXJEq0&V(FJ#lI??)ObmWtsHb1!)`O+1q><=l>giEX6 zIv;39rzct;-B8|&-q?rdjbq4>lHqCZg`hXaS>v7;WVNA=(U)?(!+xSCP61t;4nf{K z*&p3d+=ULzilLE^ug5tzj4JLx{mb6@Vu#@`bkfv>>LC@V6v|!H%3dTQoMvA4X+?Fj zmN`uw)`1L@OqKiR%F)DP@%~QhR)q7zAIF1!to-cXJPGE;dDlhP*21}A-I|@6zQD5y zz8?=|9%@Fuoix`iXIjuBmFep9wl^S`$T!5Q(Q-7{thIbXY8#p;y_ygUa|v`cQox5f z&7wJ9eUYwrQR*7Y(xlWIp!O53CkFFR1r?99>XA#G^ZP{9NHg-=J`JhV-Y z_ASz*{W#o?ZjGzHJ)*V=m9pn}zL{8$HX82Dnqbt3Wc4GRu5|XGS<*c-j}|wfss9E% zFjv@f#Vqye0Me=Su2O&wCEqSy`z7TkYJ7U;$0zYyw5;oN{_b5Ji0ACv|7=MAsg}?| ztp;?G9jrTlT@%u_Kb0ef^ZVG%5pL`?ohaZ>@{NmW?a032yRf?v_&uyu)@lO@~vCU6Zg|FoAg8802^DMms&P#hNoiEhx zg!80H7yk1a`Xeo2bRO_pRPDsE{} zA%>!j#8Vab-1#DJyOJ0h_eMxWP!f?w5*dn2S1C0mRBNbtsM0ou=9ykSK}%I9v?Zn@ zywVg&L?Xp2o&LYIPjb)R(f9qo|M$J`eScr?xo7RQe|zn{_S$Q$z3(~cE%r$KwGU>b zR?1SQ2&ov%U z&4z>}mj01-k2S2=e5~@x8kT=E<@+sdtJHI~(f8OVdrq$@DZ0(R%0k4cDr1Lk1oe8f 
zO%-eMM``4jf3IRGL(H*nmshh!d$t|;6!ZL9?+-pZu(^tTu1xjH9b3Zg*OYz|k9}VA zm!>`_#D026(8aySu#b3Q=IKe(7Tjg~CmtSPoKwkIW<;-1=i%o|uC?fHy~Qf~P3_X* zTq*NOT5xsBfS=e@XS)SaIByzk#SYc~XZEd+W&QOLIRAaL@unkBhleptm$FWEom9`*qvv&U}8D!9+`l#bEc(NAmN{EvD*SI(+?-V3ie^}V{U z^t;3E&WyFs8B@W|m;G@r5c`IqWqocn?qAKOzU^*OiSwZe$BJ4-FT=U+{Hhc z6<)e_czHE@r#z@~3HB4aGaRoiICPt3Pss5K9a+t`_A1>xwz!HNXuERqu;0trS8bQI zUv5LYI~Sh4u(*u%fArDucSqH*XMX?Uo9E}=V(HCS#&_kcee8*q3z6UL{7nCP#J%h6>>u{yE85&- zt4oawFKxfVrj0+)xd-sq)B8Q+{RGZ;-Wagd{`5`ubl8H1i}LY1N|Odx+Z5NZE>8_t zl6)#y{?Oz87v`3+)7@{s*$3x=wsT|m%(!)%9S>=gqPtzeF8`2e?A^bT?G7zT3B{g|;M^xWpb;n)#THEd$Dkmp$=%x8Y%v9CUUhk0-5 z{cU03ZT8A@@#h)`*D%}b%P%Ap|G=`(mu|e4SIJs_a`F3<&2XO3{grJQ@MBj-EkD?P zTO|wpaNpAH$d~W0o0pfD_apN??>iw0=U?i1Kq;f2K}Kw@WNY91tY@QC*zvl@SM1L| z#d=Qhi@6+)ebeRZ>DlR3?8Kzrqwj3F&8A-&FqjpWGrx&(!;-%IiJgmaZ0+5xoZ-)h zOhlN?2{aPT83ix#>OMmVb z>y(yUc;s*ed-<8Nvah`!un*du$!z^~6?=Pj^{j3aOW8sFw5&eg)-c-cxLe(1)h8z% zy=rdQx6SnOcE;hzGgh@e(xd4Ec5|igyq@qgof?Jza`JDLZ0Duoaf5#=V`tVYTc-E9 z&PMrprRJC79J|l**49JSZ1l)x`BULX5BW6d&>O!$eC_?deO<+E)+)@EcIb_2wz%2o z4)J3v@H>yso+YjBGr#FGpZb{HWL~o?d!-<6NNTg8?Y6_%kG^zc&9dLlvxsNx*$=Rf zJM#YTOV1ms*xokZWUlX9&VD~#@LQYtH`%vo2}vWdesij$XXR$s)b2}ev3{FU|GMbl zO}2i)&HK^kD%o9I%c<$@@3CIp_jT)Yu!?;#Y*XO#ORx{4pKV@*eh>P7yt-utD|DZX zDHx3N<3D?SsqXO7w{hL^8vQE#a4CM7{XaiUqsurkvr#8{n8t6>kk^`Y-LU92UH2U) zUVFUfq2{xi8cz(F)y(?+qFGP$uBPW`yjsj|9^5=)c8dX>7td}P=6Qr>*pP@hKGCs@ z=d>P`adJ+Zu`^oC^^IQ?F}H2vhQ)K+rR+F4w|)Ad7V|nxJ`pkR>FnpZLZ(7b2nbzYM9>;ippp5L2&TjqCJI%H_R|BBcp`2lM(iu1d^J)`CPzie7G zbiVqe%(flH^Sf_9)N+CMuH*f}dg#709KkzG8;=DZxN{W`$vo}i)$Sah%5#GPMR+NXYaKI>xhy+6KxG3~(JYZucGHSAiFal|LFR+>vc$`|H{L6_?9RRU;q}W? 
zt3T`d&sXl8N&M%upMNa)=k#Ci7R>Pe{kKN3GrXS|7(4T^j(M@K=>oruo%O_D8^_IV z=^7X}r*(Oj^fuFc+q3!pt8<>6-)-yOXBYI?_w%!_D@Sd93(XgD{NAva@AZ2#=)uo^ ziyS(8m&Kv&a=R=U^juMwrOsaWySx?gg57^v)W}@_ z-^??^$1cn~J2L(2%yVO2ZI<=jy1XFE`GhsoEEkftePp?)`+McRiy8CU{hRlio*J|F z$~?dId#^4qUf%n|o53B6N|z2BQ}pAC`1M8C)=a)!bp7pF9rxYXv~60`DAIo5RtqFf2wQXz@NOvVJxcw2U5jeY3{pSk^y4E|pExhQ>^ z;iqone4Dsi4k$kD-QnMj=1+9y-FCFu z%lEO#@e>kK<9r%>`RM$;wKvREQh5AHEl{V>%cm*o@=o}B4ekJ4;dQjO9oi25@4T+| z-+M&szhuAv@yTsn{ea#O3~1bX<0{gr2rZelJ zBK!Biw^7j<_*KzaZHi8@nRSX4wR8rv&S=w_OgfWUXR+$6W}O{qJ*pV-Z84x4ssdtA zO!(OF0SG9ASr1_-v*6xh(HU&GN3%w~NoQ2>jrNTO13sXQ_=Xsx8D$m)A3LO?3|%xq zmdRv=e6!AEHR5B}nQWlZ53|XrGn*Bi*@AD#Fk5kNH{)a1S&T-V#bnS~%=kuo7OP%o zvDtN2y-{a1V3^=rjV6o;_rO~*I-B00vnh(sX3*nf!pEkw8F6ny8D!Z^7JN|8WY^iy zuFVQQ`f0O+Z%2K|wJZ4ZDrn!1G1*OeogMwQn^BHFD0)39Pj4d)LT!5JO3_1EiXIxq zg&3q$JvyZ5?RuS}&@?CpkP2}XVklNr;d)n~424gYVnfRcM?e9JK@Wz3WEc!+z<~M1 z1u_iiHZEw&fB`EsE4a{|ofsH^V$=ghQ-cf2j7B19wv9&Onu&o)Rg8o+(TGgYi(-Nz z6%%#MWJh~uvJo?J&9Dl^43#QmW{R1HN;9Qcpl+BD8nsZ%7HCVcSb?=brx+yaSxK&y zMs9`96`CkqAlwSoD^?p2R!l1NKnOBTnnQ&qPO(wPZN#->P8BM_~45Q(Wa=rMl=nh#uH!Fnsd5Cg)YW&&AcVn@Q_?u_P9B2Xltz%@)D3|g zmqx&Y353uz`816q&K!9;4~N{HMvz(7$sq@*5m;Xu0goUMf>3gj9u9d+jeyx|1bml9 zVBu&4mY0Vh*Qyclff^yoA+M`(#PQJb);O9~!_#6_dpP8xH3E)BAcUsL31}R+agCrQ zUZ*#5>;fSukzBpN5k^YRU*o_PX@qDY6c;oO0)#*atx?GEa42?YglMo7O*D>ZRTN(| zjy73vpcHXxS)B58l_+fqamv#OQQ9&lO7lFNl3LN+2l2EiBnkmZlg_=Ps&?=ws0lpy zc!82yp?D|oG!I3sAOZ?J&0P?We7Yu2?u)i2P?N%?R+HkUKp}t%6lF@DHYubElx7vw zCW5TM)2x|zlniODXiJ}X2*d&p5m}=sGz*mG=zybOE$|TSL`u{I#c_d$fG$v)U#2#t zep0O0YEsY_C~X}BM`?h-Lq4EUlo*JV*b1cz0#94oXp`rt9-UDZA=K2|9cofuq46le z5GZNvlyV3>PVB0P7EIEkjJLbM%iPoJ2XnuY@O%*(f_W9ZQE z=w9lHf$!dKJ?WnbFB&==5%|!*Fo(nB=$Gi|?{FkKf)jaJzkU)wn+iic{DF>emBy8~ z!uf+7j%){T_~wn^+u;bI3qc+Ia_MdeWH~}Z!<^aaxvu0v23KTEm@_iP7#@;n3Qmp+ z8$NvKfWeW8VXjDLPH^&|+V9XXmpc)3sL2(Pp5w|+&k2tJJ|f*6?nu|W=o67{K)Dg$ zW>;c*jxz)@BAo_c3{KE4cNA!9V^9w2=i)xhnH*{cP3`2mA|0S(^q>u#PDa0y6Nx67 zZu}*!FsD0I2~KvBe76&QbGiqC?{uSmx6_UG+)+l*1}Eyfl2bq@=7Kg+AE`gCkO=T2 
z6tt&zxt*ZnAwR-|_VnSc<$M3xmt;0G$Yad^Y$-j14+-f*QRRk92~cqDOmX z7y6Ge5TE*s`Ek(PWIIEkw@610bV73mKFVMRD5H5rSp@nNnhjf_G6Q6zEF~LdInKy< zl!fNY52vU}sd1><0R9 z5Z2%%dyI_Xa$U(0q(9gUwHISj*9X}$^v3%auZPV-Z#3VbMpdV<8*0xTPddPQCH=*l zXdKWl$uN=)qd(cyo@z^>Caei6r!||+$Cm6csaWdYAhIvigDqjb zla25)TDOsoT#^eKdV)@~)0N;<^MrBH!>MniXYv8$uSmyXTwkjH^3c$O$X9)eR-gQo z$S2#DX&FyzROFLi6KV2s8qL=PjV*?37=0#NB;V&54<85mEn&D%lrY>s*69Ek)mM7P zO*-d(Q;?(jP!S`KoyMX1R;_QfezsQEB&XKr*6Np!o#tMY!+o(RN1hY%$z=B;hWl$# z2i$jyI#AcBgrRjHVaUJB7_u1&Lw;YvPz;bTJRV3G@|_Zf;)ICdF+-H2#t#w0V~K>J zxFTaH#z+{7Hxh=gTcK}i91<}+CW#mxpCmavR!JE0KN5ywn2e!#A!2xJ6EQr_Nf;jU zBn-t98AHBA91o9+B8JCE5yRu9gyFGM!cZKQFcec|4EZt%Bd_DyxGQ4R7%XCVJQn5f z*eqixPD>by*%F50w~V1!E@3FH*TWzl2zpcFy@Vm(C}Zla1vMv-Fq9ui7|Io7IrZ&; z=MW->=Mxf!=N6(Io@YoH$~j~VdB)VNW!W4kc8v8k&NScl8jU1XPvL5@$vkr9*%OU zdN|6fWOJQ+thT0I=)e=?5ef-;WsLK(+%L(j(Bb@(F$ zhogL5#_`-;mPh${Jsi)O#dW0S_Y#ih`Vvmf`z3km9ze!DBv0KNNH}$$AmR9)L6*n& z4>FGLC1f1!E9&8-T$lWH?S4a;zuLV=UH(gJzjhxY=%sE?BH{S{M8@&Gij3p?7754q zFfxwsXJj1h+r;tJ?sH^Z{rRig|A_L`y^w@c_eG+-9DcU(r+XyVW zLCF|TYm{QWip4rcPC|P`!PdefGBRU^nJ2^;hh~g3n9xQP<|4|CI`|%>aDU751uEw> z_Hl!-w~5L@ogB<7mDlyx6r9@M9d8&;pA?}Sd&mrTOmxn~5ypPGanY`c0#6A}dtt)( z0TZ)?GJSB`(5&&E{?M5OozL+7EA6Xa zGoIo%#zALb>Y-V1I^B(=O9($iqeC)B!VsL#(Q#Tt6fYb|g`T52oT3pqkoS%5hz5?T z2B*-`9xuuvjw6OHyguDH=zP!Nh(}-JqHVYf9#&gF*x?GnEtQ3a@J{rrJMSX|{iCk- zlk|s92Yc`kg2r*)7vlibNr>7`(QQA6+8=%>DD;P)50d?iq)#-BA;uBxa15yJI%r-6 z-$6lMzMV4SCcT`QFr`O)Mn;dgNt4pEJ7*v?}KuOaxK GasLlzf_1b2 diff --git a/assesspy/formulas.py b/assesspy/formulas.py deleted file mode 100644 index 3989499..0000000 --- a/assesspy/formulas.py +++ /dev/null @@ -1,299 +0,0 @@ -# Import necessary libraries -import numpy as np -import pandas as pd -import statsmodels.api as sm - -from .utils import check_inputs - - -# COD, PRD, PRB, KI, MKI functions -def cod(ratio): - """ - COD is the average absolute percent deviation from the - median ratio. It is a measure of horizontal equity in assessment. 
- Horizontal equity means properties with a similar fair market value - should be similarly assessed. - - Lower COD indicates higher uniformity/horizontal equity in assessment. - The IAAO sets uniformity standards that define generally accepted ranges - for COD depending on property class. See `IAAO Standard on Ratio Studies`_ - Section 9.1, Table 1.3 for a full list of standard COD ranges. - - .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf - - .. note:: - The IAAO recommends trimming outlier ratios before calculating COD, - as it is extremely sensitive to large outliers. The typical method used is - dropping values beyond 3 * IQR (inner-quartile range). See - `IAAO Standard on Ratio Studies`_ Appendix B.1. - - :param ratio: - A numeric vector of ratios centered around 1, where the - numerator of the ratio is the estimated fair market value and the - denominator is the actual sale price. - :type ratio: numeric - - :return: A numeric vector containing the COD of ``ratios``. - :rtype: float - - :Example: - - .. code-block:: python - - # Calculate COD: - import assesspy as ap - - ap.cod(ap.ratios_sample().ratio) - """ - check_inputs(ratio) - - ratio = np.array(ratio) - - n = ratio.size - median_ratio = np.median(ratio) - cod = 100 / median_ratio * (sum(abs(ratio - median_ratio)) / n) - - return cod - - -def prd(assessed, sale_price): - """ - PRD is the mean ratio divided by the mean ratio weighted by sale - price. It is a measure of vertical equity in assessment. Vertical equity - means that properties at different levels of the income distribution - should be similarly assessed. - - PRD centers slightly above 1 and has a generally accepted value of between - 0.98 and 1.03, as defined in the `IAAO Standard on Ratio Studies`_ - Section 9.2.7. Higher PRD values indicate regressivity in assessment. - - .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf - - .. 
note:: - The IAAO recommends trimming outlier ratios before calculating PRD, - as it is extremely sensitive to large outliers. PRD is being deprecated in - favor of PRB, which is less sensitive to outliers and easier to interpret. - - :param assessed: - A numeric vector of assessed values. Must be the same length as ``sale_price``. - :param sale_price: - A numeric vector of sale prices. Must be the same length - as ``assessed``. - :type assessed: numeric - :type sale_price: numeric - - :return: A numeric vector containing the PRD of the input vectors. - :rtype: float - - :Example: - - .. code-block:: python - - # Calculate PRD: - import assesspy as ap - - ap.prd(ap.ratios_sample().assessed, ap.ratios_sample().sale_price) - """ - - assessed = np.array(assessed) - sale_price = np.array(sale_price) - check_inputs(assessed, sale_price) - - ratio = assessed / sale_price - prd = ratio.mean() / np.average(a=ratio, weights=sale_price) - - return prd - - -def prb(assessed, sale_price, round=None): - r""" - PRB is an index of vertical equity that quantifies the - relationship betweem ratios and assessed values as a percentage. In - concrete terms, a PRB of 0.02 indicates that, on average, ratios increase - by 2\% whenever assessed values increase by 100 percent. - - PRB is centered around 0 and has a generally accepted value of between - -0.05 and 0.05, as defined in the `IAAO Standard on Ratio Studies`_ - Section 9.2.7. Higher PRB values indicate progressivity in assessment, - while negative values indicate regressivity. - - .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf - - .. note: PRB is significantly less sensitive to outliers than PRD or COD. - - :param assessed: - A numeric vector of assessed values. Must be the same - length as ``sale_price``. - :param sale_price: - A numeric vector of sale prices. Must be the same length - as ``assessed``. - :param round: - Indicate desired rounding for output. 
- :type assessed: numeric - :type sale_price: numeric - :type round: int - - :return: A numeric vector containing the PRB of the input vectors. - :rtype: float - - :Example: - - .. code-block:: python - - # Calculate PRB: - import assesspy as ap - - ap.prb(ap.ratios_sample().assessed, ap.ratios_sample().sale_price) - """ - - assessed = np.array(assessed) - sale_price = np.array(sale_price) - check_inputs(assessed, sale_price) - - ratio = assessed / sale_price - median_ratio = np.median(ratio) - - lhs = (ratio - median_ratio) / median_ratio - rhs = np.log(((assessed / median_ratio) + sale_price) / 2) / np.log(2) - - lhs = np.array(lhs) - rhs = np.array(rhs) - - prb_model = sm.OLS(lhs, rhs).fit() - - prb_val = float(prb_model.params) - prb_ci = prb_model.conf_int(alpha=0.05)[0].tolist() - - if round is not None: - out = { - "prb": np.round(prb_val, round), - "95% ci": np.round(prb_ci, round), - } - - else: - out = {"prb": prb_val, "95% ci": prb_ci} - - return out - - -# Calculate the Gini cofficients needed for KI and MKI -def calculate_gini(assessed, sale_price): - df = pd.DataFrame({"av": assessed, "sp": sale_price}) - df = df.sort_values(by="sp", kind="mergesort") # for stable sort results - assessed_price = df["av"].values - sale_price = df["sp"].values - n = len(assessed_price) - - sale_sum = np.sum(sale_price * np.arange(1, n + 1)) - g_sale = 2 * sale_sum / np.sum(sale_price) - (n + 1) - gini_sale = g_sale / n - - assessed_sum = np.sum(assessed_price * np.arange(1, n + 1)) - g_assessed = 2 * assessed_sum / np.sum(assessed_price) - (n + 1) - gini_assessed = g_assessed / n - - return float(gini_assessed), float(gini_sale) - - -def mki(assessed, sale_price): - r""" - The Modified Kakwani Index (mki) is a GINI-based measures - to test for vertical equity. It first orders properties by sale price - (ascending), then calculates the Gini coefficient for sale values - and assessed values (while remaining ordered by sale price). 
The - Modified Kakwani Index is the the ratio of Gini of Assessed / Gini of Sale. - - For the Modified Kakwani Index: - - MKI < 1 is regressive - MKI = 1 is vertical equity - MKI > 1 is progressive - - .. Quintos, C. (2020). A Gini measure for vertical equity in property - assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2 - - .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in - property assessments. https://researchexchange.iaao.org/jptaa/vol18/iss2/6 - - :param assessed: - A numeric vector of assessed values. Must be the same - length as ``sale_price``. - :param sale_price: - A numeric vector of sale prices. Must be the same length - as ``assessed``. - :type assessed: numeric - :type sale_price: numeric - :return: A numeric vector MKI of the input vectors. - :rtype: float - - :Example: - - .. code-block:: python - - # Calculate MKI: - import assesspy as ap - - mki(ap.ratios_sample().assessed, ap.ratios_sample().sale_price) - """ - - check_inputs(assessed, sale_price) - gini_assessed, gini_sale = calculate_gini(assessed, sale_price) - MKI = gini_assessed / gini_sale - return float(MKI) - - -def ki(assessed, sale_price): - r""" - The Kakwani Index (ki) is a GINI-based measure to test for vertical equity. - It first orders properties by sale price (ascending), then calculates the Gini - coefficient for sale values and assessed values (while remaining ordered by sale price). - The Kakwani Index is the difference between Gini of Assessed - Gini of Sale. - - For the Kakwani Index: - - KI < 0 is regressive - KI = 0 is vertical equity - KI > 0 is progressive - - :param assessed: - A numeric vector of assessed values. Must be the same - length as ``sale_price``. - :param sale_price: - A numeric vector of sale prices. Must be the same length - as ``assessed``. - :type assessed: numeric - :type sale_price: numeric - :return: A numeric vector KI of the input vectors. - :rtype: float - - :Example: - - .. 
code-block:: python - - # Calculate KI: - import assesspy as ap - - ki(ap.ratios_sample().assessed, ap.ratios_sample().sale_price) - """ - - check_inputs(assessed, sale_price) - gini_assessed, gini_sale = calculate_gini(assessed, sale_price) - KI = gini_assessed - gini_sale - return float(KI) - - -# Functions to determine whether IAAO/Quintos fairness criteria has been met -def cod_met(x): - return 5 <= x <= 15 - - -def prd_met(x): - return 0.98 <= x <= 1.03 - - -def prb_met(x): - return -0.05 <= x <= 0.05 - - -def mki_met(x): - return 0.95 <= x <= 1.05 diff --git a/assesspy/load_data.py b/assesspy/load_data.py index 24e00af..dcca6c7 100644 --- a/assesspy/load_data.py +++ b/assesspy/load_data.py @@ -1,29 +1,52 @@ -# Import necessary libraries +from importlib.resources import as_file, files + import pandas as pd -import pkg_resources -# Load pre-made ratios sample data. -def ratios_sample(): +def ccao_sample() -> pd.DataFrame: """ - This sample was take from Evanston and New Trier in 2019. Ratios are - calculated using assessor certified (post-appeal) fair market values. + Sample of sales and estimated market values taken from Evanston and New + Trier in 2019. Estimates are Assessor certified (post-appeal) fair market + values. 
:return: - A data frame with 979 observation and 4 variables: + A Pandas DataFrame with 979 observation and 3 variables: - ======================== ======================================================= - **assessed** (`float`) The fair market assessed value predicted by CCAO - assessment models, including any successful appeals - **sale_price** (`float`) The recorded sale price of this property - **ratio** (`float`) Sales ratio representing fair market value / sale price - **town** (`object`) Township name the property is in - ======================== ======================================================= + ============================== ============================================ + **estimate** (`float`) Fair market value predicted by CCAO + assessment models, after any successful appeals + **sale_price** (`float`) Recorded sale price of this property + **township_name** (`object`) Name of the township containing the property + ============================== ============================================ - :rtype: DataFrame + :rtype: pd.DataFrame """ + source = files("assesspy").joinpath("data/ccao_sample.parquet") + with as_file(source) as file: + return pd.read_parquet(file) + + +def quintos_sample() -> pd.DataFrame: + """ + Sample of sales and estimated market values provided by Quintos in the + following MKI papers: + + .. Quintos, C. (2020). A Gini measure for vertical equity in property + assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2 - stream = pkg_resources.resource_stream( - __name__, "data/ratios_sample.parquet" - ) - return pd.read_parquet(stream) + .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in + property assessments. 
https://researchexchange.iaao.org/jptaa/vol18/iss2/6 + + :return: + A Pandas DataFrame with 30 observation and 2 variables: + + ======================== ===================================================== + **estimate** (`float`) Assessed fair market value + **sale_price** (`float`) Recorded sale price of this property + ======================== ===================================================== + + :rtype: pd.DataFrame + """ + source = files("assesspy").joinpath("data/quintos_sample.csv") + with as_file(source) as file: + return pd.read_csv(file) diff --git a/assesspy/metrics.py b/assesspy/metrics.py new file mode 100644 index 0000000..b259bc3 --- /dev/null +++ b/assesspy/metrics.py @@ -0,0 +1,382 @@ +import math +from typing import Union + +import pandas as pd +import statsmodels.api as sm + +from .utils import check_inputs + + +def cod( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + """ + COD is the average absolute percent deviation from the median ratio. + It is a measure of horizontal equity in assessment. Horizontal equity means + properties with a similar fair market value should be similarly assessed. + + Lower COD indicates higher uniformity/horizontal equity in assessment. + The IAAO sets uniformity standards that define generally accepted ranges + for COD depending on property class. See `IAAO Standard on Ratio Studies`_ + Section 9.1, Table 1.3 for a full list of standard COD ranges. + + .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf + + .. note:: + The IAAO recommends trimming outlier ratios before calculating COD, + as it is extremely sensitive to large outliers. The typical method used is + dropping values beyond 3 * IQR (inner-quartile range). See + `IAAO Standard on Ratio Studies`_ Appendix B.1. + + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. 
+ :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the COD of the inputs. + :rtype: float + + :Example: + + .. code-block:: python + + # Calculate COD: + import assesspy as ap + + ap.cod(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + ratio: pd.Series = estimate / sale_price + + n: int = ratio.size + median_ratio: float = ratio.median() + ratio_minus_med: pd.Series = ratio - median_ratio + abs_diff_sum: float = ratio_minus_med.abs().sum() + cod = float(100 / median_ratio * (abs_diff_sum / n)) + + return cod + + +def prd( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + """ + PRD is the mean ratio divided by the mean ratio weighted by sale + price. It is a measure of vertical equity in assessment. Vertical equity + means that properties at different levels of the income distribution + should be similarly assessed. + + PRD centers slightly above 1 and has a generally accepted value of between + 0.98 and 1.03, as defined in the `IAAO Standard on Ratio Studies`_ + Section 9.2.7. Higher PRD values indicate regressivity in assessment. + + .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf + + .. note:: + The IAAO recommends trimming outlier ratios before calculating PRD, + as it is extremely sensitive to large outliers. PRD is being deprecated in + favor of PRB and MKI, which are less sensitive to outliers and easier + to interpret. + + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. 
+ Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the PRD of the inputs. + :rtype: float + + :Example: + + .. code-block:: python + + # Calculate PRD: + import assesspy as ap + + ap.prd(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + ratio: pd.Series = estimate / sale_price + + prd = float(ratio.mean() / (ratio * sale_price / sale_price.sum()).sum()) + + return prd + + +def _calculate_prb( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> sm.regression.linear_model.RegressionResultsWrapper: + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + ratio: pd.Series = estimate / sale_price + median_ratio: float = ratio.median() + + lhs: pd.Series = (ratio - median_ratio) / median_ratio + rhs: pd.Series = ((estimate / median_ratio) + sale_price).apply( + lambda x: math.log2(x / 2) + ) + + prb_model = sm.OLS(lhs.to_numpy(), rhs.to_numpy()).fit() + + return prb_model + + +def prb( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + r""" + PRB is an index of vertical equity that quantifies the + relationship between ratios and estimated values as a percentage. In + concrete terms, a PRB of 0.02 indicates that, on average, ratios increase + by 2\% whenever the estimated values increase by 100 percent. + + PRB is centered around 0 and has a generally accepted value of between + -0.05 and 0.05, as defined in the `IAAO Standard on Ratio Studies`_ + Section 9.2.7. Higher PRB values indicate progressivity in assessment, + while negative values indicate regressivity. + + .. 
_IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf + + .. note: PRB is significantly less sensitive to outliers than PRD or COD. + + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the PRB of the inputs. + :rtype: float + + :Example: + + .. code-block:: python + + # Calculate PRB: + import assesspy as ap + + ap.prb(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + prb_model = _calculate_prb(estimate, sale_price) + prb = float(prb_model.params[0]) + + return prb + + +def _calculate_gini( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> tuple[float, float]: + """ + Helper function to calculate the Gini coefficients of sales and estimated + values. Note that the estimated value Gini is based on the sale price order. 
+ """ + check_inputs(estimate, sale_price) + + estimate = ( + pd.Series(estimate, dtype=float) + .rename("estimate") + .reset_index(drop=True) + ) + sale_price = ( + pd.Series(sale_price, dtype=float) + .rename("sale_price") + .reset_index(drop=True) + ) + df = pd.concat([estimate, sale_price], axis=1) + # Mergesort is required for stable sort results + df.sort_values(by="sale_price", kind="mergesort", inplace=True) + df.reset_index(drop=True, inplace=True) + a_sorted, sp_sorted = df["estimate"], df["sale_price"] + n: int = a_sorted.size + + assessed_sum: float = sum(a_sorted[i] * (i + 1) for i in range(n)) + g_assessed: float = 2 * assessed_sum / a_sorted.sum() - (n + 1) + gini_assessed: float = g_assessed / float(n) + + sale_price_sum: float = sum(sp_sorted[i] * (i + 1) for i in range(n)) + g_sale_price: float = 2 * sale_price_sum / sp_sorted.sum() - (n + 1) + gini_sale_price: float = g_sale_price / float(n) + + return gini_assessed, gini_sale_price + + +def mki( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + r""" + The Modified Kakwani Index (MKI) is a Gini-based measure to test for + vertical equity in assessment. It first orders properties by sale price + (ascending), then calculates the Gini coefficient for sale values + and estimated values (while remaining ordered by sale price). The + Modified Kakwani Index is the ratio between the coefficients: + $Gini of Estimated Values / Gini of Sale Prices$. + + For the Modified Kakwani Index: + + MKI < 1 is regressive + MKI = 1 is vertical equity + MKI > 1 is progressive + + .. Quintos, C. (2020). A Gini measure for vertical equity in property + assessments. https://researchexchange.iaao.org/jptaa/vol17/iss2/2 + + .. Quintos, C. (2021). A Gini decomposition of the sources of inequality in + property assessments. https://researchexchange.iaao.org/jptaa/vol18/iss2/6 + + :param estimate: + A list or ``pd.Series`` of estimated values. 
+ Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the MKI of the inputs. + :rtype: float + + :Example: + + .. code-block:: python + + # Calculate MKI: + import assesspy as ap + + ap.mki(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + + gini_assessed, gini_sale_price = _calculate_gini(estimate, sale_price) + mki = float(gini_assessed / gini_sale_price) + + return mki + + +def ki( + estimate: Union[list[int], list[float], pd.Series], + sale_price: Union[list[int], list[float], pd.Series], +) -> float: + r""" + The Kakwani Index (KI) is a Gini-based measure to test for + vertical equity in assessment. It first orders properties by sale price + (ascending), then calculates the Gini coefficient for sale values and + estimated values (while remaining ordered by sale price). The Kakwani Index + is the difference between the coefficients: + $Gini of Estimated Values - Gini of Sale Prices$. + + For the Kakwani Index: + + KI < 0 is regressive + KI = 0 is vertical equity + KI > 0 is progressive + + :param estimate: + A list or ``pd.Series`` of estimated values. + Must be the same length as ``sale_price``. + :param sale_price: + A list or ``pd.Series`` of sale prices. + Must be the same length as ``estimate``. + :type estimate: Array-like numeric values + :type sale_price: Array-like numeric values + + :return: A single float value containing the KI of the inputs. + :rtype: float + + :Example: + + .. 
code-block:: python + + # Calculate KI: + import assesspy as ap + + ap.ki(ap.ccao_sample().estimate, ap.ccao_sample().sale_price) + """ + + check_inputs(estimate, sale_price) + estimate = pd.Series(estimate, dtype=float) + sale_price = pd.Series(sale_price, dtype=float) + + gini_assessed, gini_sale_price = _calculate_gini(estimate, sale_price) + ki = float(gini_assessed - gini_sale_price) + + return ki + + +# Functions to determine whether IAAO/Quintos fairness criteria are met +def cod_met(x: float) -> bool: + """ + Check whether COD meets IAAO standards (between 5 and 15, inclusive). + + :param x: A single float value containing the COD. + :type x: float + + :return: A boolean value indicating whether the COD meets IAAO standards. + :rtype: bool + """ + return 5 <= x <= 15 + + +def prd_met(x: float) -> bool: + """ + Check whether PRD meets IAAO standards (between 0.98 and 1.03, inclusive). + + :param x: A single float value containing the PRD. + :type x: float + + :return: A boolean value indicating whether the PRD meets IAAO standards. + :rtype: bool + """ + return 0.98 <= x <= 1.03 + + +def prb_met(x: float) -> bool: + """ + Check whether PRB meets IAAO standards (between -0.05 and 0.05, inclusive). + + :param x: A single float value containing the PRB. + :type x: float + + :return: A boolean value indicating whether the PRB meets IAAO standards. + :rtype: bool + """ + return -0.05 <= x <= 0.05 + + +def mki_met(x: float) -> bool: + """ + Check whether MKI meets the recommendations outlined by Quintos + (between 0.95 and 1.05, inclusive). + + :param x: A single float value containing the MKI. + :type x: float + + :return: + A boolean value indicating whether the MKI meets + Quintos' recommendations. 
+ :rtype: bool + """ + return 0.95 <= x <= 1.05 diff --git a/assesspy/outliers.py b/assesspy/outliers.py index e532f83..a4ac3f7 100644 --- a/assesspy/outliers.py +++ b/assesspy/outliers.py @@ -1,90 +1,84 @@ -# Import necessary libraries -import numbers import warnings +from typing import Union -import numpy as np -from scipy import stats +import pandas as pd from .utils import check_inputs -# Outlier functions -def quantile_outlier(x, probs=[0.05, 0.95]): +def _quantile_outlier( + x: Union[list[int], list[float], pd.Series], + probs: tuple[float, float] = (0.05, 0.95), +) -> pd.Series: """ - Quantile method for identifying outliers. - + Quantile method for identifying outliers. This simply flags data + outside the percentiles specified in the ``probs`` parameter. """ + check_inputs(x, check_gt_zero=False) + x = pd.Series(x) - check_inputs(x) - - # Determine valid range of the data - range = [np.quantile(a=x, q=probs[0]), np.quantile(a=x, q=probs[1])] - - # Determine which input values are in range - out = (x < range[0]) | (x > range[1]) + # Determine which input values are in the valid quantile range + valid_range = [x.quantile(q=probs[0]), x.quantile(q=probs[1])] + out = (x < valid_range[0]) | (x > valid_range[1]) return out -def iqr_outlier(x, mult=3): +def _iqr_outlier( + x: Union[list[int], list[float], pd.Series], mult: float = 3.0 +) -> pd.Series: """ - IQR method for identifying outliers. - + IQR method for identifying outliers as specified in Appendix B.1 + of the IAAO Standard on Ratio Studies. 
""" + check_inputs(x, check_gt_zero=False) + x = pd.Series(x) - check_inputs(x) + quartiles = [x.quantile(q=0.25), x.quantile(q=0.75)] + iqr_mult = mult * (quartiles[1] - quartiles[0]) + out = (x < (quartiles[0] - iqr_mult)) | (x > (quartiles[1] + iqr_mult)) - # Check that inputs are well-formed numeric vector - if isinstance(mult, numbers.Number) & mult > 0: - # Calculate quartiles and mult*IQR - quartiles = [np.quantile(a=x, q=0.25), np.quantile(a=x, q=0.75)] - - iqr_mult = mult * stats.iqr(x) - - # Find values that are outliers - out = (x < (quartiles[0] - iqr_mult)) | (x > (quartiles[1] + iqr_mult)) - - # Warn if IQR trimmed values are within 95% CI. This indicates - # potentially non-normal/narrow distribution of data - if any(out & (quantile_outlier(x) == False)): # noqa - warnings.warn( - """Some values flagged as outliers despite being within 95% CI. - Check for narrow or skewed distribution.""" - ) - - return out + return out -def is_outlier(x, method="iqr", probs=[0.05, 0.95]): +def is_outlier( + x: Union[list[int], list[float], pd.Series], + method: str = "iqr", + probs: tuple[float, float] = (0.05, 0.95), + mult: float = 3.0, +) -> pd.Series: """ - Detect outliers in a numeric vector using standard methods. + Detect outliers in numeric values using standard methods. Certain assessment performance statistics are sensitive to extreme outliers. As such, it is often necessary to remove outliers before performing a sales ratio study. - Standard method is to remove outliers that are 3 * IQR. Warnings are thrown - when sample size is extremely small or when the IQR is extremely narrow. See - IAAO Standard on Ratio Studies Appendix B. Outlier Trimming Guidelines for - more information. + The IAAO standard method is to remove outliers that are 3 * IQR. Warnings + are thrown when sample size is extremely small or when the IQR is extremely + narrow. See IAAO Standard on Ratio Studies Appendix B. Outlier Trimming + Guidelines for more information. 
:param x: - A numeric vector. Must be longer than 2 and not contain - ``Inf`` or ``NaN``. + A list or ``pd.Series`` of numeric values, typically sales ratios. + Must be longer than 2 and cannot contain ``Inf`` or ``NaN`` values. :param method: - Default "iqr". String indicating outlier detection method. + Default ``iqr``. String indicating outlier detection method. Options are ``iqr`` or ``quantile``. :param probs: - Upper and lower percentiles denoting outlier boundaries. - :type x: numeric + Upper and lower percentiles boundaries for the ``quantile`` method. + :param mult: + Default ``3``. Multiple of IQR to use as the outlier detection + threshold. + :type x: Array-like numeric values :type method: str - :type probs: list[numeric] + :type probs: tuple[float] + :type mult: float :return: - A logical vector this same length as ``x`` indicating whether or + A boolean ``pd.Series`` the same length as ``x`` indicating whether or not each value of ``x`` is an outlier. - - :rtype: list[bool] + :rtype: pd.Series :Example: @@ -93,19 +87,27 @@ def is_outlier(x, method="iqr", probs=[0.05, 0.95]): # Detect outliers: import assesspy as ap - ap.is_outlier(ap.ratios_sample().ratio) + ap.is_outlier(ap.ccao_sample().estimate) """ - - out = {"iqr": iqr_outlier(x), "quantile": quantile_outlier(x, probs)}.get( - method - ) + if method == "iqr": + out = _iqr_outlier(x, mult) + iqr_quant = out & ~_quantile_outlier(x) + if any(iqr_quant): + warnings.warn( + f"{iqr_quant.sum()} values flagged as outliers despite being " + "within 95% CI. Check for narrow or skewed distribution." + ) + elif method == "quantile": + out = _quantile_outlier(x, probs) + else: + raise ValueError("Method must be either 'iqr' or 'quantile'") # Warn about removing data from small samples, as it can severely distort # ratio study outcomes - if any(out) & (len(out) < 30): + if any(out) & (out.size < 30): warnings.warn( - """Values flagged as outliers despite small sample size (N < 30). 
- Use caution when removing values from a small sample.""" + f"{out.sum()} flagged as outliers despite small sample size " + "(N < 30). Use caution when removing values from a small sample." ) return out diff --git a/assesspy/sales_chasing.py b/assesspy/sales_chasing.py index 4b8c3a9..ad1dedd 100644 --- a/assesspy/sales_chasing.py +++ b/assesspy/sales_chasing.py @@ -1,50 +1,52 @@ -# Import necessary libraries import warnings +from typing import Union import numpy as np +import pandas as pd from statsmodels.distributions.empirical_distribution import ECDF from .utils import check_inputs -# Sales chasing functions -def detect_chasing_cdf(ratio, bounds=[0.98, 1.02], cdf_gap=0.03): - # CDF gap method for detecting sales chasing. - - # Input checking and error handling - check_inputs(ratio) - - # Sort the ratios - sorted_ratio = np.sort(np.array(ratio)) +def _cdf_sales_chased( + x: Union[list[int], list[float], pd.Series], + bounds: tuple[float, float] = (0.98, 1.02), + gap: float = 0.05, +) -> bool: + check_inputs(x, check_gt_zero=False) + ratio = pd.Series(x) + sorted_ratio = ratio.sort_values() # Calculate the CDF of the sorted ratios and extract percentile ranking - cdf = ECDF(sorted_ratio)(sorted_ratio) + cdf = pd.Series(ECDF(sorted_ratio)(sorted_ratio)) # Calculate the difference between each value and the next value, the # largest difference will be the CDF gap - diffs = np.diff(cdf) + diffs = cdf.diff().dropna() # Check if the largest difference is greater than the threshold and make # sure it's within the specified boundaries - diff_loc = sorted_ratio[np.argmax(diffs)] - out = (max(diffs) > cdf_gap) & ( + diff_loc = sorted_ratio.iloc[int(diffs.idxmax())] + out = (diffs.max() > gap) & ( (diff_loc > bounds[0]) & (diff_loc < bounds[1]) ) - return out - + return bool(out) -def detect_chasing_dist(ratio, bounds=[0.98, 1.02]): - # Distribution comparison method for detecting sales chasing. 
- # Input checking and error handling - check_inputs(ratio) - - ratio = np.array(ratio) +def _dist_sales_chased( + x: Union[list[int], list[float], pd.Series], + bounds: tuple[float, float] = (0.98, 1.02), + gap: float = 0.05, +) -> bool: + check_inputs(x, check_gt_zero=False) + ratio = pd.Series(x) # Return the percentage of x within the specified range - def pct_in_range(x, min, max): - out = np.mean(((x >= min) & (x <= max))) + def pct_in_range( + x: Union[np.ndarray, pd.Series], min: float, max: float + ) -> float: + out = float(np.mean(((x >= min) & (x <= max)))) return out # Calculate the ideal normal distribution using observed values from input @@ -53,22 +55,31 @@ def pct_in_range(x, min, max): # Determine what percentage of the data would be within the specified # bounds in the ideal distribution pct_ideal = pct_in_range(ideal_dist, bounds[0], bounds[1]) - - # Determine what percentage of the data is actually within the bounds pct_actual = pct_in_range(ratio, bounds[0], bounds[1]) - return pct_actual > pct_ideal + return bool(abs(pct_actual - pct_ideal) > gap) -def detect_chasing(ratio, method="both"): +def is_sales_chased( + x: Union[list[int], list[float], pd.Series], + method="both", + bounds: tuple[float, float] = (0.98, 1.02), + gap: float = 0.05, +) -> bool: """ Sales chasing is when a property is selectively reappraised to - shift its assessed value toward its actual sale price. Sales chasing is + shift its assessed value toward its recent sale price. Sales chasing is difficult to detect. This function is NOT a statistical test and does not provide the probability of the given result. Rather, it combines two - novel methods to roughly estimate if sales chasing has occurred. + heuristic methods to roughly estimate if sales chasing has occurred. + + The first method (cdf) detects discontinuities in the cumulative + distribution function (CDF) of the ratios of input values. Sales ratios + that are not sales chased should have a fairly smooth CDF. 
Discontinuous + jumps in the CDF, particularly around 1, may indicate sales chasing. This + can usually be seen visually as a "flat spot" on the CDF. - The first method (dist) uses the technique outlined in the + The second method (dist) uses the technique outlined in the `IAAO Standard on Ratio Studies`_ Appendix E, Section 4. It compares the percentage of real data within +-2% of the mean ratio to the percentage of data within the same bounds given a constructed normal distribution @@ -76,26 +87,33 @@ def detect_chasing(ratio, method="both"): ratios that are sales chased may be more "bunched up" in the center of the distribution. - The second method (cdf) detects discontinuities in the cumulative - distribution function (CDF) of the input vector. Ratios that are not sales - chased should have a fairly smooth CDF. Discontinuous jumps in the CDF, - particularly around 1, may indicate sales chasing. This can usually be seen - visually as a "flat spot" on the CDF. - .. _IAAO Standard on Ratio Studies: https://www.iaao.org/media/standards/Standard_on_Ratio_Studies.pdf - :param ratio: - A numeric vector of ratios centered around 1, where the - numerator of the ratio is the estimated fair market value and the - denominator is the actual sale price. + :param x: + A list or ``pd.Series`` of numeric values. Must be longer than 2 + and cannot contain ``Inf`` or ``NaN`` values. :param method: - Default "both". String indicating sales chasing detection + Default ``both``. String indicating sales chasing detection method. Options are ``cdf``, ``dist``, or ``both``. - :type ratio: numeric + :param bounds: + Default ``(0.98, 1.02)``. Tuple of two floats indicating the + lower and upper bounds of the range of ratios to consider when + detecting sales chasing. Setting this to a narrow band at the + center of the ratio distribution prevents detecting false positives + at the tails. + :param gap: + Default ``0.05``. Float tuning factor. 
For the CDF method, it sets the + maximum percentage difference between two adjacent ratios. For the + distribution method, it sets the maximum percentage point difference + between the percentage of the data between the ``bounds`` in the real + distribution compared to the ideal distribution. + :type x: Array-like numeric values :type method: str + :type bounds: tuple[float, float] + :type gap: float :return: - A logical value indicating whether or not the input ratios may + A boolean value indicating whether or not the input values may have been sales chased. :rtype: bool @@ -109,39 +127,44 @@ def detect_chasing(ratio, method="both"): from matplotlib import pyplot # Generate fake data with normal vs chased ratios - normal_ratios = np.random.normal(1, 0.15, 10000) + normal_ratios = np.random.normal(1, 0.15, 10000).tolist() chased_ratios = list(np.random.normal(1, 0.15, 900)) + [1] * 100 # Plot to view discontinuity ecdf = ECDF(normal_ratios) pyplot.plot(ecdf.x, ecdf.y) pyplot.show() - ap.detect_chasing(normal_ratios) + ap.is_sales_chased(normal_ratios) ecdf = ECDF(chased_ratios) pyplot.plot(ecdf.x, ecdf.y) pyplot.show() - ap.detect_chasing(chased_ratios) + ap.is_sales_chased(chased_ratios) """ + if not (0 < gap < 1): + raise ValueError("Gap must be a positive value less than 1.") + if not isinstance(bounds, tuple): + raise TypeError("Bounds must be a tuple of two floats.") + if bounds[0] >= bounds[1]: + raise ValueError( + "Bounds must have the left value lower than the right value." 
+ ) - if method not in ("both", "cdf", "dist"): - raise Exception("Unrecognized method.") + if method == "cdf": + out = _cdf_sales_chased(x, bounds, gap) + elif method == "dist": + out = _dist_sales_chased(x, bounds, gap) + elif method == "both": + out_cdf = _cdf_sales_chased(x, bounds, gap) + out_dist = _dist_sales_chased(x, bounds, gap) + out = bool(out_cdf & out_dist) + else: + raise ValueError("Method must be one of 'cdf', 'dist', or 'both'") - if len(ratio) < 30: + if len(x) < 30: warnings.warn( - """ - Sales chasing detection can be misleading when applied to small - samples (N < 30). Increase N or use a different statistical test. - """ + "Sales chasing detection can be misleading when applied to small " + "samples (N < 30). Increase N or use a different test method." ) - out = None - - else: - out = { - "cdf": detect_chasing_cdf(ratio), - "dist": detect_chasing_dist(ratio), - "both": (detect_chasing_cdf(ratio) & detect_chasing_dist(ratio)), - }.get(method) - return out diff --git a/assesspy/tests/conftest.py b/assesspy/tests/conftest.py new file mode 100644 index 0000000..47239c9 --- /dev/null +++ b/assesspy/tests/conftest.py @@ -0,0 +1,83 @@ +import numpy as np +import pandas as pd +import pytest as pt + +import assesspy as ap + + +@pt.fixture(autouse=True, scope="function") +def set_seed() -> None: + np.random.seed(42) + return None + + +@pt.fixture(scope="session") +def ccao_data() -> tuple: + sample = ap.ccao_sample() + return sample.estimate, sample.sale_price + + +@pt.fixture(scope="session") +def quintos_data() -> tuple: + sample = ap.quintos_sample() + return sample.estimate, sample.sale_price + + +@pt.fixture( + scope="session", + params=[ + ([1] * 30, [1] * 29), + ([0, 0, 0], [0, 0, 0]), + ([-1, -2, -3], [-1, -2, -3]), + ([], []), + ([1], [1]), + ( + pd.concat( + [ap.ccao_sample()["estimate"], pd.Series([1.0], dtype="float")] + ), + pd.concat( + [ + ap.ccao_sample()["sale_price"], + pd.Series([float("Inf")], dtype="float"), + ] + ), + ), + ( + pd.concat( + 
[ap.ccao_sample()["estimate"], pd.Series([1.0], dtype="float")] + ), + pd.concat( + [ + ap.ccao_sample()["sale_price"], + pd.Series([float("NaN")], dtype="float"), + ] + ), + ), + ], +) +def bad_input(request) -> tuple: + return request.param + + +@pt.fixture( + scope="session", + params=[ + ([1e10, 2e10, 3e10], [1e10, 2e10, 3e10]), + ([1, 2.0, 3], [1.0, 2, 3.0]), + (ap.ccao_sample()["estimate"], ap.ccao_sample()["sale_price"]), + ( + ap.ccao_sample()["estimate"].set_axis( + pd.Index( + np.random.permutation(ap.ccao_sample()["estimate"].index) + ) + ), + ap.ccao_sample()["sale_price"].set_axis( + pd.Index( + np.random.permutation(ap.ccao_sample()["sale_price"].index) + ) + ), + ), + ], +) +def good_input(request) -> tuple: + return request.param diff --git a/assesspy/tests/test_ci.py b/assesspy/tests/test_ci.py index b27a062..957c801 100644 --- a/assesspy/tests/test_ci.py +++ b/assesspy/tests/test_ci.py @@ -1,122 +1,54 @@ -# Import necessary libraries -import numpy as np -import pandas as pd import pytest as pt -from numpy import testing as npt -import assesspy - -# Load the ratios sample dataset for testing -ratios_sample = assesspy.ratios_sample() - -ratio = ratios_sample.ratio -assessed = ratios_sample.assessed -sale_price = ratios_sample.sale_price - -##### TEST BOOT CI ##### # noqa - - -class TestBOOTCI: # Ensure input function is appropriate - def test_in_fun(self): - with pt.raises(Exception): - assert assesspy.boot_ci(str, ratio=ratio) - - with pt.raises(Exception): - assesspy.boot_ci(np.add, one=ratio, two=ratio, three=ratio) - - with pt.raises(Exception): - assesspy.boot_ci(assesspy.prd, fmv=assessed, sale_price=sale_price) - - with pt.raises(Exception): - assesspy.boot_ci(np.add, one=ratio, two=ratio) - - -##### TEST COD ##### # noqa - - -# Calculate COD CI -cod_ci_out_95 = assesspy.cod_ci(ratio, nboot=1000) -cod_ci_out_80 = assesspy.cod_ci(ratio, nboot=1000, alpha=0.2) - - -class TestCODCI: - def test_output_type(self): # Output is expected type - 
assert type(cod_ci_out_95) is list - assert type(cod_ci_out_95[0]) is np.float64 - - def test_cod(self): # Output equal to expected - npt.assert_allclose( - cod_ci_out_80, [16.89576541901062, 18.641992815316588], rtol=0.02 - ) - npt.assert_allclose( - cod_ci_out_95, [16.32413038955943, 19.226428249424757], rtol=0.02 - ) - - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.cod_ci([1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.cod_ci(10) - - with pt.raises(Exception): - assesspy.cod_ci(pd.concat([ratio, pd.Series(float("Inf"))])) - - with pt.raises(Exception): - assesspy.cod_ci(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.cod_ci(pd.concat([ratio, pd.Series(float("NaN"))])) - - with pt.raises(Exception): - assesspy.cod_ci([1] * 29 + ["1"]) - - -##### TEST PRD ##### # noqa - - -# Calculate PRD CI -prd_ci_out_95 = assesspy.prd_ci(assessed, sale_price, nboot=1000) -prd_ci_out_80 = assesspy.prd_ci(assessed, sale_price, nboot=1000, alpha=0.2) - - -class TestPRDCI: - def test_output_type(self): # Output is expected type - assert type(prd_ci_out_95) is list - assert type(prd_ci_out_95[0]) is np.float64 - - def test_prd(self): # Output equal to expected - npt.assert_allclose( - prd_ci_out_80, [1.0388355155405569, 1.0588098520230935], rtol=0.02 - ) - npt.assert_allclose( - prd_ci_out_95, [1.0333716711646226, 1.0643056985556307], rtol=0.02 +import assesspy as ap + + +class TestCI: + @pt.fixture(params=["cod", "prd", "prb"]) + def metric(self, request): + return request.param + + @pt.fixture(params=[0.80, 0.90, 0.95]) + def alpha(self, request): + return request.param + + def test_metric_ci_output_with_alpha(self, metric, alpha, ccao_data): + expected = { + "cod": { + 0.50: (17.3, 18.0), + 0.80: (17.6, 18.0), + 0.90: (17.7, 18.0), + 0.95: (17.7, 17.9), + }, + "prd": { + 0.50: (1.03, 1.06), + 0.80: (1.04, 1.06), + 0.90: (1.04, 1.05), + 0.95: (1.04, 1.05), + }, + "prb": { + 0.50: (0.000823, 0.00107), + 
0.80: (0.000823, 0.00107), + 0.90: (0.000885, 0.00100), + 0.95: (0.000916, 0.00097), + }, + } + ci_l, ci_u = getattr(ap, f"{metric}_ci")( + *ccao_data, nboot=200, alpha=alpha ) + assert pt.approx(ci_l, rel=0.01) == expected[metric][alpha][0] + assert pt.approx(ci_u, rel=0.01) == expected[metric][alpha][1] - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.prd_ci([1] * 30, [1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.prd_ci([1, 1, 1], [1, 1]) - - with pt.raises(Exception): - assesspy.prd_ci(10, 10) - + def test_metric_ci_raises_on_bad_input(self, metric, bad_input): with pt.raises(Exception): - assesspy.prd_ci( - pd.concat([assessed, pd.Series(float("Inf"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) + getattr(ap, f"{metric}_ci")(*bad_input, nboot=200) - with pt.raises(Exception): - assesspy.prd_ci(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.prd_ci( - pd.concat([assessed, pd.Series(float("NaN"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) + def test_metric_ci_succeeds_on_good_input(self, metric, good_input): + result = getattr(ap, f"{metric}_ci")(*good_input, nboot=200) + assert isinstance(result, tuple) + @pt.mark.parametrize("metric", ["cod", "prd"]) + @pt.mark.parametrize("nboot", [0, -10]) + def test_metric_ci_raises_on_bad_nboot(self, metric, ccao_data, nboot): with pt.raises(Exception): - assesspy.prd_ci([1] * 30, [1] * 29 + ["1"]) + getattr(ap, f"{metric}_ci")(*ccao_data, nboot=nboot) diff --git a/assesspy/tests/test_formulas.py b/assesspy/tests/test_formulas.py deleted file mode 100644 index 7414066..0000000 --- a/assesspy/tests/test_formulas.py +++ /dev/null @@ -1,254 +0,0 @@ -# Import necessary libraries -import numpy as np -import pandas as pd -import pytest as pt -from numpy import testing as npt - -import assesspy - -# Load the ratios sample dataset for testing -ratios_sample = assesspy.ratios_sample() - -ratio = ratios_sample.ratio -fmv = 
ratios_sample.assessed -sale_price = ratios_sample.sale_price - -##### TEST COD ##### # noqa - -# Calculate COD -cod_out = assesspy.cod(ratios_sample.ratio) - - -class TestCOD: - def test_cod(self): # Output equal to expected - npt.assert_allclose(cod_out, 17.81456901196891, rtol=0.02) - - def test_numeric_output(self): # Output is numeric - assert type(cod_out) is np.float64 - - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.cod([1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.cod(10) - - with pt.raises(Exception): - assesspy.cod(pd.concat([ratio, pd.Series(float("Inf"))])) - - with pt.raises(Exception): - assesspy.cod(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.cod(pd.concat([ratio, pd.Series(float("NaN"))])) - - with pt.raises(Exception): - assesspy.cod([1] * 29 + ["1"]) - - def test_cod_met(self): # Standard met function - assert not assesspy.cod_met(cod_out) - - -##### TEST PRD ##### # noqa - - -# Calculate PRD -prd_out = assesspy.prd(fmv, sale_price) - - -class TestPRD: - def test_prd(self): # Output equal to expected - npt.assert_allclose(prd_out, 1.0484192615223522, rtol=0.02) - - def test_numeric_output(self): # Output is numeric - assert type(prd_out) is np.float64 - - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.prd_ci([1] * 30, [1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.prd_ci([1, 1, 1], [1, 1]) - - with pt.raises(Exception): - assesspy.prd(10, 10) - - with pt.raises(Exception): - assesspy.prd( - pd.concat([fmv, pd.Series(float("Inf"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.prd(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.prd( - pd.concat([fmv, pd.Series(float("NaN"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.prd([1] * 30, [1] * 29 + ["1"]) - - def test_prd_met(self): # Standard met 
function - assert not assesspy.prd_met(prd_out) - - -##### TEST PRB ##### # noqa - -# Calculate PRB -prb_out = assesspy.prb(fmv, sale_price)["prb"] - - -class TestPRB: - def test_prb(self): # Output equal to expected - npt.assert_allclose(prb_out, 0.0009470721642262901, rtol=0.02) - - def test_numeric_output(self): # Output is numeric - assert type(prb_out) is float - - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.prb_ci([1] * 30, [1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.prb([1, 1, 1], [1, 1]) - - with pt.raises(Exception): - assesspy.prb(10, 10) - - with pt.raises(Exception): - assesspy.prb( - pd.concat([fmv, pd.Series(float("Inf"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.prb(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.prb( - pd.concat([fmv, pd.Series(float("NaN"))]), - pd.concat([sale_price, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.prb([1] * 30, [1] * 29 + ["1"]) - - def test_round(self): # Rounding must be int - with pt.raises(Exception): - assesspy.prb(fmv, sale_price, "z") - - with pt.raises(Exception): - assesspy.prb(fmv, sale_price, 1.1) - - def test_prb_met(self): # Standard met function - assert assesspy.prb_met(prb_out) - - -with open("assesspy/data/mki_ki.csv", "r") as input_csvfile: - # Create a list to store the extracted columns - gini_data_sale = [] - gini_data_assessed = [] - - # Iterate through each line in the input CSV - for line in input_csvfile: - columns = line.strip().split(",") - - first_column = columns[0].split('"')[1] - second_column = columns[1] - - gini_data_sale.append(first_column) - gini_data_assessed.append(second_column) - -gini_data_assessed = [ - int(value.replace('"', "")) for value in gini_data_assessed -] -gini_data_sale = [int(value.replace('"', "")) for value in gini_data_sale] - -mki_out = assesspy.mki(gini_data_assessed, gini_data_sale) - - -class Test_MKI: - 
def test_mki(self): # Output equal to expected - npt.assert_allclose(mki_out, 0.794, rtol=0.02) - - def test_numeric_output(self): # Output is numeric - assert type(mki_out) is float - - with pt.raises(Exception): - assesspy.mki([1, 1, 1], [1, 1]) - - with pt.raises(Exception): - assesspy.mki(10, 10) - - with pt.raises(Exception): - assesspy.mki( - pd.concat([gini_data_assessed, pd.Series(float("Inf"))]), - pd.concat([gini_data_sale, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.mki(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.mki( - pd.concat([gini_data_assessed, pd.Series(float("NaN"))]), - pd.concat([gini_data_sale, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.mki([1] * 30, [1] * 29 + ["1"]) - - def test_round(self): # Rounding must be int - with pt.raises(Exception): - assesspy.mki(gini_data_assessed, sale_price, "z") - - with pt.raises(Exception): - assesspy.mki(gini_data_assessed, sale_price, 1.1) - - def test_mki_met(self): # Standard met function - assert not assesspy.mki_met(mki_out) - - -ki_out = assesspy.ki(gini_data_assessed, gini_data_sale) - - -class Test_KI: - def test_ki(self): # Output equal to expected - npt.assert_allclose(ki_out, -0.06, rtol=0.02) - - def test_numeric_output(self): # Output is numeric - assert type(ki_out) is float - - with pt.raises(Exception): - assesspy.ki([1, 1, 1], [1, 1]) - - with pt.raises(Exception): - assesspy.ki(10, 10) - - with pt.raises(Exception): - assesspy.ki( - pd.concat([gini_data_assessed, pd.Series(float("Inf"))]), - pd.concat([gini_data_sale, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.ki(pd.DataFrame(ratio)) - - with pt.raises(Exception): - assesspy.ki( - pd.concat([gini_data_assessed, pd.Series(float("NaN"))]), - pd.concat([gini_data_sale, pd.Series(1.0)]), - ) - - with pt.raises(Exception): - assesspy.ki([1] * 30, [1] * 29 + ["1"]) - - def test_round(self): # Rounding must be int - with pt.raises(Exception): - 
assesspy.ki(gini_data_assessed, gini_data_sale, "z") - - with pt.raises(Exception): - assesspy.ki(gini_data_assessed, gini_data_sale, 1.1) diff --git a/assesspy/tests/test_metrics.py b/assesspy/tests/test_metrics.py new file mode 100644 index 0000000..946b237 --- /dev/null +++ b/assesspy/tests/test_metrics.py @@ -0,0 +1,48 @@ +import pytest as pt + +import assesspy as ap + + +class TestMetrics: + @pt.fixture(params=["cod", "prd", "prb", "mki", "ki"]) + def metric(self, request): + return request.param + + @pt.fixture + def metric_val(self, metric, ccao_data, quintos_data): + if metric in ["mki", "ki"]: + return getattr(ap, metric)(*quintos_data) + return getattr(ap, metric)(*ccao_data) + + def test_metric_value_is_correct(self, metric, metric_val): + expected = { + "cod": 17.81456901196891, + "prd": 1.0484192615223522, + "prb": 0.0009470721642262903, + "mki": 0.794, + "ki": -0.06, + } + assert pt.approx(metric_val, rel=0.02) == expected[metric] + + def test_metric_has_numeric_output(self, metric_val): + assert type(metric_val) is float + + def test_metric_raises_on_bad_input(self, metric, bad_input): + with pt.raises(Exception): + getattr(ap, metric)(*bad_input) + + def test_metric_succeeds_on_good_input(self, metric, good_input): + result = getattr(ap, metric)(*good_input) + assert type(result) is float + assert result != float("NaN") + + def test_metric_met_function_thresholds(self, metric, metric_val): + if metric == "ki": + pt.skip("Skipping test for 'ki' metric (ki_met does not exist)") + expected = { + "cod": False, + "prd": False, + "prb": True, + "mki": False, + } + assert getattr(ap, f"{metric}_met")(metric_val) == expected[metric] diff --git a/assesspy/tests/test_outliers.py b/assesspy/tests/test_outliers.py index 605c671..85454e6 100644 --- a/assesspy/tests/test_outliers.py +++ b/assesspy/tests/test_outliers.py @@ -1,69 +1,93 @@ -# Import necessary libraries +import warnings + import numpy as np import pandas as pd import pytest as pt -import assesspy - 
-# Create test vectors of data with certain distributions -np.random.seed(13378) - -# Normal distribution, no outliers -test_dist1 = np.random.normal(size=100) - -# Normal distribution, some outliers -test_dist2 = np.append(np.random.normal(size=100), [3, 4, 5, 6, 7]) - -# Non-normal, super narrow distribution -test_dist3 = np.append( - np.append(np.random.uniform(size=20), np.repeat(1, 50)), [5, 6, 7] -) - -# Create outputs for all distributions -dist1_iqr_out = assesspy.is_outlier(test_dist1, method="iqr") -dist1_qnt_out = assesspy.is_outlier(test_dist1, method="quantile") -dist2_iqr_out = assesspy.is_outlier(test_dist2, method="iqr") -dist2_qnt_out = assesspy.is_outlier(test_dist2, method="quantile") - -##### TEST OUTLIER ##### # noqa - - -class TestOUTTIES: - def test_output_type(self): # Output is logical array - assert type(dist1_iqr_out[0]) is np.bool_ - assert type(dist1_iqr_out) is np.ndarray - - assert type(dist1_qnt_out[0]) is np.bool_ - assert type(dist1_qnt_out) is np.ndarray - - def test_output_value(self): - assert sum(dist1_iqr_out) == 0 - assert sum(dist1_qnt_out) == 10 - assert sum(dist2_iqr_out) == 3 - assert sum(dist2_qnt_out) == 12 - - def test_bad_input(self): # Bad input data stops execution +import assesspy as ap + + +class TestOutliers: + @pt.fixture(params=["normal", "outlier", "narrow", "ccao", "quintos"]) + def distribution(self, request, ccao_data, quintos_data): + return request.param, { + "normal": np.random.normal(size=100).tolist(), + "outlier": np.append( + np.random.normal(size=100), [3, 4, 5, 6, 7] + ).tolist(), + "narrow": np.append( + np.append(np.random.uniform(size=20), np.repeat(1, 100)), + [5, 6, 7], + ).tolist(), + "ccao": ccao_data[0] / ccao_data[1], + "quintos": quintos_data[0] / quintos_data[1], + }[request.param] + + @pt.fixture(params=["iqr", "quantile"]) + def method(self, request): + return request.param + + def test_is_outlier_output_is_boolean_array(self, distribution, method): + dist_name, dist_data = 
distribution + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + assert isinstance(ap.is_outlier(dist_data, method), pd.Series) + assert ap.is_outlier(dist_data, method).dtype == np.bool_ + + def test_is_outlier_has_expected_outlier_counts( + self, + distribution, + method, + ): + dist_name, dist_data = distribution + expected = { + "normal": {"iqr": 0, "quantile": 10}, + "outlier": {"iqr": 2, "quantile": 12}, + "narrow": {"iqr": 23, "quantile": 10}, + "ccao": {"iqr": 28, "quantile": 98}, + "quintos": {"iqr": 0, "quantile": 4}, + } + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + assert ( + ap.is_outlier(dist_data, method).sum() + == expected[dist_name][method] + ) + + @pt.mark.parametrize( + "bad_input", + [ + 10, + pd.DataFrame([1, 2, 3]), + [1] * 29 + ["1"], + ], + ) + def test_is_outlier_raises_on_bad_input(self, bad_input): with pt.raises(Exception): - assesspy.is_outlier([1] * 29 + [0]) - - with pt.raises(Exception): - assesspy.is_outlier(10) - - with pt.raises(Exception): - assesspy.is_outlier(np.append(test_dist1, float("Inf"))) - - with pt.raises(Exception): - assesspy.is_outlier(pd.DataFrame(test_dist1)) - - with pt.raises(Exception): - assesspy.is_outlier(np.append(test_dist1, float("NaN"))) - + ap.is_outlier(bad_input) + + @pt.mark.parametrize( + "input_data", + [ + lambda x: np.append(x, float("Inf")), + lambda x: np.append(x, float("NaN")), + ], + ) + def test_is_outlier_raises_on_invalid_values( + self, input_data, distribution, method + ): with pt.raises(Exception): - assesspy.is_outlier([1] * 29 + ["1"]) - - def test_warnings(self): - with pt.warns(UserWarning): - assesspy.is_outlier(test_dist3, method="iqr") - + dist_name, dist_data = distribution + ap.is_outlier(input_data(dist_data), method) + + def test_is_outlier_warns_on_narrow_distribution(self, distribution): + dist_name, dist_data = distribution + if dist_name == "narrow": + with pt.warns(UserWarning): + ap.is_outlier(dist_data, "iqr") + else: + 
ap.is_outlier(dist_data, "iqr") + + def test_is_outlier_warns_on_small_sample(self): with pt.warns(UserWarning): - assesspy.is_outlier(np.random.normal(size=20), method="quantile") + ap.is_outlier(np.random.normal(size=20).tolist(), "quantile") diff --git a/assesspy/tests/test_sales_chasing.py b/assesspy/tests/test_sales_chasing.py index 7cb5710..f7f5bc3 100644 --- a/assesspy/tests/test_sales_chasing.py +++ b/assesspy/tests/test_sales_chasing.py @@ -1,62 +1,92 @@ -# Import necessary libraries -# Import necessary libraries import numpy as np import pandas as pd import pytest as pt -import assesspy - -# Create test vectors of data with certain distributions -np.random.seed(13378) - -# Load the ratios sample dataset for testing -ratios_sample = assesspy.ratios_sample() - -# Extract the components of the dataframe as vectors -sample_ratios = ratios_sample.ratio -normal_ratios = np.random.normal(1, 0.15, 100) -chased_ratios = np.append(np.random.normal(1, 0.15, 900), [1] * 100) - -##### TEST CHASING DETECTION ##### # noqa - -# Run detection -sample_out = assesspy.detect_chasing(sample_ratios) -normal_out = assesspy.detect_chasing(normal_ratios) -chased_out = assesspy.detect_chasing(chased_ratios) - - -class TestCHASE: - def test_method(self): +import assesspy as ap + + +class TestSalesChasing: + @pt.fixture + def sample_dist(self, ccao_data): + estimate, sale_price = ccao_data + ratio = estimate / sale_price + return ratio + + @pt.fixture(params=["normal", "chased", "sample"]) + def distribution(self, request, ccao_data, sample_dist): + return request.param, { + "normal": np.random.normal(1, size=1000).tolist(), + "chased": np.append( + np.random.normal(1, 0.15, 900), [1] * 100 + ).tolist(), + "sample": sample_dist, + }[request.param] + + @pt.fixture(params=["cdf", "dist", "both"]) + def method(self, request): + return request.param + + def test_is_sales_chased_output_is_boolean(self, distribution, method): + dist_name, dist_data = distribution + assert 
isinstance(ap.is_sales_chased(dist_data, method), bool) + + def test_is_sales_chased_has_expected_output(self, distribution, method): + dist_name, dist_data = distribution + expected = { + "normal": {"cdf": False, "dist": False, "both": False}, + "chased": {"cdf": True, "dist": True, "both": True}, + "sample": {"cdf": False, "dist": True, "both": False}, + } + assert ( + ap.is_sales_chased(dist_data, method) + == expected[dist_name][method] + ) + + @pt.mark.parametrize( + "bad_input", + [10, pd.DataFrame([1, 2, 3]), [1] * 29 + ["1"], None], + ) + def test_is_sales_chased_raises_on_bad_input(self, bad_input): with pt.raises(Exception): - assesspy.detect_chasing(sample_ratios, method="hug") - - def test_output_type(self): # Output is logical - assert type(sample_out) is np.bool_ - - def test_output_value(self): - assert not sample_out - assert not normal_out - assert chased_out - - def test_bad_input(self): # Bad input data stops execution - with pt.raises(Exception): - assesspy.detect_chasing([1] * 29 + [0]) - + ap.is_sales_chased(bad_input) + + @pt.mark.parametrize( + "input_data", + [ + lambda x: np.append(x, float("Inf")), + lambda x: np.append(x, float("NaN")), + ], + ) + def test_is_sales_chased_raises_on_invalid_values( + self, input_data, distribution, method + ): with pt.raises(Exception): - assesspy.detect_chasing(10) + dist_name, dist_data = distribution + ap.is_outlier(input_data(dist_data), method) + def test_is_sales_chased_raises_on_invalid_method(self, distribution): with pt.raises(Exception): - assesspy.detect_chasing(np.append(sample_ratios, float("Inf"))) + dist_name, dist_data = distribution + ap.is_sales_chased(dist_data, method="hug") - with pt.raises(Exception): - assesspy.detect_chasing(pd.DataFrame(sample_ratios)) + def test_is_sales_chased_warns_on_small_sample(self): + with pt.warns(UserWarning): + ap.is_sales_chased(np.random.normal(size=29).tolist()) + @pt.mark.parametrize( + "bounds", + [(0.0, 0.0), [0.5, 0.4], (2.0, 1.0), (2.0, "1.0"), 
None, "2.0"], + ) + def test_is_sales_chased_raises_on_invalid_bounds(self, bounds): with pt.raises(Exception): - assesspy.detect_chasing(np.append(sample_ratios, float("NaN"))) - + ap.is_sales_chased( + np.random.normal(size=40).tolist(), bounds=bounds + ) + + @pt.mark.parametrize( + "gap", + [0, 1, -1, 2, float("NaN"), float("Inf"), None], + ) + def test_is_sales_chased_raises_on_invalid_gap(self, gap): with pt.raises(Exception): - assesspy.detect_chasing([1] * 29 + ["1"]) - - def test_warnings(self): # Small sample throughs a warning - with pt.warns(UserWarning): - assesspy.detect_chasing(np.random.normal(size=29)) + ap.is_sales_chased(np.random.normal(size=40).tolist(), gap=gap) diff --git a/assesspy/utils.py b/assesspy/utils.py index ef99488..39703f9 100644 --- a/assesspy/utils.py +++ b/assesspy/utils.py @@ -1,35 +1,28 @@ -# Import necessary libraries import numpy as np import pandas as pd from pandas.api.types import is_numeric_dtype -def check_inputs(*args): - out = [""] - - for x in args: - # *args passed into *args can created nested tuples - unnest - if isinstance(x, tuple): - args = x - +def check_inputs(*args, check_gt_zero: bool = True) -> None: + out_msg = [""] for x in args: - if isinstance(x, pd.core.frame.DataFrame): - raise Exception("Input cannot be a dataframe.") - check = pd.Series(x) if not is_numeric_dtype(check): - raise Exception("All input vectors must be numeric.") - if check.isnull().values.any(): - out.append("\nInput vectors contain null values.") + out_msg.append("All input values must be numeric.") + if check.isnull().any(): + out_msg.append("All input values cannot be null.") if len(check) <= 1: - out.append("\nAll input vectors must have length greater than 1.") + out_msg.append("All input values must have length greater than 1.") if not all(np.isfinite(check) | check.isnull()): - out.append("\nInfinite values in input vectors.") - if any(check == 0): - out.append("\nInput vectors cannot contain values of 0.") + out_msg.append("All 
input values cannot be infinite.") + if any(check <= 0) and check_gt_zero: + out_msg.append("All input values must be greater than 0.") - out = set(out) + lengths = [len(pd.Series(x)) for x in args] + if len(set(lengths)) > 1: + out_msg.append("All input values must have the same length.") - if len(out) > 1: - raise Exception("".join(map(str, out))) + out_msg_set = set(out_msg) + if len(out_msg_set) > 1: + raise Exception("\n".join(out_msg_set)) diff --git a/docs/source/ccao_sample.rst b/docs/source/ccao_sample.rst new file mode 100644 index 0000000..b6c7e05 --- /dev/null +++ b/docs/source/ccao_sample.rst @@ -0,0 +1,5 @@ +=========================================================== +Sample of estimates and sales data pulled from CCAO records +=========================================================== + +.. autofunction:: assesspy.ccao_sample diff --git a/docs/source/ci.rst b/docs/source/ci.rst index 0b5010b..2b99f54 100644 --- a/docs/source/ci.rst +++ b/docs/source/ci.rst @@ -1,5 +1,13 @@ -=========================================== -Calculate bootstrapped confidence intervals -=========================================== +============================== +Calculate confidence intervals +============================== .. autofunction:: assesspy.boot_ci +.. autofunction:: assesspy.cod_ci + :noindex: + +.. autofunction:: assesspy.prd_ci + :noindex: + +.. autofunction:: assesspy.prb_ci + :noindex: diff --git a/docs/source/cod.rst b/docs/source/cod.rst index fa8824f..feabd89 100644 --- a/docs/source/cod.rst +++ b/docs/source/cod.rst @@ -3,3 +3,5 @@ Calculate Coefficient of Dispersion (COD) ========================================= .. autofunction:: assesspy.cod +.. autofunction:: assesspy.cod_ci +.. 
autofunction:: assesspy.cod_met diff --git a/docs/source/conf.py b/docs/source/conf.py index b7c8738..dc8350a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,4 +12,5 @@ # which can't be parsed by sphinx-pyproject source_suffix = {".rst": "restructuredtext", ".ipynb": "myst-nb"} nb_render_image_options = {"width": "450px", "align": "center"} +nb_execution_timeout = 600 html_sidebars = {"**": []} diff --git a/docs/source/index.rst b/docs/source/index.rst index a51848f..e94ccc1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -33,11 +33,11 @@ believe that reporters, taxpayers, and members of academia will find this package helpful in monitoring the performance of local assessors and conducting research. -For detailed documentation on included functions and data, `visit the -full reference list `_. +For detailed documentation on included functions and data, :doc:`visit the +full reference list `. For examples of specific tasks you can complete with ``assesspy`` -functions, see the `vignettes page `_. +functions, see the :doc:`vignettes page `. Installation ------------ diff --git a/docs/source/ki.rst b/docs/source/ki.rst index f8d6057..4c73dae 100644 --- a/docs/source/ki.rst +++ b/docs/source/ki.rst @@ -1,5 +1,5 @@ -========================================= +============= Kakwani Index -========================================= +============= .. autofunction:: assesspy.ki diff --git a/docs/source/mki.rst b/docs/source/mki.rst index c910faf..a6f2952 100644 --- a/docs/source/mki.rst +++ b/docs/source/mki.rst @@ -1,5 +1,6 @@ -========================================= +====================== Modified Kakwani Index -========================================= +====================== .. autofunction:: assesspy.mki +.. 
autofunction:: assesspy.mki_met diff --git a/docs/source/mki_ki.rst b/docs/source/mki_ki.rst deleted file mode 100644 index 47e38d7..0000000 --- a/docs/source/mki_ki.rst +++ /dev/null @@ -1,5 +0,0 @@ -======================================================= -Data To Replicate Quintos Study -======================================================= - -.. autofunction:: mki_ki diff --git a/docs/source/notebooks/example-ratio-study.ipynb b/docs/source/notebooks/example-ratio-study.ipynb index da0c79a..de5c7ef 100644 --- a/docs/source/notebooks/example-ratio-study.ipynb +++ b/docs/source/notebooks/example-ratio-study.ipynb @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -127,11 +127,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Pivot to longer, Join the two datasets based on PIN, keeping only those that have assessed\n", + "# Pivot to longer, join the two datasets based on PIN, keeping only those that have assessed\n", "# values AND sales\n", "combined = pd.merge(\n", " pd.melt(\n", @@ -146,10 +146,11 @@ " how=\"inner\",\n", ")\n", "\n", - "# Remove multisales, then calculate the ratio for each property\n", + "# Remove multisales, then calculate the sale ratio for each property\n", "# and assessment stage\n", - "combined = combined[not combined.is_multisale]\n", - "combined[\"ratio\"] = combined.assessed * 10 / combined.sale_price" + "combined = combined[~combined.is_multisale]\n", + "combined[\"assessed\"] = combined.assessed * 10\n", + "combined[\"ratio\"] = combined.assessed / combined.sale_price" ] }, { @@ -163,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -220,199 +221,159 @@ " mailed_tot\n", " 353\n", " 22.10\n", - " [20.12, 24.46]\n", + " [20.35, 23.8]\n", " False\n", " 1.04\n", - " [1.02, 1.07]\n", + " [1.03, 1.07]\n", " 
False\n", - " 0.003\n", - " [0.001, 0.004]\n", + " 0.0027\n", + " [0.001, 0.0045]\n", " True\n", " \n", " \n", " certified_tot\n", " 353\n", " 22.03\n", - " [20.02, 23.81]\n", + " [20.51, 23.81]\n", " False\n", " 1.05\n", - " [1.02, 1.08]\n", + " [1.03, 1.07]\n", " False\n", - " 0.002\n", - " [0.001, 0.004]\n", + " 0.0023\n", + " [0.0006, 0.0041]\n", " True\n", " \n", " \n", " board_tot\n", " 353\n", " 20.75\n", - " [18.72, 23.04]\n", + " [19.22, 22.36]\n", " False\n", " 1.05\n", - " [1.02, 1.08]\n", - " False\n", - " 0.001\n", - " [-0.0, 0.003]\n", - " True\n", - " \n", - " \n", - " New Trier\n", - " mailed_tot\n", - " 88\n", - " 21.57\n", - " [17.01, 26.59]\n", - " False\n", - " 1.06\n", - " [1.02, 1.11]\n", - " False\n", - " 0.003\n", - " [-0.0, 0.007]\n", - " True\n", - " \n", - " \n", - " certified_tot\n", - " 88\n", - " 22.69\n", - " [17.8, 29.23]\n", - " False\n", - " 1.08\n", - " [1.03, 1.14]\n", + " [1.03, 1.07]\n", " False\n", - " 0.003\n", - " [-0.001, 0.007]\n", - " True\n", - " \n", - " \n", - " board_tot\n", - " 88\n", - " 21.66\n", - " [16.93, 26.57]\n", - " False\n", - " 1.08\n", - " [1.03, 1.13]\n", - " False\n", - " 0.003\n", - " [-0.001, 0.006]\n", + " 0.0014\n", + " [-0.0003, 0.003]\n", " True\n", " \n", " \n", " Northfield\n", " mailed_tot\n", - " 364\n", - " 18.32\n", - " [16.39, 20.19]\n", + " 334\n", + " 17.37\n", + " [16.04, 18.65]\n", " False\n", - " 1.03\n", + " 1.04\n", " [1.02, 1.05]\n", - " True\n", - " 0.002\n", - " [0.0, 0.003]\n", + " False\n", + " 0.0012\n", + " [-0.0002, 0.0025]\n", " True\n", " \n", " \n", " certified_tot\n", - " 364\n", - " 18.10\n", - " [16.45, 20.06]\n", + " 334\n", + " 17.22\n", + " [15.82, 18.47]\n", " False\n", " 1.03\n", - " [1.01, 1.05]\n", + " [1.02, 1.05]\n", " True\n", - " 0.001\n", - " [0.0, 0.003]\n", + " 0.0011\n", + " [-0.0003, 0.0025]\n", " True\n", " \n", " \n", " board_tot\n", - " 364\n", - " 17.04\n", - " [15.35, 18.97]\n", + " 334\n", + " 16.45\n", + " [15.22, 17.69]\n", " False\n", " 1.05\n", 
" [1.03, 1.06]\n", " False\n", - " 0.001\n", - " [-0.0, 0.002]\n", + " 0.0006\n", + " [-0.0008, 0.0019]\n", " True\n", " \n", " \n", " Palatine\n", " mailed_tot\n", - " 2133\n", - " 15.68\n", - " [14.82, 16.82]\n", + " 2156\n", + " 15.64\n", + " [14.94, 16.44]\n", " False\n", " 1.01\n", " [1.0, 1.02]\n", " True\n", - " 0.002\n", - " [0.001, 0.003]\n", + " 0.0022\n", + " [0.0015, 0.0029]\n", " True\n", " \n", " \n", " certified_tot\n", - " 2133\n", - " 15.62\n", - " [14.57, 17.07]\n", + " 2156\n", + " 15.56\n", + " [14.84, 16.38]\n", " False\n", " 1.01\n", " [1.0, 1.01]\n", " True\n", - " 0.002\n", - " [0.001, 0.003]\n", + " 0.0021\n", + " [0.0014, 0.0028]\n", " True\n", " \n", " \n", " board_tot\n", - " 2133\n", - " 15.40\n", - " [14.36, 16.47]\n", + " 2156\n", + " 15.35\n", + " [14.63, 16.17]\n", " False\n", " 1.01\n", " [1.0, 1.02]\n", " True\n", - " 0.002\n", - " [0.001, 0.003]\n", + " 0.0019\n", + " [0.0012, 0.0026]\n", " True\n", " \n", " \n", " Wheeling\n", " mailed_tot\n", - " 2196\n", - " 17.84\n", - " [16.72, 19.29]\n", + " 2272\n", + " 17.56\n", + " [16.72, 18.47]\n", " False\n", - " 1.03\n", - " [1.02, 1.04]\n", + " 1.02\n", + " [1.02, 1.03]\n", " True\n", - " 0.002\n", - " [0.001, 0.003]\n", + " 0.0018\n", + " [0.0009, 0.0026]\n", " True\n", " \n", " \n", " certified_tot\n", - " 2196\n", - " 17.78\n", - " [16.88, 19.0]\n", + " 2272\n", + " 17.51\n", + " [16.73, 18.44]\n", " False\n", " 1.03\n", - " [1.02, 1.04]\n", + " [1.02, 1.03]\n", " True\n", - " 0.002\n", - " [0.001, 0.002]\n", + " 0.0016\n", + " [0.0008, 0.0024]\n", " True\n", " \n", " \n", " board_tot\n", - " 2196\n", - " 17.55\n", - " [16.45, 19.03]\n", + " 2272\n", + " 17.26\n", + " [16.47, 18.2]\n", " False\n", " 1.03\n", - " [1.02, 1.04]\n", + " [1.02, 1.03]\n", " True\n", - " 0.002\n", - " [0.001, 0.002]\n", + " 0.0015\n", + " [0.0007, 0.0023]\n", " True\n", " \n", " \n", @@ -422,48 +383,39 @@ "text/plain": [ " n cod cod_ci cod_met prd \\\n", "township_name stage \n", - "Barrington 
mailed_tot 353 22.10 [20.12, 24.46] False 1.04 \n", - " certified_tot 353 22.03 [20.02, 23.81] False 1.05 \n", - " board_tot 353 20.75 [18.72, 23.04] False 1.05 \n", - "New Trier mailed_tot 88 21.57 [17.01, 26.59] False 1.06 \n", - " certified_tot 88 22.69 [17.8, 29.23] False 1.08 \n", - " board_tot 88 21.66 [16.93, 26.57] False 1.08 \n", - "Northfield mailed_tot 364 18.32 [16.39, 20.19] False 1.03 \n", - " certified_tot 364 18.10 [16.45, 20.06] False 1.03 \n", - " board_tot 364 17.04 [15.35, 18.97] False 1.05 \n", - "Palatine mailed_tot 2133 15.68 [14.82, 16.82] False 1.01 \n", - " certified_tot 2133 15.62 [14.57, 17.07] False 1.01 \n", - " board_tot 2133 15.40 [14.36, 16.47] False 1.01 \n", - "Wheeling mailed_tot 2196 17.84 [16.72, 19.29] False 1.03 \n", - " certified_tot 2196 17.78 [16.88, 19.0] False 1.03 \n", - " board_tot 2196 17.55 [16.45, 19.03] False 1.03 \n", + "Barrington mailed_tot 353 22.10 [20.35, 23.8] False 1.04 \n", + " certified_tot 353 22.03 [20.51, 23.81] False 1.05 \n", + " board_tot 353 20.75 [19.22, 22.36] False 1.05 \n", + "Northfield mailed_tot 334 17.37 [16.04, 18.65] False 1.04 \n", + " certified_tot 334 17.22 [15.82, 18.47] False 1.03 \n", + " board_tot 334 16.45 [15.22, 17.69] False 1.05 \n", + "Palatine mailed_tot 2156 15.64 [14.94, 16.44] False 1.01 \n", + " certified_tot 2156 15.56 [14.84, 16.38] False 1.01 \n", + " board_tot 2156 15.35 [14.63, 16.17] False 1.01 \n", + "Wheeling mailed_tot 2272 17.56 [16.72, 18.47] False 1.02 \n", + " certified_tot 2272 17.51 [16.73, 18.44] False 1.03 \n", + " board_tot 2272 17.26 [16.47, 18.2] False 1.03 \n", "\n", - " prd_ci prd_met prb prb_ci \\\n", - "township_name stage \n", - "Barrington mailed_tot [1.02, 1.07] False 0.003 [0.001, 0.004] \n", - " certified_tot [1.02, 1.08] False 0.002 [0.001, 0.004] \n", - " board_tot [1.02, 1.08] False 0.001 [-0.0, 0.003] \n", - "New Trier mailed_tot [1.02, 1.11] False 0.003 [-0.0, 0.007] \n", - " certified_tot [1.03, 1.14] False 0.003 [-0.001, 0.007] \n", - " 
board_tot [1.03, 1.13] False 0.003 [-0.001, 0.006] \n", - "Northfield mailed_tot [1.02, 1.05] True 0.002 [0.0, 0.003] \n", - " certified_tot [1.01, 1.05] True 0.001 [0.0, 0.003] \n", - " board_tot [1.03, 1.06] False 0.001 [-0.0, 0.002] \n", - "Palatine mailed_tot [1.0, 1.02] True 0.002 [0.001, 0.003] \n", - " certified_tot [1.0, 1.01] True 0.002 [0.001, 0.003] \n", - " board_tot [1.0, 1.02] True 0.002 [0.001, 0.003] \n", - "Wheeling mailed_tot [1.02, 1.04] True 0.002 [0.001, 0.003] \n", - " certified_tot [1.02, 1.04] True 0.002 [0.001, 0.002] \n", - " board_tot [1.02, 1.04] True 0.002 [0.001, 0.002] \n", + " prd_ci prd_met prb prb_ci \\\n", + "township_name stage \n", + "Barrington mailed_tot [1.03, 1.07] False 0.0027 [0.001, 0.0045] \n", + " certified_tot [1.03, 1.07] False 0.0023 [0.0006, 0.0041] \n", + " board_tot [1.03, 1.07] False 0.0014 [-0.0003, 0.003] \n", + "Northfield mailed_tot [1.02, 1.05] False 0.0012 [-0.0002, 0.0025] \n", + " certified_tot [1.02, 1.05] True 0.0011 [-0.0003, 0.0025] \n", + " board_tot [1.03, 1.06] False 0.0006 [-0.0008, 0.0019] \n", + "Palatine mailed_tot [1.0, 1.02] True 0.0022 [0.0015, 0.0029] \n", + " certified_tot [1.0, 1.01] True 0.0021 [0.0014, 0.0028] \n", + " board_tot [1.0, 1.02] True 0.0019 [0.0012, 0.0026] \n", + "Wheeling mailed_tot [1.02, 1.03] True 0.0018 [0.0009, 0.0026] \n", + " certified_tot [1.02, 1.03] True 0.0016 [0.0008, 0.0024] \n", + " board_tot [1.02, 1.03] True 0.0015 [0.0007, 0.0023] \n", "\n", " prb_met \n", "township_name stage \n", "Barrington mailed_tot True \n", " certified_tot True \n", " board_tot True \n", - "New Trier mailed_tot True \n", - " certified_tot True \n", - " board_tot True \n", "Northfield mailed_tot True \n", " certified_tot True \n", " board_tot True \n", @@ -475,7 +427,7 @@ " board_tot True " ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -499,17 +451,17 @@ " lambda x: pd.Series(\n", " {\n", " \"n\": np.size(x[\"pin\"]),\n", - 
" \"cod\": np.round(ap.cod(ratio=x[\"ratio\"]), 2),\n", - " \"cod_ci\": np.round(ap.cod_ci(ratio=x[\"ratio\"]), 2),\n", + " \"cod\": np.round(ap.cod(x[\"assessed\"], x[\"sale_price\"]), 2),\n", + " \"cod_ci\": np.round(ap.cod_ci(x[\"assessed\"], x[\"sale_price\"]), 2),\n", " \"prd\": np.round(ap.prd(x[\"assessed\"], x[\"sale_price\"]), 2),\n", " \"prd_ci\": np.round(ap.prd_ci(x[\"assessed\"], x[\"sale_price\"]), 2),\n", - " \"prb\": ap.prb(x[\"assessed\"], x[\"sale_price\"], 3),\n", + " \"prb\": np.round(ap.prb(x[\"assessed\"], x[\"sale_price\"]), 4),\n", + " \"prb_ci\": np.round(ap.prb_ci(x[\"assessed\"], x[\"sale_price\"]), 4),\n", " }\n", " )\n", ")\n", "\n", - "town_stats[\"prb_ci\"] = town_stats.prb.str[\"95% ci\"]\n", - "town_stats[\"prb\"] = town_stats.prb.str[\"prb\"]\n", + "\n", "town_stats[\"cod_met\"] = town_stats.cod.apply(ap.cod_met)\n", "town_stats[\"prd_met\"] = town_stats.prd.apply(ap.prd_met)\n", "town_stats[\"prb_met\"] = town_stats.prb.apply(ap.prb_met)\n", @@ -543,7 +495,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -580,42 +532,42 @@ " \n", " 1\n", " 20%\n", - " $175,000\n", + " $174,000\n", " \n", " \n", " 2\n", " 30%\n", - " $217,850\n", + " $215,000\n", " \n", " \n", " 3\n", " 40%\n", - " $257,000\n", + " $255,000\n", " \n", " \n", " 4\n", " 50%\n", - " $295,000\n", + " $290,745\n", " \n", " \n", " 5\n", " 60%\n", - " $330,000\n", + " $328,000\n", " \n", " \n", " 6\n", " 70%\n", - " $378,150\n", + " $372,500\n", " \n", " \n", " 7\n", " 80%\n", - " $454,600\n", + " $445,000\n", " \n", " \n", " 8\n", " 90%\n", - " $595,000\n", + " $575,000\n", " \n", " \n", "\n", @@ -624,17 +576,17 @@ "text/plain": [ " Decile Sale Price\n", "0 10% $130,000\n", - "1 20% $175,000\n", - "2 30% $217,850\n", - "3 40% $257,000\n", - "4 50% $295,000\n", - "5 60% $330,000\n", - "6 70% $378,150\n", - "7 80% $454,600\n", - "8 90% $595,000" + "1 20% $174,000\n", + "2 30% $215,000\n", + "3 40% $255,000\n", 
+ "4 50% $290,745\n", + "5 60% $328,000\n", + "6 70% $372,500\n", + "7 80% $445,000\n", + "8 90% $575,000" ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -663,12 +615,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -727,12 +679,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -743,10 +695,10 @@ { "data": { "text/plain": [ - "{'Blue Chased?': np.False_, 'Red Chased?': np.True_}" + "{'Blue Chased?': False, 'Red Chased?': True}" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -769,8 +721,8 @@ "plt.show()\n", "\n", "{\n", - " \"Blue Chased?\": ap.detect_chasing(normal_ratios),\n", - " \"Red Chased?\": ap.detect_chasing(chased_ratios),\n", + " \"Blue Chased?\": ap.is_sales_chased(normal_ratios),\n", + " \"Red Chased?\": ap.is_sales_chased(chased_ratios),\n", "}" ] }, @@ -778,7 +730,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Ratios that include selective appraisals will be clustered around the value of one much more than ratios produced from a CAMA system. We can see this visually in the graph where the cumulative distribution curve shows a discontinuous jump, or 'flat spot', near one." + "Ratios that include selective appraisals will be clustered around the value of one much more than ratios produced from a CAMA system. We can see this visually in the graph where the cumulative distribution curve shows a discontinuous jump, or 'flat spot', near 1." ] }, { @@ -796,12 +748,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -814,8 +766,8 @@ "# Combine sale price and assessed value, calculate cumulative sums\n", "gini_data = combined[[\"sale_price\", \"assessed\"]].sort_values(by=\"sale_price\")\n", "\n", - "sale_price = gini_data[\"sale_price\"]\n", "assessed = gini_data[\"assessed\"]\n", + "sale_price = gini_data[\"sale_price\"]\n", "\n", "lorenz_data_price = pd.DataFrame(\n", " {\n", @@ -860,14 +812,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this graphic, the green line (Line of Equality) represents a hypothetical environment, where property valuations are completely equitable. The axes represent the cumulative percentage of value (y-axis) as the percentage of properties (x-axis) increases.\n", + "In this graphic, the green line (Line of Equality) represents a hypothetical environment where property valuations are completely equitable (all properties have the same value). The axes represent the cumulative percentage of value (y-axis) as the percentage of properties (x-axis) increases.\n", "\n", - "The curves show that for the vast majority of the income distribution, assessed values are closer to the Line of Equality. This can be interpreted two ways:\n", + "The curves show that for the vast majority of the price distribution, assessed values are closer to the Line of Equality. This can be interpreted two ways:\n", "\n", "1. When the assessed value curve is above the sale price curve, the gap between the the two lines at any individual point, represents the cumulative over-assessment for all houses at that value or below.\n", "2. Gini coefficient for sale price is going to be higher than the Gini coefficient for assessed price (larger area between the the curve and the Line of Equality).\n", "\n", - "In this situation, the graph shows slightly regressive property valuations. 
This is not immediately intuitive, but to conceptualize this, think of an exaggerated \"progressive\" policy, where all houses were valued at $0 with one house responsible for all the assessed value. In this distribution, curve would be at 0 until the final house, where it would jump to 100% of the cumulative value (a Gini of 1). Thus, a higher Gini represents more progressive assessments, where tax assessments become larger as property value increases.\n", + "In this situation, the graph shows slightly regressive property valuations. This is not immediately intuitive, but to conceptualize this, think of an exaggerated \"progressive\" policy, where all houses were valued at $0 with one house responsible for all the assessed value. In this distribution, the curve would be at 0 until the final house, where it would jump to 100\\% of the cumulative value (a Gini of 1). Thus, a higher Gini represents more progressive assessments, where tax assessments become larger as property value increases.\n", "\n", "#### KI and MKI\n", "\n", @@ -879,16 +831,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'MKI': 0.9350431709690498, 'KI': -0.02132444068329764}" + "{'MKI': 0.9424756027397724, 'KI': -0.018245001455845455}" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -914,10 +866,30 @@ "G_sale = sum(s * (i + 1) for i, s in enumerate(sale_price))\n", "G_sale = 2 * G_sale / sum(sale_price) - (n + 1)\n", "GINI_sale = G_sale / n\n", - "\n", "{\"MKI\": GINI_assessed / GINI_sale, \"KI\": GINI_assessed - GINI_sale}" ] }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'MKI': 0.9424756027397724, 'KI': -0.018245001455845455}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Or directly from the package\n", + 
"{\"MKI\": ap.mki(assessed, sale_price), \"KI\": ap.ki(assessed, sale_price)}" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -935,7 +907,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "python3", "language": "python", "name": "python3" }, @@ -949,7 +921,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.10" }, "vscode": { "interpreter": { diff --git a/docs/source/outliers.rst b/docs/source/outliers.rst index 4394d51..f94b3ee 100644 --- a/docs/source/outliers.rst +++ b/docs/source/outliers.rst @@ -1,5 +1,5 @@ -=========================================================== -Detect outlier values in a vector using IQR/quantile method -=========================================================== +=============================================== +Detect outlier values using IQR/quantile method +=============================================== .. autofunction:: assesspy.is_outlier diff --git a/docs/source/prb.rst b/docs/source/prb.rst index f628ec9..85d739e 100644 --- a/docs/source/prb.rst +++ b/docs/source/prb.rst @@ -3,3 +3,5 @@ Price-Related Bias (PRB) ======================== .. autofunction:: assesspy.prb +.. autofunction:: assesspy.prb_ci +.. autofunction:: assesspy.prb_met diff --git a/docs/source/prd.rst b/docs/source/prd.rst index 0147be5..64d0287 100644 --- a/docs/source/prd.rst +++ b/docs/source/prd.rst @@ -3,3 +3,5 @@ Price-Related Differential (PRD) ================================ .. autofunction:: assesspy.prd +.. autofunction:: assesspy.prd_ci +.. autofunction:: assesspy.prd_met diff --git a/docs/source/quintos_sample.rst b/docs/source/quintos_sample.rst new file mode 100644 index 0000000..3c28030 --- /dev/null +++ b/docs/source/quintos_sample.rst @@ -0,0 +1,5 @@ +================================ +Sample data from Quintos studies +================================ + +.. 
autofunction:: assesspy.quintos_sample diff --git a/docs/source/ratios_sample.rst b/docs/source/ratios_sample.rst deleted file mode 100644 index 41c2e15..0000000 --- a/docs/source/ratios_sample.rst +++ /dev/null @@ -1,5 +0,0 @@ -======================================================= -Sample of ratio and sales data pulled from CCAO records -======================================================= - -.. autofunction:: assesspy.ratios_sample diff --git a/docs/source/reference.rst b/docs/source/reference.rst index 205a5e0..2c69f82 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -16,6 +16,7 @@ properties with a similar fair market value should be similarly assessed. :doc:`cod_ci() ` |nbsp| :doc:`cod_met() ` + Price-Related Differential (PRD) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -28,6 +29,7 @@ similarly assessed. :doc:`prd_ci() ` |nbsp| :doc:`prd_met() ` + Price-Related Bias (PRB) ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -42,51 +44,48 @@ increase by 100 percent. Kakwani Index (KI) -^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^ The Kakwani Index (KI) is a Gini-based measure to test for vertical equity. -The output is the cumulative distribution of assessed values minus -the distribution of sale prices. +The output is the ordered cumulative distribution of assessed values minus +the ordered distribution of sale prices. :doc:`ki() ` Modified Kakwani Index (MKI) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The Modified Kakwani Index (MKI) is a Gini-based measure to test for vertical -equity. The output is the cumulative distribution of assessed values divided by -the distribution of sale prices. +equity. The output is the ordered cumulative distribution of assessed values +divided by the ordered distribution of sale prices. 
:doc:`mki() ` |nbsp| -:doc:`mki_met() ` |nbsp| +:doc:`mki_met() ` Other functions ^^^^^^^^^^^^^^^ -| Calculate bootstrapped confidence intervals +| Calculate confidence intervals :doc:`boot_ci() ` -| Detect sales chasing in a vector of sales ratios +| Detect sales chasing in sale ratios + +:doc:`is_sales_chased() ` -:doc:`detect_chasing() ` |nbsp| -:doc:`detect_chasing_cdf() ` |nbsp| -:doc:`detect_chasing_dist() ` +| Detect outlier values -| Calculate bootstrapped confidence intervals +:doc:`is_outlier() ` -:doc:`is_outlier() ` |nbsp| -:doc:`quantile_outlier() ` |nbsp| -:doc:`iqr_outlier() ` Data ---- | Sample data used for testing and demonstrations -:doc:`ratios_sample() ` -:doc:`mki_ki() ` +:doc:`ccao_sample() ` +:doc:`quintos_sample() ` .. |nbsp| unicode:: 0xA0 diff --git a/docs/source/sales_chasing.rst b/docs/source/sales_chasing.rst index b82d389..0da5644 100644 --- a/docs/source/sales_chasing.rst +++ b/docs/source/sales_chasing.rst @@ -2,4 +2,4 @@ Detect sales chasing in a vector of sales ratios ================================================ -.. autofunction:: assesspy.detect_chasing +.. 
autofunction:: assesspy.is_sales_chased diff --git a/pyproject.toml b/pyproject.toml index 4cbd2dc..472f570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "assesspy" -version = "1.2.0" +version = "2.0.0" description = "Python package for measuring assessment performance" keywords = ["assessment", "property taxes", "local government"] authors = [ @@ -15,7 +15,8 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12" + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13" ] license = {file = "LICENSE"} readme = "README.md" @@ -24,7 +25,6 @@ dependencies = [ "numpy >= 1.23.0", "pandas >= 1.4.0", "pyarrow >= 9.0.0", - "scipy >= 1.8.1", "statsmodels >= 0.13.0" ] @@ -33,7 +33,7 @@ Repository = "https://github.com/ccao-data/assesspy" Documentation = "https://ccao-data.github.io/assesspy/" [project.optional-dependencies] -test = [ +dev = [ "setuptools >= 61.0", "pytest >= 7.3.0", "pytest-cov >= 4.1.0", @@ -71,7 +71,7 @@ testpaths = "assesspy/tests" legacy_tox_ini = """ [tox] min_version = 4.0 -envlist = py39, py310, py311, py312 +envlist = py39, py310, py311, py312, py313 [testenv] deps = @@ -94,7 +94,7 @@ reportUnusedImport = false github_username = "ccao-data" github_repository = "assesspy" project = "assesspy" -copyright = "2022, Cook County Assessor's Office" +copyright = "2024, Cook County Assessor's Office" language = "en" package_root = "assesspy" extensions = [