% !TEX program = xelatex
% !TEX pweaveOutputFormat = tex
% cSpell: disable
\documentclass{report}
\usepackage{arxiv}
\renewcommand{\arraystretch}{1.25}
\usepackage{multirow}
\usepackage{amssymb,mathtools}
\usepackage{booktabs}
\usepackage{verbatim}
\usepackage{hyperref}
\hypersetup
{ pdfauthor = {Gyan Sinha},
pdftitle={Loan Payment Deferments Due to Labor Market Shocks: A Case Study},
colorlinks=TRUE,
linkcolor=black,
citecolor=blue,
urlcolor=blue
}
%
\RequirePackage{fontspec}
\setmainfont{Source Sans Pro}
\usepackage{graphicx}
\graphicspath{{/home/gsinha/admin/docs/logos/}}
\setcounter{tocdepth}{3}
\setcounter{secnumdepth}{3}
<<imports, echo=False>>=
import warnings
warnings.filterwarnings("ignore")
import sys
import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
sns.set()
plt.rcParams.update({
"font.family": "Source Sans Pro",
"font.serif": ["Source Sans Pro"],
"font.sans-serif": ["Source Sans Pro"],
"font.size": 10,
})
import pathlib
import joblib
import collections
import numpy as np
import pandas as pd
import geopandas as gpd
import pytoml
from fredapi import Fred
import feather
import pymc3 as pm
import arviz as az
import lifelines
from lifelines import KaplanMeierFitter, NelsonAalenFitter
from scipy.special import expit
from analytics import utils
omap = {"LC": "I", "PR": "II", "ALL": None}
base_dir = "/home/gsinha/admin/db/dev/Python/projects/models/"
results_dir = {
"LC": base_dir + "defers/pymc3/" + "originator_" + omap["LC"] + "/results",
"PR": base_dir + "defers/pymc3/" + "originator_" + omap["PR"] + "/results",
"ALL": base_dir + "defers/pymc3/" + "results/"
}
import_dir = base_dir + "defers/"
sys.path.append(import_dir)
from common import *
data_dir = base_dir + "data/"
idx = pd.IndexSlice
ASOF_DATE = datetime.date(2020, 7, 12)
dep_var = "defer"
fname = data_dir + "claims.pkl"
with open(fname, "rb") as f:
claims_dict = joblib.load(f)
@
\title{Loan Payment Deferments Due to Labor Market Shocks: A Case Study}
\author{Gyan Sinha, Godolphin Capital Management, LLC%
\thanks{\scriptsize \emph{%Godolphin Capital Management, LLC,%
\href{mailto:gsinha@godolphincapital.com}{Email Gyan}. This report
has been prepared by Godolphin Capital Management, LLC
(``Godolphin'') and is provided for informational purposes only and
does not constitute an offer to sell or a solicitation to purchase
any security. The contents of this research report are not intended
to provide investment advice and under no circumstances does this
research report represent a recommendation to buy or sell a security.
The information contained herein reflects the opinions of Godolphin.
Such opinions are based on information received by Godolphin from
independent sources. While Godolphin believes that the information
provided to it by its sources is accurate, Godolphin has not independently
verified such information and does not vouch for its accuracy. Neither
the author, nor Godolphin has undertaken any responsibility to update
any portion of this research report in response to events which may
transpire subsequent to its original publication date. As such, there
can be no guarantee that the information contained herein continues
to be accurate or timely or that Godolphin continues to hold the views
contained herein. Godolphin is an investment adviser. Investments
made by Godolphin are made in the context of various factors including
investment parameters and restrictions. As such, there may not be
a direct correlation between the views expressed in this article and
Godolphin's trading on behalf of its clients.
<%print(f'Version:{datetime.datetime.now()}') %>}}
}
\date{\today}
%{\includegraphics[width=.25\textwidth]{GodolphinLogo.jpg}}
\begin{document}
\maketitle
\begin{abstract}
This report analyzes loan payment deferment as a result of
COVID-19 related shutdowns in the US. We focus on a
portfolio of unsecured consumer loans originated by 2 different
institutions. Our analysis focuses on a few key questions:
\begin{itemize}
\item what is the magnitude of COVID-related deferment?
\item are there systematic relationships between loan attributes and
payment deferment?
\item how are labor market trends related to the probability
of loan deferment?
\item does the sensitivity to labor market shocks vary by region?
\end{itemize}
The model and results presented provide a general framework that
can be applied not only to unsecured consumer loans but also more broadly
to other lending sectors. While the data are still
preliminary and the events they capture relatively recent, our conclusions are based on
a rigorous and transparent statistical analysis and presented with confidence
bounds that respect the intrinsic uncertainty of the data-generating process.
The chief technical contribution of this paper is the
use of a ``mixed model'' with random effects within a Bayesian estimation
framework, which has enabled us to answer some of the questions posed
above in ways that would not have been possible using more traditional approaches.
This study should be useful to investors and policy-makers alike, allowing
for data-driven estimates of potential deferment (and distress) rates on
loan portfolios.
\end{abstract}
<<out_dicts, echo=False>>=
out_dict = {}
for i in ["pooled", "hier"]:
out_dict[i] = read_results(i, None, ASOF_DATE, results_dir["ALL"])
@
<<datasets, echo=False>>=
hard_df = out_dict["hier"]["hard_df"]
ic_date = (
out_dict["hier"]["pipe"]["p_s_1"].named_steps.add_state_macro_vars.ic_long_df["edate"].max().date()
)
data_scaler = (
out_dict["hier"]["pipe"]["p_s_2"].named_steps["standardize"].numeric_transformer.named_steps["scaler"]
)
numeric_features = [
"fico", "original_balance", "dti", "stated_monthly_income", "age", "pct_ic"
]
data_scaler_dict = {
"mu" : dict(zip(numeric_features, data_scaler.mean_)),
"sd": dict(zip(numeric_features, data_scaler.scale_))
}
@
\section{Introduction}
Our reasons for undertaking this research project were driven by
practical considerations --- like many other investors in consumer and
mortgage lending, we happen to be long these loans. As such, it is
critical for us to evaluate future losses and prospective returns on
these loans and make assessments about their ``fundamental'' value.
We do this with the explicit recognition of the unprecedented nature
of the COVID shock and the fact that in many ways, we are sailing
through uncharted waters.
While our motivations were pragmatic, a natural question that
arises in this context is the applicability of the analysis
to a broader population of loans. While there is a natural
tendency to always seek out more and greater amounts of data, in
practice, investors in most cases hold narrow subsets of the overall population of
loans. While larger datasets may give us more precise estimates (up to
a point), the fact is that we want to make statements about OUR
portfolio, not a fictional universe which is not owned by anyone in
particular. The challenge, then, is to employ statistical methods that
allow us to extract information from ``small'' rather than ``big'' data and
turn it into useful insights for decision-making, while still
providing guidance about the broader population as well.
This is where the Bayesian methods we deploy in this report come in useful: they
treat inferential uncertainty as intrinsic to the problem and
can be used to provide insights in other contexts as well.
There are two parts to our project. First, we
describe the data set in some detail and present
stratifications by different loan attributes. We also
present the deferment rates within each stratum in order to get
intuition around the impact of loan attributes. We then
provide statistics around the labor markets in various states. We look
at the impact of the annual percentage change in initial claims,
starting March 14th (which we peg as the start of the COVID crisis for our
purposes) and through the week ending <%print(f'{ic_date.strftime("%B %-d, %Y")}')%>.
An open question that the modeling seeks to answer is the impact of the
claims variables on deferment rates and whether these can be leveraged into a
prediction framework going forward. A discussion of the statistical model
that relates the observed outcome (did the loan defer: Yes/No?) to the
various loan attributes is provided in the appendix. The framework employed is based on
Survival Analysis, using a hierarchical Bayes approach as
in~\cite{8328358dab6746d884ee538c687aa0dd}
and~\cite{doi:10.1198/004017005000000661}.
In the second part of our work, we develop a methodology for
forecasting the path of initial claims at the national and state
levels over the next few months. This analysis is unique in its own
way and leverages a brief descriptive note put out by Federal Reserve
Bank of NY researchers in a blog article. We use the claims forecast
as inputs into the predictions for deferment rates at the end of
second quarter of 2020, which is our forecast horizon. The model
and the estimation results and forecasts are provided in an
accompanying piece.
Before we dive into the details, there are three key technical aspects in
this report that are worth highlighting. \textbf{First, the use of Survival or
Hazard models} to estimate the marginal deferment probability, as a
function of weeks elapsed since the crisis, is \textbf{key} to sensible
projections of deferment\footnote{This is a benefit over and above
the intrinsic gain from using this framework in the context of
``censored'' data where most of the observations have not yet
experienced deferment}. As we show, these marginal
hazards have a very strong ``duration'' component which impacts
longer-term forecasts of the cumulative amount of deferment we expect
in the future.
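To make the link between the weekly hazards and cumulative deferment explicit
(this is the standard discrete-time survival identity rather than a model-specific
result), let $h_j$ denote the marginal hazard in week $j$ since March 14th. The
cumulative deferment probability through week $t$ is then
\begin{equation}
F(t) = 1 - \prod_{j \le t} \left(1 - h_j\right),
\end{equation}
so even modest weekly hazards compound over a multi-month horizon, and the shape
of the duration profile matters for longer-term forecasts of cumulative deferment.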
Second, we extend the survival model framework by incorporating \textbf{parameter
hierarchies (within a Bayesian framework)} that explicitly account for random
variation in the impact of variables across loan clusters. This allows for the
possibility of ``unobserved heterogeneity'' in the data by
explicitly modeling a cluster-specific random variable that interacts
with and modifies the hazards for loan clusters. This
is an important enhancement since (i) there may be
differences in the composition of the workforce across groups that
impact the way in which a given volume of claims affects deferment
rates, and (ii) the borrower base itself may differ across groups in both
observable and unobservable ways. We control for the observed
attributes explicitly but the hierarchical framework allows us to
model unobserved factors as well.
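As a concrete illustration of this structure, the chunk below is a minimal,
hypothetical sketch of a cluster-level random effect in PyMC3; the variable
names, priors, and the logistic link are placeholders rather than the
specification estimated in this report (which is detailed in the appendix).
<<hier_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch: a cluster-level random intercept in a PyMC3
# hazard-style regression. Names, priors and the logistic link are
# placeholders, not the specification estimated in this report.
import numpy as np
import pymc3 as pm

X = np.random.randn(500, 3)                   # standardized covariates
cluster = np.random.randint(0, 8, size=500)   # e.g. originator-within-state index
y = np.random.binomial(1, 0.03, size=500)     # deferment indicator per loan-week

with pm.Model() as hier_model:
    beta = pm.Normal("beta", mu=0.0, sigma=1.0, shape=X.shape[1])
    sigma_u = pm.HalfNormal("sigma_u", sigma=1.0)
    u = pm.Normal("u", mu=0.0, sigma=sigma_u, shape=8)   # cluster random effects
    eta = pm.math.dot(X, beta) + u[cluster]
    pm.Bernoulli("defer", p=pm.math.invlogit(eta), observed=y)
    trace = pm.sample(1000, tune=1000, target_accept=0.9)
@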
Third, we develop a \textbf{statistical framework
to model ``decay'' rates for weekly claims} and the role that labor markets
play in determining deferment rates, building upon ideas first discussed
by researchers at the NY Fed. The projections from this framework serve as
inputs to our longer-term deferment forecasts and allows us to model the
impact of different economic scenarios in the future, an important tool to have
in the arsenal given the considerable uncertainties that still remain
regarding the future path of the economy.
\section{Data}
In Table~\ref{tbl:portfolio_summary}, we provide an overview of our
data sample. In all, we have <%print(f'{hard_df.shape[0]}')%> loans
in our data, in roughly a 50/50 split (by count) across the 2 institutions.
\begin{table}[ht]
\centering
\caption{Portfolio Summary}
\label{tbl:portfolio_summary}
\scalebox{0.75}{
<<portfolio_summary, echo=False, results="tex">>=
hard_df["defer_dollar"] = hard_df[dep_var] * hard_df["current_balance"]
def wavg(x):
return np.nansum(
x * hard_df.loc[x.index, "current_balance"], axis=0
)/np.nansum(hard_df.loc[x.index, "current_balance"])
aaa = hard_df.groupby(["originator", "grade"]).agg(
n=('loan_id', "count"),
original_balance=('original_balance', sum),
current_balance=('current_balance', sum),
wac=('original_rate', wavg),
age=('age', wavg),
fico=('fico', wavg),
term=('original_term', wavg),
dti=('dti', wavg),
income=('stated_monthly_income', wavg),
outcome=(dep_var, wavg),
).rename(columns={"outcome": dep_var})
bbb = hard_df.groupby(["originator"]).agg(
n=('loan_id', "count"),
original_balance=('original_balance', sum),
current_balance=('current_balance', sum),
wac=('original_rate', wavg),
age=('age', wavg),
fico=('fico', wavg),
term=('original_term', wavg),
dti=('dti', wavg),
income=('stated_monthly_income', wavg),
outcome=(dep_var, wavg),
).rename(columns={"outcome": dep_var})
bbb.index = pd.MultiIndex.from_tuples(
[(omap["LC"], 'ALL'), (omap["PR"], 'ALL')], names=['originator', 'grade']
)
aaa = pd.concat([aaa, bbb])
ccc = pd.concat(
[
pd.Series(hard_df["loan_id"].apply("count"), name="n"),
pd.Series(hard_df["original_balance"].sum(), name="original_balance"),
pd.Series(hard_df["current_balance"].sum(), name="current_balance"),
hard_df[
[
"original_rate", "age", "fico", "original_term", "dti",
"stated_monthly_income"
]
].apply(wavg).to_frame().T.rename(
columns={
"original_term": "term", "original_rate": "wac", "dti": "dti",
"stated_monthly_income": "income"
}
),
pd.Series(wavg(hard_df[dep_var]), name=dep_var)
], axis=1
)
ccc.index = [('ALL', 'ALL')]
ddd = pd.concat([aaa, ccc])
ddd["pct"] = ddd["current_balance"]/ddd.loc[pd.IndexSlice["ALL", "ALL"], "current_balance"]
ddd.index.names = ["Originator", "Grade"]
cfmt = "".join(["r"] * (ddd.shape[1] + 2))
header = [
"N", "Orig. Bal.", "Cur. Bal.", "WAC", "WALA", "FICO",
"WAOT", "DTI", "Income", "Defer", "Share",
]
tbl_fmt = {
"original_balance": utils.dollar,
"current_balance": utils.dollar,
"n": utils.integer, "fico": utils.number,
"term": utils.number, "age": utils.number,
"pct": utils.percent, dep_var: utils.percent,
"wac": utils.percent, "dti": utils.number,
"income": utils.dollar
}
print(
ddd.to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
one_line = ddd.loc[pd.IndexSlice["ALL", :], :]
@
}
\end{table}
The aggregate original amount issued is \$%
<%print(f'{float(one_line.original_balance):,.2f}')%>,
with a weighted-average interest rate of
<%print(f'{utils.number(100*float(one_line.wac))}')%>\%,
a weighted-average FICO score of %
<%print(f'{utils.number(float(one_line.fico))}')%> and
is <%print(f'{utils.number(float(one_line.age))}')%>
months seasoned. The weighted-average original-term %
is <%print(f'{utils.number(float(one_line.term))}')%> months.
\textbf{Overall, the deferment rate on this portfolio is %
<%print(f'{100*float(one_line.defer):.2f}')%>\%.}
The portfolio statistics presented here are as of
<%print(f'{ASOF_DATE.strftime("%B %-d, %Y")}')%> which is more than
one month after the onset of the significant ``shelter-at-home'' orders
across the country and resulting economic disruptions. Since
most of these payment deferrals are for anywhere from 1 to
3 months, the deferment percentages can be viewed as the
cumulative share of loans deferred or delinquent since the start of the
COVID crisis. By way of comparison, we provide recent deferment figures for other
related sectors such as mortgages. Approximately 8.46\% of all mortgage loans
were in forbearance as of May 24th, 2020, which is roughly
<%print(f'{(pd.to_datetime(ASOF_DATE) - pd.to_datetime("2020-05-24")).days}') %>
days earlier than the cutoff date for our data set. In the Ginnie Mae
sector, 11.82\% of loans were in forbearance while the comparable
figure for conventional mortgages was 6.39\%.
In figure~\ref{fig:due_day_dist}, we present the frequency
distribution of the payment dates on the loans in our sample.
Since borrowers may have a tendency to hold off
on requesting a deferral until they are close to or past their
due day, and given the relatively short data window, this may
lead to biases. A relatively uniform distribution of payment
due dates would serve to assuage this concern. Thankfully,
this is exactly what we find in the data presented here,
eliminating this aspect of the data as a potential source
of bias.
\begin{figure}
\caption{Distribution of due dates}
\label{fig:due_day_dist}
\scalebox{1}{
<<due_day_dist, echo=False>>=
pos = []
for i in [omap["LC"], omap["PR"]]:
pos.append(get_due_day(i, ASOF_DATE))
pos_df = pd.concat(pos, ignore_index=True)
pos_df = pos_df[pos_df["loan_id"].isin(hard_df["loan_id"].to_list())]
fig, ax = plt.subplots(2, 1, figsize=(10, 5), sharey=True)
for i, v in enumerate([omap["LC"], omap["PR"]]):
df = pos_df[pos_df["originator"] == v]
ax[i].hist(df.pmt_day)
ax[i].set_xlabel("Due day")
ax[i].set_ylabel("Frequency")
ax[i].set_title(f"Originator: {v}")
plt.tight_layout()
@
}
\end{figure}
In Table~\ref{tbl:port_summary_purpose}, we provide a stratification of
the portfolio by loan purpose. More than two-thirds of the loans are used for
consolidating existing debt, mostly drawn on credit cards. The second
largest category is for purchases, while less than 10\% is used for
expenses such as education, weddings, etc. (``LifeCycle'').
\begin{table}[ht]
\centering
\caption{Portfolio summary, by purpose}
\label{tbl:port_summary_purpose}
\scalebox{0.7}{
<<port_summary_purpose, echo=False, results="tex">>=
purpose_tbl = summary_by_group(
["originator", "purpose"], dep_var, hard_df
)
purpose_tbl.index.names = ["Originator", "Purpose"]
cfmt = "".join(["r"] * (purpose_tbl.shape[1] + 2))
print(
purpose_tbl.to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
@
}
\end{table}
In Table~\ref{tbl:port_summary_emp_status}, a stratification across
the borrower's employment status is provided. The ``Self-employed''
and ``Other'' categories generally comprise anywhere from 10\% to 15\%
of the portfolio\footnote{In the case of Originator I, the employment
category is really a dummy variable for the presence or absence
of employment history --- if there is information on this count,
this field is coded as ``Employed'' otherwise it is coded as
``Other''}.
\begin{table}[ht]
\centering
\caption{Portfolio summary, by employment status}
\label{tbl:port_summary_emp_status}
\scalebox{0.70}{
<<port_summary_emp_status, echo=False, results="tex">>=
emp_tbl = summary_by_group(
["originator", "employment_status"], dep_var, hard_df
)
emp_tbl = emp_tbl.fillna(0)
emp_tbl.index.names = ["Originator", "Employment"]
cfmt = "".join(["r"] * (emp_tbl.shape[1] + 2))
print(
emp_tbl.to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
@
}
\end{table}
In Table~\ref{tbl:port_summary_homeowner}, the portfolio is stratified
across housing tenure. Across the 2 institutions, roughly a quarter to
two-thirds of the borrowing is by renters.
\begin{table}[ht]
\centering
\caption{Portfolio summary, by homeownership}
\label{tbl:port_summary_homeowner}
\scalebox{0.75}{
<<port_summary_homeowner, echo=False, results="tex">>=
homeowner_tbl = summary_by_group(
["originator", "home_ownership"], dep_var, hard_df
)
homeowner_tbl.index.names = ["Originator", "Housing"]
cfmt = "".join(["r"] * (homeowner_tbl.shape[1] + 2))
print(
homeowner_tbl.to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
@
}
\end{table}
Finally, in Table~\ref{tbl:port_summary_term}, we stratify by
loan term. Across the 2 institutions, roughly 50\% - 70\% of
the loans are for 3-year amortization terms, with the remainder
for a 5-year term.
\begin{table}[ht]
\centering
\caption{Portfolio summary, by term}
\label{tbl:port_summary_term}
\scalebox{0.75}{
<<port_summary_term, echo=False, results="tex">>=
term_tbl = summary_by_group(
["originator", "original_term"], dep_var, hard_df
)
term_tbl.index.names = ["Originator", "Term"]
cfmt = "".join(["r"] * (term_tbl.shape[1] + 2))
print(
term_tbl.to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
@
}
\end{table}
An important question, with possible implications for the
prospective cure rates of deferred loans, is how they compare
with the subset of loans that were already
delinquent before the crisis. This comparison is presented in
Table~\ref{tbl:pre_covid_dq_profile}.
\begin{table}[ht]
\centering
\caption{DQ \emph{vs} Deferment profile}
\label{tbl:pre_covid_dq_profile}
\scalebox{0.70}{
<<pre_covid_dq_profile, echo=False, results="tex">>=
dq_tbl = summary_by_group(
["originator", "loanstatus"], dep_var, hard_df
)
dq_tbl.index.names = ["Originator", "DQ Status"]
cfmt = "".join(["r"] * (dq_tbl.shape[1] + 2))
print(
dq_tbl.iloc[:-1].to_latex(
index=True, multirow=True,
header=header,
formatters=tbl_fmt,
column_format=cfmt,
multicolumn_format="r",
))
@
}
\end{table}
The deferment subset (labeled ``Covid'') has better credit quality, as measured
by FICO scores, than both the ``Current'' and the delinquent sub-populations
for Originator I. This may imply that the cure rate on the deferred
sub-population will be better than has been the experience on the
delinquent sub-population. In the case of Originator II, the deferment and
delinquent sets have roughly the same FICO score which is lower
than that on the set of loans that are ``Current''.
\section{Employment}
The economic disruption caused by COVID is in many ways unusual
in that it strikes at the Consumption component of overall GDP.
As such, the disruption is much broader than would be the case,
say, for an investment-led recession caused by a contraction in
an isolated segment of the economy.
In some ways, this resembles the 2008 recession, which was caused by
massive asset writedowns in the banking sector (on a global basis)
leading to an economy-wide credit crunch. To the extent that the current shock strikes
at almost two-thirds of overall economic output, the disruption is naturally
even larger, as has become obvious in the labor market figures released over
the last month. Labor markets are likely to be the key to explaining deferment, and
both the full magnitude of job losses and how quickly they are reversed
are going to be the drivers of ultimate loan performance.
The trend in the year-over-year percentage change in weekly initial claims,
together with its distribution, is presented in Figure~\ref{fig:claims_pct_trend}.
The underlying data are the individual state/week observations on claims,
merged with the appropriate loan histories starting March 14th.
When we first produced this figure, we thought we had made a mistake, but the
percentage changes depicted here are correct: on a year-over-year basis,
initial claims really did increase by approximately 8,000\% at their peak
in early April! The annual percentage changes in claims are standardized
by subtracting the mean and dividing by the standard deviation.
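For readers who wish to reproduce the claims transformation, the chunk below is
a minimal sketch (not evaluated here): it uses the national ICSA series from
FRED purely for illustration, whereas the report merges state-level series
into the loan histories, and the API key is a placeholder.
<<claims_yoy_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch: year-over-year percent change in weekly initial
# claims, standardized as described above. Uses the national ICSA series
# for illustration; the report uses state-level series merged with loans.
from fredapi import Fred

fred = Fred(api_key="YOUR_FRED_API_KEY")              # placeholder key
ic = fred.get_series("ICSA")                          # weekly initial claims (SA)
pct_ic = ic.pct_change(periods=52)                    # year-over-year percent change
pct_ic_std = (pct_ic - pct_ic.mean()) / pct_ic.std()  # standardized covariate
@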
\begin{figure}[ht]
\caption{Weekly Claims (Year-over-Year pct. change): trend and distribution}
\label{fig:claims_pct_trend}
\scalebox{1}{
<<claims_pct_trend, echo=False>>=
s_3_df = out_dict["hier"]["s_3_df"]
fig, ax = plt.subplots(2, 1, figsize=(8, 6.4))
sns.boxplot(s_3_df.sdate.dt.date, s_3_df.pct_ic, ax=ax[0])
sns.distplot(s_3_df.pct_ic, ax=ax[1], kde=False)
ax[0].set_xlabel("Week ending: ");
ax[0].set_ylabel("Year-over-Year pct. change")
ax[1].set_xlabel("Year-over-Year pct. change")
ax[0].yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
ax[1].xaxis.set_major_formatter(mtick.PercentFormatter(1.0))
ax[0].set_xticklabels(ax[0].get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
@
}
\end{figure}
\subsection{Claims}
In Figures~\ref{fig:lc_defer_hazard_by_state} and
~\ref{fig:pr_defer_hazard_by_state}, we depict the
relationship between payment deferment and the claims measure.
The solid trend line is a robust fit for the scatter plot
depicted here. The patterns appear to show a relatively weak
relationship (and possibly opposite effects across the 2 originators)
--- in both cases, there is considerable variation across states.
<<defer_hazard_by_state, echo=False>>=
def plot_defer_hazard_by_state(originator):
''' plots deferment hazards by state and week '''
zzz = out_dict["hier"]["s_3_df"]
zzz = zzz[zzz["originator"] == originator].copy()
a_df = zzz.groupby(["state"]).agg(
n=("loan_id", "count"), k=(dep_var, np.sum), outcome=(dep_var, np.mean),
pct_ic=("pct_ic", np.mean)
).reset_index().rename(columns={"outcome": dep_var})
g = sns.FacetGrid(
data=a_df.reset_index(),
)
g.map(sns.regplot, "pct_ic", dep_var, ci=True)
g.ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.0))
# add annotations one by one with a loop
for line in range(0, a_df.shape[0]):
g.ax.text(
a_df["pct_ic"][line]+0.001, a_df[dep_var][line], a_df["state"][line],
horizontalalignment='left', size='medium', color='red',
weight='semibold', alpha=0.20
)
g.ax.figure.set_size_inches(10, 5)
g.ax.set_xlabel("Year-over-Year pct. change")
g.ax.set_ylabel("Deferment hazard")
return g
@
\begin{figure}[htb!]
\caption{Originator I: deferment hazard}
\label{fig:lc_defer_hazard_by_state}
\scalebox{1}{
<<lc_defer_hazard_by_state, echo=False>>=
g = plot_defer_hazard_by_state(omap["LC"])
sns.despine(left=True)
@
}
\end{figure}
\begin{figure}[htb!]
\caption{Originator II: deferment hazard}
\label{fig:pr_defer_hazard_by_state}
\scalebox{1}{
<<pr_defer_hazard_by_state, echo=False>>=
g = plot_defer_hazard_by_state(omap["PR"])
sns.despine(left=True)
@
}
\end{figure}
The modeling exercise will seek to examine how much of the difference in
slopes and the variability across states can be explained by individual
loan attributes and unobservable effects modeled as ``random effects''.
\section{Model}
The modeling framework used in this report draws upon statistical
tools used in the analysis of events with a ``time-until'' component
to them. In our case, the time-until, or ``lifetime'' we are
interested in predicting is the time until a borrower asks the
servicer for a deferment or goes delinquent. Time in this context is measured from
an epoch start date of March 14th, 2020, which we take to be the
start of the COVID-19 crisis for our purposes; this epoch is the
same across all loans.
We incorporate state-based differences in the distribution
of hazard rates that manifest themselves in both the pattern of duration dependence
and the impact that changes in state-level initial claims have on hazards.
As is detailed in the appendix, duration dependence is captured using a set of
interval-specific intercepts. All numerical
covariates are standardized by subtracting the mean and dividing by the standard
deviation. Categorical features are encoded using ``dummy variables'' where the
first category is treated as the baseline or reference category.
The model is calibrated to a ``training'' data set that consists of a
random sample of 80\% of the loans from the full dataset, stratified on state.
The parameter estimates derived from the training set are then used to generate
predictions for the remaining loans that constitute the ``test'' sample.
Further details are provided in the appendix.
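A minimal sketch of this preprocessing is shown below (not evaluated here; it
assumes scikit-learn, on which the pipeline objects used earlier are built, and
is not the exact pipeline employed in the report).
<<preprocess_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch of the preprocessing described above: an 80/20
# train/test split stratified on state, standardization of numeric
# covariates, and baseline-dropped dummies for categorical features.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

train_df, test_df = train_test_split(
    hard_df, train_size=0.80, stratify=hard_df["state"], random_state=42
)
scaler = StandardScaler().fit(train_df[numeric_features])
X_train = scaler.transform(train_df[numeric_features])
X_test = scaler.transform(test_df[numeric_features])        # reuse training moments
dummies = pd.get_dummies(train_df["home_ownership"], drop_first=True)  # first level = baseline
@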
\section{Estimates}
We now turn to a discussion of the results. We use the \href{https://docs.pymc.io/}{PyMC3} Python
package to estimate the model~\cite{pymc3}. The parameter estimates are presented for each
originator in turn.
\subsection{Hazards}
In this section, we present depictions of the marginal deferment
probabilities (using
\href{https://en.wikipedia.org/wiki/Nelson%E2%80%93Aalen_estimator}{Nelson-Aalen hazards}
),
to set the stage for what we should expect our fully-specified hazards to look like.
The hazard estimates depicted in Figure~\ref{fig:nelson_aalen} were computed using the
\href{https://lifelines.readthedocs.io/en/latest/#}{Lifelines}
Python package~\cite{lifelines}.
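For reference (this is the textbook definition rather than anything specific to
our data), the Nelson-Aalen estimator of the cumulative hazard is
\begin{equation}
\hat{H}(t) = \sum_{t_i \le t} \frac{d_i}{n_i},
\end{equation}
where $d_i$ is the number of deferments observed at duration $t_i$ and $n_i$ is
the number of loans still at risk just before $t_i$. The weekly hazards plotted
in Figure~\ref{fig:nelson_aalen} are kernel-smoothed increments of $\hat{H}(t)$,
with the \texttt{bandwidth} argument controlling the degree of smoothing.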
\begin{figure}[ht]
\centering
\caption{Hazards}
\label{fig:nelson_aalen}
\scalebox{1}{
<<nelson_aalen, echo=False>>=
T = hard_df.dur
E = hard_df[dep_var]
bandwidth = 1
naf = NelsonAalenFitter()
lc = hard_df["originator"].isin([omap["LC"]])
naf.fit(T[lc],event_observed=E[lc], label="Originator I")
ax = naf.plot_hazard(bandwidth=bandwidth, figsize=(10, 5))
naf.fit(T[~lc], event_observed=E[~lc], label="Originator II")
naf.plot_hazard(ax=ax, bandwidth=bandwidth)
ax.set_xlabel("Weeks since March 14th, 2020")
ax.set_ylabel("Weekly hazard")
_ = plt.xlim(0, hard_df.dur.max() + 1)
@
}
\end{figure}
The hazards rose sharply in the first weeks after the start of the crisis,
to between 2\% and 2.5\%, but have declined since then. They reveal a difference
in operating protocols where it appears that in the case of Originator I,
the initial flurry of deferment requests was processed in a batch and then approved
all at once in the second week. The overall pattern of events and censoring
is presented in Table~\ref{tbl:survival_table_all}. The ``observed'' column
indicates the count of loans where deferment was observed during
the interval specified in the ``event-at'' column on the left. Since the
study inception date is the same for all loans, a large fraction
of the data show up as ``censored''
in the last interval and are reported under the censored column. The other entries
in this column pertain to loans that either prepaid or were charged-off during
the period starting March 14th, 2020 and the cutoff date. The removed
column represents the portion of the ``at-risk'' population that is no longer
at risk since the loan was either censored or experienced an event. The
at-risk figure for the previous interval is decremented by the removed column
for that interval to give a new at-risk number.
\begin{table}[ht]
\centering
\caption{Survival table: all loans}
\label{tbl:survival_table_all}
\scalebox{1}{
<<survival_table_all, echo=False, results="tex">>=
lt_df = lifelines.utils.survival_table_from_events(
hard_df.dur, hard_df[dep_var], collapse=True
)
print(lt_df.to_latex(column_format='rrrrr'))
@
}
\end{table}
Nelson-Aalen hazards for the top 12 states by loan count are
presented in Figure~\ref{fig:na_top_12_states}.
\begin{figure}[htb!]
\centering
\caption{Top 12 states: Nelson-Aalen hazards}
\label{fig:na_top_12_states}
<<na_top_12_states, echo=False>>=
hard_df = out_dict["hier"]["hard_df"]
top_states = hard_df.groupby("state").agg(
n=("loan_id", "count")
).sort_values(by=["n"], ascending=False).iloc[:12].index.to_list()
fig, ax = plt.subplots(4, 3, figsize=(10, 10), sharex=True, sharey=True)
naf = {}
for u, v in zip(top_states, ax.flatten()):
naf[u] = fit_na(u, hard_df, "dur", dep_var)
naf[u].plot_hazard(bandwidth=1, ax=v)
v.set_xlabel("Weeks")
v.set_ylabel("Hazard")
plt.tight_layout()
@
\end{figure}
\subsection{Pooled}
<<pooled_results, echo=False>>=
pooled_result = make_az_data("pooled", out_dict)
pooled_df = out_dict["pooled"]["test"]
pooled_ppc, pooled_out_df = predict(
None, pooled_df, dep_var, out_dict["pooled"], ic_long_df=None,
n_samples=4000, verbose=False
)
@
We first provide a summary of the estimates for the pooled model, in
Table~\ref{tbl:pooled_estimates}. The pooled model treats all observations as
being derived from the same underlying distribution, ignoring the impact of differences
driven by loan clusters identified by either region or originator. The estimates
serve to provide a baseline against which the results of the hierarchical model
can be compared and contrasted.
\begin{table}
\caption{Pooled model: population means}
\label{tbl:pooled_estimates}
\scalebox{1}{
<<pooled_estimates, echo=False, results="tex">>=
pooled_b_out = pooled_result.b_out
print(
pooled_b_out[["mean", "sd", "hdi_3%", "hdi_97%", "r_hat"]].to_latex(
column_format="rrrrrr"
)
)
@
}
\end{table}
\subsubsection{Predictive distribution}
We examine the posterior predictive distribution
of the probability of the binary deferment outcome variable versus the mean of the
observed outcome in the hold-out \textbf{test} data set. This is presented in Figure~\ref{fig:pooled_ppc}
where the vertical line represents the observed deferment percent while the barchart shows
the distribution of posterior predicted probabilities in the sample, together with the
95\% Highest Posterior Density (HPD) interval. Note that these are hazards and not
unconditional probabilities.
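A generic version of this check looks as follows (a hypothetical sketch, not the
\texttt{predict} helper used to produce the figure; \texttt{model}, \texttt{trace},
and \texttt{test\_df} stand in for the fitted pooled model, its posterior trace,
and the hold-out frame).
<<ppc_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch of a posterior predictive check: simulate outcomes
# from the fitted model and compare their mean to the observed deferment
# rate in the hold-out data. `model`, `trace` and `test_df` are placeholders.
import pymc3 as pm

with model:
    ppc = pm.sample_posterior_predictive(trace, samples=1000)
# the returned dictionary is keyed by the name of the observed variable
print(ppc["defer"].mean(), test_df[dep_var].mean())
@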
\begin{figure}[htb!]
\caption{Pooled model: posterior predictive distribution}
\label{fig:pooled_ppc}
\scalebox{1}{
<<pooled_ppc, echo=False>>=
pooled_train_ppc, pooled_train_df = predict(
None, out_dict["hier"]["train"], dep_var,
out_dict["hier"], ic_long_df=None,
n_samples=1000, verbose=False
)
fig = make_ppc_plot(pooled_train_ppc, pooled_train_df, dep_var)
fig.show()
@
}
\end{figure}
The mean of the distribution of predicted hazards matches the average
deferment rate in the sample quite well. We have also examined other
standard metrics for measuring convergence for the MCMC sampler that support
the validity of the sampling results presented here but have withheld
them in the interest of brevity.
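For completeness, such diagnostics can be produced with the ArviZ package
imported above; the chunk below is a sketch only (not evaluated), with
\texttt{trace} standing in for the fitted trace.
<<diag_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch: standard MCMC convergence diagnostics with ArviZ.
# `trace` stands in for the fitted PyMC3 trace.
import arviz as az

summ = az.summary(trace)      # includes r_hat and effective sample sizes
az.plot_trace(trace)          # visual check of mixing across chains
@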
\subsubsection{Predicted hazard}
We depict the fitted hazard and its 90\% predictive interval
as a function of duration $t$ in Figure~\ref{fig:pooled_fitted_hazard}
for loans in the test dataset. The observed hazards are also plotted.
Again, when viewed in terms of empirical versus fitted hazards, the
model seems to capture the hold-out data quite well, with most of
the observations within the predictive interval and the
general pattern of duration dependence captured with the interval-specific
intercepts.
\begin{figure}
\caption{Pooled model: fitted hazard}
\label{fig:pooled_fitted_hazard}
<<pooled_fitted_hazard, echo=False>>=
pctile = np.percentile(pooled_ppc, q=[5, 95], axis=0).T
zzz = pd.concat(
[
pooled_out_df, pd.DataFrame(
np.hstack(
(
pooled_ppc.mean(axis=0).reshape(-1, 1), pooled_ppc.std(axis=0).reshape(-1, 1),
pctile
)
),
columns=["ymean", "ystd", "y5", "y95"], index=pooled_out_df.index
)
], axis=1
)
zzz_df = zzz.groupby("start").agg(
y=(dep_var, np.mean), ymean=("ymean", np.mean), ystd=("ystd", np.mean),
y5=("y5", np.mean), y95=("y95", np.mean)
).reset_index()
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.plot(zzz_df["start"], zzz_df["ymean"], label="Predicted")
ax.scatter(zzz_df["start"], zzz_df["y"], label="Actual")
ax.fill_between(
zzz_df["start"], zzz_df["y5"], zzz_df["y95"], color="red", alpha=0.05, label="95% Interval"
)
ax.set(xlabel='Week', ylabel='Hazard')
_ = ax.legend(loc="upper right")
@
\end{figure}
\subsubsection{Feature impact}
The Average Marginal Effect (AME) of the features in the model is presented in
Figure~\ref{fig:pooled_avg_marginal_effect}. The estimates are converted to
basis points and represent the impact of a 1-unit change in the covariate
on the weekly hazard rate. Since all our numerical covariates are standardized,
the AME represents the impact of a 1 standard-deviation change in the variable.
In the case of categorical covariates, the AME measures the probability impact of a
specific level versus the reference or ``baseline'' category for that variable.
The AME is calculated as follows:
\begin{equation}
\Delta P(y \mid X\beta) = \beta \left[ \exp(X\beta) \right]
\end{equation}
The term in square brackets is an $N$-element vector, where $N$ is the number of rows
in the dataset. The $\beta$ coefficient is multiplied by the
average of the term in square brackets to derive the AME
for the covariate. For reference, the hazard is
specified as $P(y \mid X\beta) = \exp(X\beta)$.
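In code, this calculation reduces to the following minimal sketch (hypothetical
names: \texttt{beta\_j} is a posterior-mean coefficient and \texttt{eta} the
vector of linear predictors $X\beta$ over the sample; the factor of $10^{4}$
converts the result to basis points).
<<ame_sketch, echo=True, evaluate=False>>=
# Hypothetical sketch of the AME calculation described above: the
# coefficient times the sample average of exp(X*beta), in basis points.
import numpy as np

def average_marginal_effect(beta_j, eta):
    return 1e4 * beta_j * np.mean(np.exp(eta))
@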
\begin{figure}[htb!]
\caption{Pooled Model: Average Marginal Effects}
\label{fig:pooled_avg_marginal_effect}
<<pooled_avg_marginal_effect, echo=False>>=
plot_ame(out_dict, "pooled", pooled_ppc)
@
\end{figure}
\subsection{Hierarchical}
We now present the results of the multi-level model, where loans
are grouped into nested clusters of originators within states.
Treating all loans without regard to the state they belong to
papers over the regional nature of this crisis. In addition, modeling
all loans within a state as the same without regard to the