Revise James demographics notebook to multiple year data

monash-emu · Nov 9, 2023 · c3bc84e · c3bc84e
1 parent 38082cb
commit c3bc84e
Showing 1 changed file with 61 additions and 237 deletions.
diff --git a/notebooks/user/vbui/demo_jt.ipynb b/notebooks/user/vbui/demo_jt.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,257 +14,81 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Values in 1952, queried from the autumn database, hard-coded\n",
-    "death_rates = pd.Series(\n",
-    "    [0.048912, 0.005655, 0.003052, 0.009387, 0.0141716], \n",
-    "    index=[0, 5, 15, 35, 50]\n",
-    ")"
+    "years = [1952.5, 1957.5, 1962.5, 1967.5, 1972.5, 1977.5, 1982.5, 1987.5, 1992.5, 1997.5, 2002.5, 2007.5, 2012.5, 2017.5]\n",
+    "death_rate_data = {\n",
+    "    0: [0.04891194320512563, 0.033918668027576884, 0.024432727316924446, 0.019408181689238014, 0.018148370056426787, 0.014535299525724738, 0.013035498777442808, 0.011254176500418787, 0.00931061542295055, 0.006118489529305104, 0.005678373734277731, 0.005333479069860507, 0.004783694403203202, 0.0043347634430972014],\n",
+    "    5: [0.005654833192668025, 0.005297274566250849, 0.0044664893264966974, 0.003846307644222109, 0.005107979016750706, 0.002711430406000275, 0.002147435231479338, 0.0016691038222857585, 0.0013164357375949395, 0.0010181764308804923, 0.0007447183497013854, 0.0005658632755602, 0.0005441317519979431, 0.0005100562239187423],\n",
+    "    15: [0.0030517485729701097, 0.002700755057428503, 0.002489838533286184, 0.002584612327589561, 0.004088806859043201, 0.0023927769572424534, 0.002209771395787853, 0.001885328047816437, 0.0016581044683420282, 0.0015277277213098862, 0.001427063312059633, 0.0012953587483924324, 0.0012356506891999784, 0.0011729827102219131],\n",
+    "    35: [0.009386682178966717, 0.007892734017957679, 0.006517135357865979, 0.006260889686458326, 0.008689175571776193, 0.00479116361370093, 0.004179181883878279, 0.004093556688069908, 0.004031166261049662, 0.003907751109471642, 0.0034886248607591164, 0.003155553373359392, 0.003040343537587886, 0.002980208830203494],\n",
+    "    50: [0.1417162315914261, 0.14004771975249025, 0.14050114600763583, 0.13991314779584452, 0.1446541208480976, 0.12199942887738785, 0.11497764682078969, 0.10618998428462792, 0.09778759372213668, 0.09191279368157758, 0.08603053502488202, 0.08429645911423628, 0.08132411751977145, 0.08108759595140565]\n",
+    "}\n",
+    "raw_death_rates = pd.DataFrame(death_rate_data, index=years).transpose()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "upper_age = 100\n",
-    "ages = range(upper_age)"
+    "def adapt_death_rates_for_lifetable(\n",
+    "    upper_age: int,\n",
+    "    rates: pd.Series,\n",
+    ") -> pd.Series:\n",
+    "    \"\"\"Get the death rates applicable to each year of age.\n",
+    "    \n",
+    "    Args:\n",
+    "        upper_age: The top year of age to consider (exclusive, ages run from 0 to upper_age - 1)\n",
+    "        rates: The raw data for the death rates, indexed by the starting age of each band (assumed ascending)\n",
+    "    \n",
+    "    Returns:\n",
+    "        The death rates by year of age, each age taking the rate of the band it falls in\n",
+    "        \n",
+    "    \"\"\"\n",
+    "    ages = range(upper_age)\n",
+    "    revised_rates = pd.Series(index=ages, dtype=float)  # Explicit dtype, a data-less Series warns about its default on older pandas\n",
+    "    for a in ages:\n",
+    "        idx = next((i for i, age in enumerate(rates.index) if age > a), 0)  # The zero is a trick to get the last element when one is subtracted\n",
+    "        revised_rates.loc[a] = rates.iloc[idx - 1]  # Band starting at or below this age (last band when none starts above it)\n",
+    "    return revised_rates\n",
+    "\n",
+    "\n",
+    "def get_lifetable_from_rates(\n",
+    "    rates: pd.Series,\n",
+    ") -> pd.Series:\n",
+    "    \"\"\"Calculate cohort sizes - note this only works for increments of one year\n",
+    "    (intended to be used with adapt_death_rates_for_lifetable above).\n",
+    "    \n",
+    "    Args:\n",
+    "        rates: Annual death rates from adapt_death_rates_for_lifetable\n",
+    "    \n",
+    "    Returns:\n",
+    "        The life table, the survivors at the start of each age from a starting cohort of 100000\n",
+    "        \n",
+    "    \"\"\"\n",
+    "    lifetable = pd.Series(index=rates.index, dtype=float)  # Explicit dtype, a data-less Series warns about its default on older pandas\n",
+    "    cohort_size = 100000\n",
+    "    for a in rates.index:\n",
+    "        lifetable[a] = cohort_size  # Record survivors before this age's deaths are removed\n",
+    "        cohort_size -= cohort_size * rates.loc[a]  # Read the rates argument (the previous name age_rates was never defined)\n",
+    "    return lifetable"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Get the death rates applicable to each age\n",
-    "life_table = pd.DataFrame(index=ages, columns=['death_rate'])\n",
-    "for a in ages:\n",
-    "    idx = next((i for i, age in enumerate(death_rates.index) if age > a), 0)  # The zero is a trick to get the last element when one is subtracted\n",
-    "    life_table.loc[a, 'death_rate'] = death_rates.iloc[idx - 1]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>age</th>\n",
-       "      <th>qx</th>\n",
-       "      <th>lx</th>\n",
-       "      <th>dx</th>\n",
-       "      <th>μx</th>\n",
-       "      <th>Tx</th>\n",
-       "      <th>e0x</th>\n",
-       "      <th>Lx</th>\n",
-       "      <th>ex</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0.048912</td>\n",
-       "      <td>100000.000000</td>\n",
-       "      <td>4891.200000</td>\n",
-       "      <td>0.050606</td>\n",
-       "      <td>5.774515e+06</td>\n",
-       "      <td>57.245149</td>\n",
-       "      <td>5.674515e+06</td>\n",
-       "      <td>56.745149</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0.048912</td>\n",
-       "      <td>95108.800000</td>\n",
-       "      <td>4651.961626</td>\n",
-       "      <td>0.050170</td>\n",
-       "      <td>5.674515e+06</td>\n",
-       "      <td>59.163405</td>\n",
-       "      <td>5.579406e+06</td>\n",
-       "      <td>58.663405</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>0.048912</td>\n",
-       "      <td>90456.838374</td>\n",
-       "      <td>4424.424879</td>\n",
-       "      <td>0.050170</td>\n",
-       "      <td>5.579406e+06</td>\n",
-       "      <td>61.180312</td>\n",
-       "      <td>5.488949e+06</td>\n",
-       "      <td>60.680312</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>3</td>\n",
-       "      <td>0.048912</td>\n",
-       "      <td>86032.413496</td>\n",
-       "      <td>4208.017409</td>\n",
-       "      <td>0.050170</td>\n",
-       "      <td>5.488949e+06</td>\n",
-       "      <td>63.300944</td>\n",
-       "      <td>5.402917e+06</td>\n",
-       "      <td>62.800944</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>4</td>\n",
-       "      <td>0.048912</td>\n",
-       "      <td>81824.396087</td>\n",
-       "      <td>4002.194861</td>\n",
-       "      <td>0.050170</td>\n",
-       "      <td>5.402917e+06</td>\n",
-       "      <td>65.530635</td>\n",
-       "      <td>5.321092e+06</td>\n",
-       "      <td>65.030635</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>95</th>\n",
-       "      <td>95</td>\n",
-       "      <td>0.014172</td>\n",
-       "      <td>31589.631852</td>\n",
-       "      <td>447.675627</td>\n",
-       "      <td>0.014273</td>\n",
-       "      <td>1.535344e+05</td>\n",
-       "      <td>4.360278</td>\n",
-       "      <td>1.219448e+05</td>\n",
-       "      <td>3.860278</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>96</th>\n",
-       "      <td>96</td>\n",
-       "      <td>0.014172</td>\n",
-       "      <td>31141.956225</td>\n",
-       "      <td>441.331347</td>\n",
-       "      <td>0.014273</td>\n",
-       "      <td>1.219448e+05</td>\n",
-       "      <td>3.415771</td>\n",
-       "      <td>9.080281e+04</td>\n",
-       "      <td>2.915771</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>97</th>\n",
-       "      <td>97</td>\n",
-       "      <td>0.014172</td>\n",
-       "      <td>30700.624878</td>\n",
-       "      <td>435.076976</td>\n",
-       "      <td>0.014273</td>\n",
-       "      <td>9.080281e+04</td>\n",
-       "      <td>2.457686</td>\n",
-       "      <td>6.010218e+04</td>\n",
-       "      <td>1.957686</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>98</th>\n",
-       "      <td>98</td>\n",
-       "      <td>0.014172</td>\n",
-       "      <td>30265.547903</td>\n",
-       "      <td>428.911239</td>\n",
-       "      <td>0.014273</td>\n",
-       "      <td>6.010218e+04</td>\n",
-       "      <td>1.485828</td>\n",
-       "      <td>2.983664e+04</td>\n",
-       "      <td>0.985828</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>99</th>\n",
-       "      <td>99</td>\n",
-       "      <td>0.014172</td>\n",
-       "      <td>29836.636664</td>\n",
-       "      <td>29836.636664</td>\n",
-       "      <td>0.507188</td>\n",
-       "      <td>2.983664e+04</td>\n",
-       "      <td>0.500000</td>\n",
-       "      <td>0.000000e+00</td>\n",
-       "      <td>0.000000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>100 rows × 9 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    age        qx             lx            dx        μx            Tx  \\\n",
-       "0     0  0.048912  100000.000000   4891.200000  0.050606  5.774515e+06   \n",
-       "1     1  0.048912   95108.800000   4651.961626  0.050170  5.674515e+06   \n",
-       "2     2  0.048912   90456.838374   4424.424879  0.050170  5.579406e+06   \n",
-       "3     3  0.048912   86032.413496   4208.017409  0.050170  5.488949e+06   \n",
-       "4     4  0.048912   81824.396087   4002.194861  0.050170  5.402917e+06   \n",
-       "..  ...       ...            ...           ...       ...           ...   \n",
-       "95   95  0.014172   31589.631852    447.675627  0.014273  1.535344e+05   \n",
-       "96   96  0.014172   31141.956225    441.331347  0.014273  1.219448e+05   \n",
-       "97   97  0.014172   30700.624878    435.076976  0.014273  9.080281e+04   \n",
-       "98   98  0.014172   30265.547903    428.911239  0.014273  6.010218e+04   \n",
-       "99   99  0.014172   29836.636664  29836.636664  0.507188  2.983664e+04   \n",
-       "\n",
-       "          e0x            Lx         ex  \n",
-       "0   57.245149  5.674515e+06  56.745149  \n",
-       "1   59.163405  5.579406e+06  58.663405  \n",
-       "2   61.180312  5.488949e+06  60.680312  \n",
-       "3   63.300944  5.402917e+06  62.800944  \n",
-       "4   65.530635  5.321092e+06  65.030635  \n",
-       "..        ...           ...        ...  \n",
-       "95   4.360278  1.219448e+05   3.860278  \n",
-       "96   3.415771  9.080281e+04   2.915771  \n",
-       "97   2.457686  6.010218e+04   1.957686  \n",
-       "98   1.485828  2.983664e+04   0.985828  \n",
-       "99   0.500000  0.000000e+00   0.000000  \n",
-       "\n",
-       "[100 rows x 9 columns]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Calculate cohort sizes - note this only works for increments of one year\n",
-    "cohort_series = pd.Series(index=ages)\n",
-    "cohort_size = 100000\n",
-    "for a in ages:\n",
-    "    cohort_series[a] = cohort_size\n",
-    "    cohort_size -= cohort_size * life_table.loc[a, 'death_rate']"
+    "all_rates, lifetables = pd.DataFrame(columns=years), pd.DataFrame(columns=years)\n",
+    "for yr in years:  # One column per data year in both frames\n",
+    "    yearly_rates = adapt_death_rates_for_lifetable(100, raw_death_rates[yr])\n",
+    "    all_rates[yr] = yearly_rates\n",
+    "    lifetables[yr] = get_lifetable_from_rates(yearly_rates)"
    ]
   },
   {
@@ -273,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "cohort_series.plot(labels={'value': 'survivng', 'index': 'age'}, height=500, title='life table').update_layout(showlegend=False)"
+    "lifetables.plot(labels={'value': 'survivng', 'index': 'age'}, height=500, title='life table').update_layout(showlegend=False)"
    ]
   }
  ],
@@ -293,7 +117,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,