Skip to content

Commit

Permalink
Revise James demographics notebook to multiple year data
Browse files Browse the repository at this point in the history
  • Loading branch information
jtrauer committed Nov 9, 2023
1 parent 38082cb commit c3bc84e
Showing 1 changed file with 61 additions and 237 deletions.
298 changes: 61 additions & 237 deletions notebooks/user/vbui/demo_jt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -14,257 +14,81 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Values in 1952, queried from the autumn database, hard-coded\n",
"death_rates = pd.Series(\n",
" [0.048912, 0.005655, 0.003052, 0.009387, 0.0141716], \n",
" index=[0, 5, 15, 35, 50]\n",
")"
"# Time points of the death rate data: fourteen values, five years apart, from 1952.5 to 2017.5\n",
"years = [1952.5 + 5 * period for period in range(14)]\n",
"death_rate_data = {\n",
" 0: [0.04891194320512563, 0.033918668027576884, 0.024432727316924446, 0.019408181689238014, 0.018148370056426787, 0.014535299525724738, 0.013035498777442808, 0.011254176500418787, 0.00931061542295055, 0.006118489529305104, 0.005678373734277731, 0.005333479069860507, 0.004783694403203202, 0.0043347634430972014],\n",
" 5: [0.005654833192668025, 0.005297274566250849, 0.0044664893264966974, 0.003846307644222109, 0.005107979016750706, 0.002711430406000275, 0.002147435231479338, 0.0016691038222857585, 0.0013164357375949395, 0.0010181764308804923, 0.0007447183497013854, 0.0005658632755602, 0.0005441317519979431, 0.0005100562239187423],\n",
" 15: [0.0030517485729701097, 0.002700755057428503, 0.002489838533286184, 0.002584612327589561, 0.004088806859043201, 0.0023927769572424534, 0.002209771395787853, 0.001885328047816437, 0.0016581044683420282, 0.0015277277213098862, 0.001427063312059633, 0.0012953587483924324, 0.0012356506891999784, 0.0011729827102219131],\n",
" 35: [0.009386682178966717, 0.007892734017957679, 0.006517135357865979, 0.006260889686458326, 0.008689175571776193, 0.00479116361370093, 0.004179181883878279, 0.004093556688069908, 0.004031166261049662, 0.003907751109471642, 0.0034886248607591164, 0.003155553373359392, 0.003040343537587886, 0.002980208830203494],\n",
" 50: [0.1417162315914261, 0.14004771975249025, 0.14050114600763583, 0.13991314779584452, 0.1446541208480976, 0.12199942887738785, 0.11497764682078969, 0.10618998428462792, 0.09778759372213668, 0.09191279368157758, 0.08603053502488202, 0.08429645911423628, 0.08132411751977145, 0.08108759595140565]\n",
"}\n",
"# Rows are the age breakpoints (the dictionary keys), columns are the years\n",
"raw_death_rates = pd.DataFrame.from_dict(death_rate_data, orient='index', columns=years)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"upper_age = 100\n",
"ages = range(upper_age)"
"def adapt_death_rates_for_lifetable(\n",
"    upper_age: int, \n",
"    rates: pd.Series,\n",
") -> pd.Series:\n",
"    \"\"\"Get the death rates applicable to each year of age.\n",
"    \n",
"    Each single year of age takes the rate of the entry in rates whose index value\n",
"    is the greatest one not exceeding that age.\n",
"    \n",
"    Args:\n",
"        upper_age: The top year of age to consider (exclusive, ages run from 0 to upper_age - 1)\n",
"        rates: The raw data for the death rates, indexed by age breakpoint in ascending order\n",
"    \n",
"    Returns:\n",
"        The death rates by year of age, as floats indexed by age\n",
"    \n",
"    \"\"\"\n",
"    ages = range(upper_age)\n",
"    # Explicit dtype, because an empty Series without one defaults to object dtype in pandas 2\n",
"    revised_rates = pd.Series(index=ages, dtype=float)\n",
"    for a in ages:\n",
"        # Position of the first breakpoint above this age. When there is none, the default of\n",
"        # zero becomes -1 after the subtraction below and so selects the last entry.\n",
"        # NOTE: this relies on the first breakpoint being age zero, otherwise younger ages\n",
"        # would also wrap around to the last entry.\n",
"        idx = next((i for i, age in enumerate(rates.index) if age > a), 0)\n",
"        revised_rates.loc[a] = rates.iloc[idx - 1]\n",
"    return revised_rates\n",
"\n",
"\n",
"def get_lifetable_from_rates(\n",
"    rates: pd.Series,\n",
") -> pd.Series:\n",
"    \"\"\"Calculate cohort sizes - note this only works for increments of one year\n",
"    (intended to be used with adapt_death_rates_for_lifetable above).\n",
"    \n",
"    Starting from a cohort of 100,000, each age records the number still alive on\n",
"    reaching that age, after which that age's death rate is applied to the survivors.\n",
"    \n",
"    Args:\n",
"        rates: Annual death rates from adapt_death_rates_for_lifetable\n",
"    \n",
"    Returns:\n",
"        The life table, i.e. the cohort size on reaching each age, with the same index as rates\n",
"    \n",
"    \"\"\"\n",
"    # Explicit dtype, because an empty Series without one defaults to object dtype in pandas 2\n",
"    lifetable = pd.Series(index=rates.index, dtype=float)\n",
"    cohort_size = 100000\n",
"    for a in rates.index:\n",
"        lifetable.loc[a] = cohort_size\n",
"        # Read from the rates argument, so the function does not depend on a notebook-level variable\n",
"        cohort_size -= cohort_size * rates.loc[a]\n",
"    return lifetable"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get the death rates applicable to each age\n",
"life_table = pd.DataFrame(index=ages, columns=['death_rate'])\n",
"for a in ages:\n",
" idx = next((i for i, age in enumerate(death_rates.index) if age > a), 0) # The zero is a trick to get the last element when one is subtracted\n",
" life_table.loc[a, 'death_rate'] = death_rates.iloc[idx - 1]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>qx</th>\n",
" <th>lx</th>\n",
" <th>dx</th>\n",
" <th>μx</th>\n",
" <th>Tx</th>\n",
" <th>e0x</th>\n",
" <th>Lx</th>\n",
" <th>ex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0.048912</td>\n",
" <td>100000.000000</td>\n",
" <td>4891.200000</td>\n",
" <td>0.050606</td>\n",
" <td>5.774515e+06</td>\n",
" <td>57.245149</td>\n",
" <td>5.674515e+06</td>\n",
" <td>56.745149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.048912</td>\n",
" <td>95108.800000</td>\n",
" <td>4651.961626</td>\n",
" <td>0.050170</td>\n",
" <td>5.674515e+06</td>\n",
" <td>59.163405</td>\n",
" <td>5.579406e+06</td>\n",
" <td>58.663405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>0.048912</td>\n",
" <td>90456.838374</td>\n",
" <td>4424.424879</td>\n",
" <td>0.050170</td>\n",
" <td>5.579406e+06</td>\n",
" <td>61.180312</td>\n",
" <td>5.488949e+06</td>\n",
" <td>60.680312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>0.048912</td>\n",
" <td>86032.413496</td>\n",
" <td>4208.017409</td>\n",
" <td>0.050170</td>\n",
" <td>5.488949e+06</td>\n",
" <td>63.300944</td>\n",
" <td>5.402917e+06</td>\n",
" <td>62.800944</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>0.048912</td>\n",
" <td>81824.396087</td>\n",
" <td>4002.194861</td>\n",
" <td>0.050170</td>\n",
" <td>5.402917e+06</td>\n",
" <td>65.530635</td>\n",
" <td>5.321092e+06</td>\n",
" <td>65.030635</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>95</td>\n",
" <td>0.014172</td>\n",
" <td>31589.631852</td>\n",
" <td>447.675627</td>\n",
" <td>0.014273</td>\n",
" <td>1.535344e+05</td>\n",
" <td>4.360278</td>\n",
" <td>1.219448e+05</td>\n",
" <td>3.860278</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>96</td>\n",
" <td>0.014172</td>\n",
" <td>31141.956225</td>\n",
" <td>441.331347</td>\n",
" <td>0.014273</td>\n",
" <td>1.219448e+05</td>\n",
" <td>3.415771</td>\n",
" <td>9.080281e+04</td>\n",
" <td>2.915771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>97</td>\n",
" <td>0.014172</td>\n",
" <td>30700.624878</td>\n",
" <td>435.076976</td>\n",
" <td>0.014273</td>\n",
" <td>9.080281e+04</td>\n",
" <td>2.457686</td>\n",
" <td>6.010218e+04</td>\n",
" <td>1.957686</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>98</td>\n",
" <td>0.014172</td>\n",
" <td>30265.547903</td>\n",
" <td>428.911239</td>\n",
" <td>0.014273</td>\n",
" <td>6.010218e+04</td>\n",
" <td>1.485828</td>\n",
" <td>2.983664e+04</td>\n",
" <td>0.985828</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>99</td>\n",
" <td>0.014172</td>\n",
" <td>29836.636664</td>\n",
" <td>29836.636664</td>\n",
" <td>0.507188</td>\n",
" <td>2.983664e+04</td>\n",
" <td>0.500000</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" age qx lx dx μx Tx \\\n",
"0 0 0.048912 100000.000000 4891.200000 0.050606 5.774515e+06 \n",
"1 1 0.048912 95108.800000 4651.961626 0.050170 5.674515e+06 \n",
"2 2 0.048912 90456.838374 4424.424879 0.050170 5.579406e+06 \n",
"3 3 0.048912 86032.413496 4208.017409 0.050170 5.488949e+06 \n",
"4 4 0.048912 81824.396087 4002.194861 0.050170 5.402917e+06 \n",
".. ... ... ... ... ... ... \n",
"95 95 0.014172 31589.631852 447.675627 0.014273 1.535344e+05 \n",
"96 96 0.014172 31141.956225 441.331347 0.014273 1.219448e+05 \n",
"97 97 0.014172 30700.624878 435.076976 0.014273 9.080281e+04 \n",
"98 98 0.014172 30265.547903 428.911239 0.014273 6.010218e+04 \n",
"99 99 0.014172 29836.636664 29836.636664 0.507188 2.983664e+04 \n",
"\n",
" e0x Lx ex \n",
"0 57.245149 5.674515e+06 56.745149 \n",
"1 59.163405 5.579406e+06 58.663405 \n",
"2 61.180312 5.488949e+06 60.680312 \n",
"3 63.300944 5.402917e+06 62.800944 \n",
"4 65.530635 5.321092e+06 65.030635 \n",
".. ... ... ... \n",
"95 4.360278 1.219448e+05 3.860278 \n",
"96 3.415771 9.080281e+04 2.915771 \n",
"97 2.457686 6.010218e+04 1.957686 \n",
"98 1.485828 2.983664e+04 0.985828 \n",
"99 0.500000 0.000000e+00 0.000000 \n",
"\n",
"[100 rows x 9 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate cohort sizes - note this only works for increments of one year\n",
"cohort_series = pd.Series(index=ages)\n",
"cohort_size = 100000\n",
"for a in ages:\n",
" cohort_series[a] = cohort_size\n",
" cohort_size -= cohort_size * life_table.loc[a, 'death_rate']"
"# One column per data year: death rates by single year of age, and the life table derived from them\n",
"all_rates, lifetables = pd.DataFrame(columns=years), pd.DataFrame(columns=years)\n",
"for year in years:\n",
"    # Expand the raw age-band rates for this year to ages 0-99, then run the cohort through them\n",
"    all_rates[year] = adapt_death_rates_for_lifetable(100, raw_death_rates[year])\n",
"    lifetables[year] = get_lifetable_from_rates(all_rates[year])"
]
},
{
Expand All @@ -273,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"cohort_series.plot(labels={'value': 'survivng', 'index': 'age'}, height=500, title='life table').update_layout(showlegend=False)"
"# Plot the life tables for all years (lifetables is the DataFrame built in the previous cell)\n",
"lifetables.plot(labels={'value': 'surviving', 'index': 'age'}, height=500, title='life table').update_layout(showlegend=False)"
]
}
],
Expand All @@ -293,7 +117,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down

0 comments on commit c3bc84e

Please sign in to comment.