Merge branch 'master' of github.com:monash-emu/AuTuMN

monash-emu · Aug 14, 2023 · 9178009 · 9178009
2 parents 0c162ee + 525f581
commit 9178009
Show file tree

Hide file tree

Showing 23 changed files with 580 additions and 362 deletions.
diff --git a/autumn/projects/sm_covid2/common_school/calibration.py b/autumn/projects/sm_covid2/common_school/calibration.py
@@ -66,25 +66,24 @@ def get_bcm_object(iso3, analysis="main"):
             est.TruncatedNormalTarget(
                 "prop_ever_infected_age_matched",
                 sero_target.data,
-                trunc_range=[0., 1.],  # will reject runs with >10% attack rate before epidemic
-                stdev=.1
+                trunc_range=[0., 1.],
+                stdev=sero_target.stdev
             )
-            # est.BinomialTarget(
-            #     "prop_ever_infected_age_matched", 
-            #     sero_target.data, 
-            #     sample_sizes=sero_target.sample_sizes
-            # )
         )
 
     # Add a safeguard target to prevent a premature epidemic occurring before the first reported death
     # Early calibrations sometimes produced a rapid epidemic reaching 100% attack rate before the true epidemic start
-    zero_attack_rate_series = pd.Series([0.], index=[death_target_data.index[0]])
+    zero_series = pd.Series([0.], index=[death_target_data.index[0]]) #  could be any value, only the time index matters
+
+    def censored_func(modelled, data, parameters, time_weights):     
+        # Returns a very large negative number if modelled value is greater than 1%. Returns 0 otherwise.
+        return jnp.where(modelled > 0.01, -1.e11, 0.)[0]
+
     targets.append(
-        est.TruncatedNormalTarget(
-            "prop_ever_infected",
-            zero_attack_rate_series,
-            trunc_range=[0., 0.10],  # will reject runs with >10% attack rate before epidemic
-            stdev=1000.  # flat distribution
+        est.CustomTarget(
+            "prop_ever_infected", 
+            zero_series, # could be any value, only the time index matters
+            censored_func
         )
     )
 

diff --git a/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py b/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py
@@ -26,7 +26,7 @@ def update_rcparams():
             'axes.labelsize': "x-large",
             'xtick.labelsize': 'large',
             'ytick.labelsize': 'large',
-            'legend.fontsize': 'large',
+            'legend.fontsize': 'medium',
             'legend.title_fontsize': 'large',
             'lines.linewidth': 1.,
 
@@ -54,7 +54,8 @@ def update_rcparams():
     "hospital_occupancy": "Hospital pressure",
     "icu_admissions": "daily ICU admissions",
     "icu_occupancy": "total ICU beds",
-    "prop_ever_infected": "ever infected with Delta or Omicron",
+    "prop_ever_infected": "ever infected",
+    "prop_ever_infected_age_matched": "Prop. ever infected\n(age-matched)",
 
     "transformed_random_process": "Transformed random process",
 
@@ -70,8 +71,8 @@ def update_rcparams():
 )
 
 SCHOOL_COLORS = {
-    'partial': 'azure',
-    'full': 'thistle'
+    'partial': 'silver', #  'azure',
+    'full': 'gold' # 'thistle'
 }
 
 def y_fmt(tick_val, pos):
@@ -109,7 +110,7 @@ def add_school_closure_patches(ax, iso3, ymax, school_colors=SCHOOL_COLORS):
     ax.vlines(closed_dates_str, ymin=0, ymax=ymax, lw=1, alpha=1, color=school_colors['full'], zorder = 1)
 
 
-def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3):
+def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3, include_legend=True):
 
     bcm = get_bcm_object(iso3, "main")
 
@@ -120,7 +121,7 @@ def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3):
     if output_name in bcm.targets:
         t = bcm.targets[output_name].data
         t.index = ref_times_to_dti(REF_DATE, t.index)
-        axis.scatter(list(t.index), t, marker=".", color='black', label='observations', zorder=11, s=3.)
+        axis.scatter(list(t.index), t, marker=".", color='black', label='observations', zorder=11, s=5.)
 
     colour = unc_sc_colours[0]      
 
@@ -151,14 +152,18 @@ def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3):
 
     # axis.tick_params(axis="x", labelrotation=45)
     title = output_name if output_name not in title_lookup else title_lookup[output_name]
+    if output_name == "prop_ever_infected_age_matched" and output_name not in bcm.targets:
+        title = "Prop. ever infected"
+
     axis.set_ylabel(title)
     plt.tight_layout()
 
-    plt.legend(markerscale=2.)
+    if include_legend:
+        plt.legend(markerscale=2.)
     axis.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt))
 
 
-def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=False):
+def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=False, include_legend=True):
     # update_rcparams()
 
     ymax = 0.
@@ -176,6 +181,7 @@ def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=Fals
                 time, 
                 df[df["quantile"] == .25]['value'], df[df["quantile"] == .75]['value'], 
                 color=colour, alpha=0.7, 
+                edgecolor=None,
                 # label=interval_label,
                 zorder=scenario_zorder
             )
@@ -194,7 +200,8 @@ def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=Fals
     # axis.set_xlim((model_start, model_end))
     axis.set_ylim((0, plot_ymax))
 
-    axis.legend()
+    if include_legend:
+        axis.legend()
 
     axis.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt))
 
@@ -333,23 +340,25 @@ def make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_f
     right_grid = inner_grid[0, 1]  # will contain final size plots
 
     #### Split left panel into 3 panels
-    inner_left_grid = gridspec.GridSpecFromSubplotSpec(3, 1, subplot_spec=left_grid, hspace=.15, height_ratios=(1, 1, 1))
-    # calibration
+    inner_left_grid = gridspec.GridSpecFromSubplotSpec(4, 1, subplot_spec=left_grid, hspace=.15, height_ratios=(1, 1, 1, 1))
+    # calibration, deaths
     ax2 = fig.add_subplot(inner_left_grid[0, 0])
     plot_model_fit_with_uncertainty(ax2, uncertainty_df, "infection_deaths", iso3)
     format_date_axis(ax2)
     remove_axes_box(ax2)
-    # plt.setp(ax2.get_xticklabels(), visible=False)
+    # seropos prop over time
+    ax_sero = fig.add_subplot(inner_left_grid[1, 0])
+    plot_model_fit_with_uncertainty(ax_sero, uncertainty_df, "prop_ever_infected_age_matched", iso3, include_legend=False)
+    format_date_axis(ax_sero)
+    remove_axes_box(ax_sero)
     # scenario compare deaths
-    ax3 = fig.add_subplot(inner_left_grid[1, 0]) #, sharex=ax2)
+    ax3 = fig.add_subplot(inner_left_grid[2, 0]) #, sharex=ax2)
     plot_two_scenarios(ax3, uncertainty_df, "infection_deaths", iso3, True)
     format_date_axis(ax3)
     remove_axes_box(ax3)
-
-    # plt.setp(ax3.get_xticklabels(), visible=False)
     # scenario compare hosp
-    ax4 = fig.add_subplot(inner_left_grid[2, 0])  #, sharex=ax2)
-    plot_two_scenarios(ax4, uncertainty_df, "hospital_occupancy", iso3, True)
+    ax4 = fig.add_subplot(inner_left_grid[3, 0])  #, sharex=ax2)
+    plot_two_scenarios(ax4, uncertainty_df, "hospital_occupancy", iso3, True, include_legend=False)
     format_date_axis(ax4)
     remove_axes_box(ax4)
 
@@ -419,3 +428,18 @@ def make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_f
     fig.savefig(os.path.join(output_folder, "tiling.pdf"), facecolor="white")
 
     plt.close()
+
+
+def test_tiling_plot():
+    from pathlib import Path
+    import pandas as pd
+
+    directory = Path.cwd() / "autumn" / "projects" / "sm_covid2" / "common_school" / "output_plots" / "test_tiling_plot"
+
+    iso3 = "FRA"
+    uncertainty_df = pd.read_parquet(directory / "uncertainty_df.parquet")
+    diff_quantiles_df = pd.read_parquet(directory / "diff_quantiles_df.parquet")
+    output_folder = directory
+    make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_folder)
+
+# test_tiling_plot()
diff --git a/autumn/projects/sm_covid2/common_school/project_maker.py b/autumn/projects/sm_covid2/common_school/project_maker.py
@@ -14,7 +14,7 @@
 )
 from autumn.calibration import Calibration
 from autumn.calibration.priors import UniformPrior
-from autumn.calibration.targets import NegativeBinomialTarget, BinomialTarget
+from autumn.calibration.targets import NegativeBinomialTarget, BinomialTarget, TruncNormalTarget
 from autumn.models.sm_covid2 import get_base_params, build_model
 from autumn.model_features.jax.random_process import set_up_random_process
 from autumn.settings import Region, Models
@@ -58,7 +58,7 @@
 def get_school_project(iso3, analysis="main"):
 
     # read seroprevalence data (needed to specify the sero age range params and then to define the calibration targets)
-    positive_prop, adjusted_sample_size, midpoint_as_int, sero_age_min, sero_age_max = get_sero_estimate(iso3)
+    positive_prop, sero_target_sd, midpoint_as_int, sero_age_min, sero_age_max = get_sero_estimate(iso3)
 
     # Load timeseries
     timeseries = get_school_project_timeseries(iso3, sero_data={
@@ -97,10 +97,11 @@ def get_school_project(iso3, analysis="main"):
 
     if positive_prop is not None:
         targets.append(
-            BinomialTarget(
-                data=pd.Series(data=[positive_prop], index=[midpoint_as_int], name="prop_ever_infected_age_matched"), 
-                sample_sizes = pd.Series(data=[adjusted_sample_size], index=[midpoint_as_int])
-            )
+            TruncNormalTarget(
+                data=pd.Series(data=[positive_prop], index=[midpoint_as_int], name="prop_ever_infected_age_matched"),
+                trunc_range=(0., 1.),
+                stdev=sero_target_sd
+            )          
         )
 
     # set up random process if relevant
@@ -392,13 +393,13 @@ def get_sero_estimate(iso3):
 
     country_data = df[df['alpha_3_code'] == iso3].to_dict(orient="records")[0]
 
-    # work out the adjusted sample size, accounting for survey sample size, risk of bias and geographic level (national/subnational)
-    bias_risk_adjustment = {
-        0: 1./3., # high risk of bias
-        1: 2./3., # moderate risk of bias
-        2: 1., # low risk of bias
-    }
-    adjusted_sample_size = round(country_data['denominator_value'] * bias_risk_adjustment[country_data['overall_risk_of_bias']])
+    # work out the target's standard deviation according to the risk of bias
+    sero_target_sd = {
+        0: .2, # high risk of bias
+        1: .1, # moderate risk of bias
+        2: .05, # low risk of bias
+    }[country_data['overall_risk_of_bias']]
+    # adjusted_sample_size = round(country_data['denominator_value'] * bias_risk_adjustment[country_data['overall_risk_of_bias']])
 
     # work out the survey midpoint
     start_date = datetime.fromisoformat(country_data['sampling_start_date'])
@@ -407,4 +408,4 @@ def get_sero_estimate(iso3):
     midpoint = start_date + (end_date - start_date) / 2
     midpoint_as_int = (midpoint - datetime(2019, 12, 31)).days
 
-    return country_data["serum_pos_prevalence"], adjusted_sample_size, midpoint_as_int, country_data['age_min'], country_data['age_max']
+    return country_data["serum_pos_prevalence"], sero_target_sd, midpoint_as_int, country_data['age_min'], country_data['age_max']
diff --git a/data/inputs/school-closure/serodata_national.csv b/data/inputs/school-closure/serodata_national.csv
diff --git a/docs/tex/tex_descriptions/models/sm_covid/code.tex b/docs/tex/tex_descriptions/models/sm_covid/code.tex
@@ -0,0 +1,9 @@
+\section{Software and code used to conduct the analyses}
+
+\subsection{Code}
+The Python code and data used to perform the analyses is fully available on Github at the following link:
+\href{https://github.com/monash-emu/AuTuMN}{https://github.com/monash-emu/AuTuMN}. In particular, \textcolor{red}{LIST LINKS TO SOME SUB-MODULES HERE}.
+
+
+\subsection{Software implementation}
+\input{../../tex_descriptions/models/sm_covid/runtime.tex}
diff --git a/docs/tex/tex_descriptions/models/sm_covid/deaths.tex b/docs/tex/tex_descriptions/models/sm_covid/deaths.tex
@@ -1,14 +1,14 @@
-We estimate the number of COVID-19 deaths over time using a similar approach 
-as for the hospital pressure indicator. We use the age-specific infection fatality rates reported in
-ODriscoll et al. \cite{odriscoll-2021}, adjusted for vaccination status and for the infecting strain
+We estimated the number of COVID-19 deaths over time using a similar approach 
+as for the hospital pressure indicator. We used the age-specific infection fatality rates reported in
+O'Driscoll et al. \cite{odriscoll-2021}, adjusted for vaccination status and for the infecting strain
 to estimate COVID-19 mortality. Using the same notations as in Section \ref{hosp}, the number of COVID-19
-deaths observed at time $t$ is obtained by:
+deaths observed at time $t$ was obtained by:
 \begin{equation}
 \mu(t) = m_C \sum_{a,v,s} ifr_{a,v,s} \int_{u \geq 0}  i_{a,v,s}(t-u)g_{d}(u) du   \quad,
 \end{equation}
 where $ifr_{a,v,s}$ is the risk of death given infection for age $a$, vaccination status $v$ and strain $s$, 
 and $g_d$ is the probability density function of the statistical distribution chosen to represent the 
-time from symptom onset to death (Table \ref{param_table}). We use a country-specific adjuster $m_C$ to 
+time from symptom onset to death (Table \ref{param_table}). We used the country-specific adjuster $m_C$ to 
 capture the fact that the infection fatality ratio is expected to vary by country, in part due to 
-differences in COVID-19 death definition and reporting standards. This adjustment is automatically calibrated 
+differences in COVID-19 death definition and reporting standards. This adjustment was automatically calibrated 
 by the MCMC (Section \ref{calibration}).
diff --git a/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex b/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex
@@ -1,19 +1,19 @@
-The risk of hospitalisation given infection is expected to vary dramatically by setting.
+The risk of hospitalisation given infection was expected to vary markedly by setting.
 For example, different countries may have different criteria for whether or not a COVID-19 
 patient should be admitted to a hospital. This makes it difficult to provide accurate 
 estimates of hospitalisation rates for multiple countries. 
 
-This is why we introduce a universal indicator named ``hospital pressure'' in our analysis. This indicator
-is obtained by considering the age-specific risk of hospitalisation given infection observed in the first year
+For this reason we introduced a universal indicator named ``hospital pressure'' in our analysis. This indicator
+was obtained by considering the age-specific risk of hospitalisation given infection observed in the first year
 of the pandemic in the Netherlands, adjusted for vaccination status and for the infecting strain (Table \ref{param_table}).
 The ``hospital pressure'' indicator can therefore be interpreted as the level of hospital occupancy that
 would be observed in the analysed country if the rates of hospitalisation given infection in this country were the same
-as in the Netherlands. This indicator is expected to be roughly proportional to the actual hospital occupancy level of the 
-studied country. Note that this indicator is used in order to make comparisons between scenarios such that one should interpret the relative
+as for the Netherlands. This quantity is expected to vary proportionately with occupancy over time, providing an indicator of 
+hospital pressure. Note that this indicator was used in order to make comparisons between scenarios, such that one should interpret the relative
 differences between scenarios rather than the absolute values of the indicator. 
 
 Let us denote $i_{a,v,s}(t)$ the number of new disease episodes estimated to start at time $t$ for people aged $a$ with vaccination status $v$
-and infected with strain $s$. The number of new hospital admissions occurring at time $t$ is calculated using the following
+and infected with strain $s$. The number of new hospital admissions occurring at time $t$ was calculated using the following
 convolution product:
 \begin{equation}
  \eta(t) = \sum_{a,v,s} \kappa_{a,v,s} \int_{u \geq 0}  i_{a,v,s}(t-u)g_{h}(u) du   \quad,
@@ -22,7 +22,7 @@
 based on the Netherlands data, and $g_h$ is the probability density function of the statistical distribution chosen to represent the 
 time from symptom onset to hospitalisation (Table \ref{param_table}). 
 
-We then compute the ``hospital pressure'' quantity $h$, which is an indicator of hospital occupancy level, by combining the number of new 
+We then computed the ``hospital pressure'' quantity $h$, which is an indicator of hospital occupancy level, by combining the number of new 
 hospital admissions $\eta$ with the statistical distribution used to model hospital stay duration:
 \begin{equation}
 h(t) = \int_{u \geq 0}  \eta(t-u) (1 - \tau(u)) du   \quad,