Merge branch 'master' of github.com:monash-emu/AuTuMN

monash-emu · Sep 19, 2023 · 7c427da · 7c427da
2 parents 187d199 + bfb9a30
commit 7c427da
Show file tree

Hide file tree

Showing 21 changed files with 1,147 additions and 297 deletions.
diff --git a/autumn/models/sm_covid2/params.yml b/autumn/models/sm_covid2/params.yml
@@ -39,7 +39,7 @@ seed_duration: 7
 
 sojourns:
   latent: 6.65  # wild-type
-  active: 6.5
+  active: 4.5
 
 mobility:
   region: null
@@ -168,8 +168,8 @@ voc_emergence:
     icu_risk_adjuster: 1.
     cross_protection:
       wild_type: 1.
-      delta: 0.68 # 32% immune escape   DOI: 10.1503/cmaj.211248
-      omicron: .45  # 55% immune escape  DOI: 10.1503/cmaj.211248
+      delta: 0.82  # https://doi.org/10.1016/S0140-6736(22)02465-5
+      omicron: 0.45 # https://doi.org/10.1016/S0140-6736(22)02465-5
   delta:
     starting_strain: false
     seed_prop: 0.
@@ -185,7 +185,7 @@ voc_emergence:
     cross_protection:
       wild_type: 1.
       delta: 1.
-      omicron: .45  # 55% immune escape  DOI: 10.1503/cmaj.211248
+      omicron: .45  # https://doi.org/10.1016/S0140-6736(22)02465-5
   omicron:
     starting_strain: false
     seed_prop: 0.

diff --git a/autumn/projects/sm_covid2/common_school/calibration_plots/opti_plots.py b/autumn/projects/sm_covid2/common_school/calibration_plots/opti_plots.py
@@ -172,13 +172,9 @@ def plot_model_fit_with_ll(bcm, params, outfile=None):
     return fig
 
 
-def plot_multiple_model_fits(bcm, params_list, outfile=None):
+def plot_model_fits(results_df, bcm, outfile=None):
     REF_DATE = datetime.date(2019,12,31)
 
-    n_samples = len(params_list)
-    cmap = plt.cm.get_cmap("plasma", n_samples)
-    colors = [cmap(i) for i in range(n_samples)]
-
     targets = {}
     for output in bcm.targets:
         t = copy(bcm.targets[output].data)
@@ -188,46 +184,22 @@ def plot_multiple_model_fits(bcm, params_list, outfile=None):
     fig, axs = plt.subplots(3, 1, figsize=(10, 10), height_ratios=(2., 1., 2.), sharex=True)
     death_ax, rp_ax, sero_ax = axs[0], axs[1], axs[2]
 
-    # set up the three axes
     death_ax.set_ylabel("COVID-19 deaths")   
+    results_df['infection_deaths_ma7'].plot(ax=death_ax)
+    targets["infection_deaths_ma7"].plot(style='.', ax=death_ax, label="", zorder=20, color='black')
+
     rp_ax.set_ylabel("Random process")
+    results_df['transformed_random_process'].plot(ax=rp_ax)
 
+    # Sero data
+    output = "prop_ever_infected_age_matched" if "prop_ever_infected_age_matched" in targets else "prop_ever_infected"
+    results_df[output].plot(ax=sero_ax)
     if "prop_ever_infected_age_matched" in targets:
         sero_ax.set_ylabel("Prop. ever infected\n(age-matched)")
         targets["prop_ever_infected_age_matched"].plot(style='.', ax=sero_ax, zorder=20, color='black')
     else:
         sero_ax.set_ylabel("Prop. ever infected")
 
-    for i, params in enumerate(params_list):
-        run_model = bcm.run(params)
-        # ll = bcm.loglikelihood(**params)  # not ideal...
-        # rounded_ll = round(ll, 2)
-
-        # Deaths
-        death_ax.plot(
-            list(run_model.derived_outputs["infection_deaths_ma7"].index), 
-            run_model.derived_outputs["infection_deaths_ma7"],
-            label=f"search {i}",  #: ll={rounded_ll}",
-            color=colors[i]
-        )
-
-        # Random Process
-        rp_ax.plot(
-            list(run_model.derived_outputs["transformed_random_process"].index),
-            run_model.derived_outputs["transformed_random_process"],
-            color=colors[i]
-        )
-
-        # Sero data
-        output = "prop_ever_infected_age_matched" if "prop_ever_infected_age_matched" in targets else "prop_ever_infected"
-        sero_ax.plot(
-            list(run_model.derived_outputs[output].index),
-            run_model.derived_outputs[output],
-            color=colors[i]
-        )
-
-    targets["infection_deaths_ma7"].plot(style='.', ax=death_ax, label="", zorder=20, color='black')
-
     # Post plotting processes
     # death
     death_ax.legend(loc='best', ncols=2)

diff --git a/autumn/projects/sm_covid2/common_school/runner_tools.py b/autumn/projects/sm_covid2/common_school/runner_tools.py
@@ -26,7 +26,7 @@
 from autumn.projects.sm_covid2.common_school.calibration import get_bcm_object
 from autumn.projects.sm_covid2.common_school.project_maker import get_school_project
 
-from autumn.projects.sm_covid2.common_school.calibration_plots.opti_plots import plot_opti_params, plot_model_fit, plot_multiple_model_fits
+from autumn.projects.sm_covid2.common_school.calibration_plots.opti_plots import plot_opti_params, plot_model_fit, plot_model_fits
 from autumn.projects.sm_covid2.common_school.calibration_plots.mc_plots import make_post_mc_plots
 
 from autumn.projects.sm_covid2.common_school.output_plots.country_spec import make_country_output_tiling
@@ -75,24 +75,6 @@
     Functions related to model calibration
 """
 
-def sample_with_lhs(n_samples, bcm):
-
-    # sample using LHS in the right dimension
-    lhs_sampled_params = [p for p in bcm.priors if p != "random_process.delta_values"]  
-    d = len(lhs_sampled_params)
-    sampler = qmc.LatinHypercube(d=d)
-    regular_sample = sampler.random(n=n_samples)
-
-    # scale the data cube to match parameter bounds
-    l_bounds = [bcm.priors[p].bounds()[0] for p in lhs_sampled_params]
-    u_bounds = [bcm.priors[p].bounds()[1] for p in lhs_sampled_params]
-    sample = qmc.scale(regular_sample, l_bounds, u_bounds)
-
-    sample_as_dicts = [{p: sample[i][j] for j, p in enumerate(lhs_sampled_params)} for i in range(n_samples)]
-
-    return sample_as_dicts
-
-
 def optimise_model_fit(bcm, num_workers: int = 8, warmup_iterations: int = 0, search_iterations: int = 5000, suggested_start: dict = None, opt_class=ng.optimizers.CMA):
 
     # Build optimizer
@@ -251,37 +233,36 @@ def run_full_analysis(
     # Sample optimisation starting points with LHS
     if logger:
         logger.info("Perform LHS sampling")
-    sample_as_dicts = sample_with_lhs(run_config['n_opti_searches'], bcm)
-
+    lhs_samples = bcm.sample.lhs(run_config['n_opti_searches'])
+    lhs_samples_as_dicts = lhs_samples.convert("list_of_dicts")
+
     # Store starting points
     with open(out_path / "LHS_init_points.yml", "w") as f:
-        yaml.dump(sample_as_dicts, f)
+        yaml.dump(lhs_samples_as_dicts, f)
 
     # Perform optimisation searches
     if logger:
         logger.info(f"Perform optimisation ({run_config['n_opti_searches']} searches)")
     n_opti_workers = 8
     def opti_func(sample_dict):
-        best_p, _ = optimise_model_fit(bcm, num_workers=n_opti_workers, search_iterations=run_config['opti_budget'], suggested_start=sample_dict)
+        suggested_start = {p: v for p, v in sample_dict.items() if p != 'random_process.delta_values'}
+        best_p, _ = optimise_model_fit(bcm, num_workers=n_opti_workers, search_iterations=run_config['opti_budget'], suggested_start=suggested_start)
         return best_p
 
-    best_params = map_parallel(opti_func, sample_as_dicts, n_workers=int(2 * run_config['n_cores'] / n_opti_workers))  # oversubscribing
+    best_params = map_parallel(opti_func, lhs_samples_as_dicts, n_workers=int(2 * run_config['n_cores'] / n_opti_workers))  # oversubscribing
     # Store optimal solutions
     with open(out_path / "best_params.yml", "w") as f:
         yaml.dump(best_params, f)
 
     if logger:
         logger.info("... optimisation completed")
-
-    # Keep only n_chains best solutions
-    loglikelihoods = [bcm.loglikelihood(**p) for p in best_params]
-    ll_cutoff = sorted(loglikelihoods, reverse=True)[run_config['n_chains'] - 1]
-
-    retained_init_points, retained_best_params = [], []
-    for init_sample, best_p, ll in zip(sample_as_dicts, best_params, loglikelihoods):
-        if ll >= ll_cutoff:
-            retained_init_points.append(init_sample)
-            retained_best_params.append(best_p)
+
+    # Keep only n_chains best solutions and plot optimised fits
+    best_outputs = esamp.model_results_for_samples(best_params, bcm, include_extras=True)
+    lle, results = best_outputs.extras, best_outputs.results
+    retained_indices = lle.sort_values("loglikelihood", ascending=False).index[0:run_config['n_chains']].to_list()    
+    retained_best_params = [best_params[i] for i in retained_indices]
+    retained_init_points = [lhs_samples_as_dicts[i] for i in retained_indices]
 
     # Store retained optimal solutions
     with open(out_path / "retained_best_params.yml", "w") as f:
@@ -291,7 +272,8 @@ def opti_func(sample_dict):
     plot_opti_params(retained_init_points, retained_best_params, bcm, output_folder)
 
     # Plot optimised model fits on a same figure
-    plot_multiple_model_fits(bcm, retained_best_params, out_path / "optimal_fits.png")
+    retained_results = results.loc[:, pd.IndexSlice[results.columns.get_level_values(1).isin(retained_indices), :]]
+    plot_model_fits(retained_results, bcm, out_path / "optimal_fits.png")
 
     # Early return if MCMC not requested
     if run_config['metropolis_draws'] == 0:

diff --git a/docs/tex/tex_descriptions/models/sm_covid/deaths.tex b/docs/tex/tex_descriptions/models/sm_covid/deaths.tex
@@ -1,6 +1,6 @@
 We estimated the number of COVID-19 deaths over time using a similar approach 
 as for the hospital pressure indicator. We used the age-specific infection fatality rates reported in
-O'Driscoll et al. \cite{odriscoll-2021}, adjusted for vaccination status and for the infecting strain
+O'Driscoll et al. \cite{odriscoll2021}, adjusted for vaccination status and for the infecting strain
 to estimate COVID-19 mortality. Using the same notations as in Section \ref{hosp}, the number of COVID-19
 deaths observed at time $t$ was obtained by:
 \begin{equation}

diff --git a/docs/tex/tex_descriptions/models/sm_covid/mixing.tex b/docs/tex/tex_descriptions/models/sm_covid/mixing.tex
@@ -31,7 +31,7 @@
 Reduced attendance at schools was represented through the function $s$, which represents the proportion of all school students 
 currently attending on-site teaching. If schools are fully closed at time $t$, \(s(t)=0\) and \(C_{S}\) does not contribute to the overall 
 mixing matrix \(C(t)\). 
-The function $s$ was derived from the UNESCO database on school closures from the start of the COVID-19 pandemic (\textcolor{red}{ADD REF HERE}).
+The function $s$ was derived from the UNESCO database on school closures from the start of the COVID-19 pandemic \cite{unesco2023}.
 This database provides school opening status over time as a categorical variable taking the following values: ``Fully open'', ``Partially open'', ``Academic break'', ``Closed due to COVID-19''.
 Table \ref{tab:unesco_categories} indicates how the different categorical values were converted into the numerical function $s$.
 
@@ -40,12 +40,12 @@
   \begin{center}
     \caption{Assumed percentage of students on-site for the different UNESCO school closure categories.}
     \label{tab:unesco_categories}
-    \begin{tabular}{p{5cm} | p{5cm}}
+    \begin{tabular}{p{5cm} | p{7cm}}
           \hline
-          \textbf{UNESCO category} & \textbf{Assumed proportion of students on-site ($s(t)$)} \\
+          \textbf{UNESCO category} & \textbf{Assumed proportion of students on-site at national level ($s(t)$)} \\
           \hline
           Fully open & 100\% \\
-          Partially open & 10-30\% \\
+          Partially open & 10-50\% \\
           Academic break & 0\% \\
           Closed due to COVID-19 & 0\% \\
           \hline
@@ -54,7 +54,7 @@
   \end{table}
 
 We included uncertainty around the value associated with the partial closure category, as there was no quantitative data available to 
-inform this parameter. The partial closure periods are likely to be periods where only a small fraction of students such as children of ``essential workers'' were 
+inform this parameter \cite{unesco2023}. The partial closure periods are likely to be periods where only a small fraction of students such as children of ``essential workers'' were 
 attending school. We assumed that between 10 and 30\% of students attended on-site learning during these periods.
 
 To model the counterfactual ``no school closure'' scenario, we assumed that the schools were ``Fully open'' during the

diff --git a/docs/tex/tex_descriptions/models/sm_covid/model_description.tex b/docs/tex/tex_descriptions/models/sm_covid/model_description.tex
@@ -5,7 +5,8 @@
 \section{Model description}
 
 \subsection{General approach}
-We use a semi-mechanistic compartmental model of COVID-19 transmission governed by ordinary differential equations (ODEs). 
+We use a semi-mechanistic compartmental model of COVID-19 transmission governed by ordinary differential equations (ODEs) to
+simulate country-specific COVID-19 epidemics during the first three years of the pandemic (2020-2022). 
 Our model captures important factors of COVID-19 transmission and disease such as age-specific characteristics, 
 heterogeneous mixing, vaccination and the emergence of different variants of concern. 
 The ODE-based model is used to capture only states relevant to transmission, whereas hospitalisations and deaths are
@@ -107,18 +108,9 @@ \subsubsection{Random transmission adjustment}
 \input{../../tex_descriptions/models/sm_covid/random_process.tex} 
 
 \subsubsection{Ordinary differential equations}
-\label{ODEs}
+\label{ODEs} 
 \input{../../tex_descriptions/models/sm_covid/odes.tex} 
 
-\subsubsection{Model parameters}
-\label{parameters}
-The model parameters and their values (or associated prior distributions) are listed in Table \ref{param_table}.
-\renewcommand{\arraystretch}{1.3}
-\input{../../tex_descriptions/projects/sm_covid/param_table.tex} 
-
-\renewcommand{\arraystretch}{1.3}
-\input{../../tex_descriptions/projects/sm_covid/agespec_table.tex} 
-
 % ____________________________________________________
 % Now, let's talk about the convolution processes
 %______________________________________________________
@@ -134,3 +126,7 @@ \subsubsection{COVID-19-related hospital pressure}
 \subsubsection{COVID-19 deaths}
 \input{../../tex_descriptions/models/sm_covid/deaths.tex} 
 
+
+\subsection{Model parameters}
+\label{parameters}
+\input{../../tex_descriptions/projects/sm_covid/model_parameters.tex}
diff --git a/docs/tex/tex_descriptions/models/sm_covid/odes.tex b/docs/tex/tex_descriptions/models/sm_covid/odes.tex
@@ -5,7 +5,7 @@
 the average duration of active disease is denoted $w$. The relative susceptibility to infection with strain $s$ of individuals aged $a$ with 
 vaccination status $v$ is denoted $\rho_{a,v,s}$. The term $b_{a,v,s}(t)$ designates the introduction of individuals of age $a$ and 
 with vaccination status $v$ that are infected with strain $s$ (infection seeding). Vaccination is characterised by the age-specific
-and time-variant per-capita vaccination rate $w_a$. Finally, $\chi_{s,\sigma}$ represents the relative susceptibility to infection
+and time-variant per-capita vaccination rate $\omega_a$. Finally, $\chi_{s,\sigma}$ represents the relative susceptibility to infection
 with strain $\sigma$ for individuals whose most recent infection episode was with strain $s$. Using this new notation combined 
 with those previously introduced, we can describe the model with the following set of ordinary differential equations:
 

diff --git a/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex b/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex
@@ -10,7 +10,7 @@
 are allocated. Note that in the event that the population-level vaccine coverage exceeds 80\%, the saturation coverage was set equal to the population-level coverage. 
 
 Let us consider two successive time points $t_i$ and $t_{i+1}$ for which vaccination data are available. Let us denote $r_{a, i}$ and $r_{a, i+1}$ the associated vaccine 
-coverage for age group $a$. The time-variant and age-specific vaccination rate per capita $w_a(t)$ verifies:
+coverage for age group $a$. The time-variant and age-specific vaccination rate per capita $\omega_a(t)$ verifies:
 
 \begin{equation}
     1 - r_{a, i+1} = (1 - r_{a, i})e^{-w_a(t)(t_{i+1} - t_i)} \quad, \forall t \in [t_i, t_{i+1}) .
@@ -19,7 +19,7 @@
 Then, 
 \begin{equation}
     \label{eq:vacc}
-    w_a(t) = \frac{\ln(1 - r_{a, i}) - \ln(1 - r_{a, i+1})}{t_{i+1} - t_i} \quad, \forall t \in [t_i, t_{i+1}) ,
+    \omega_a(t) = \frac{\ln(1 - r_{a, i}) - \ln(1 - r_{a, i+1})}{t_{i+1} - t_i} \quad, \forall t \in [t_i, t_{i+1}) ,
 \end{equation}
 where $\ln(x)$ represents the natural logarithm of $x$.
 

diff --git a/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex b/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex
@@ -7,10 +7,10 @@
 
 We assumed that individuals previously infected with the wild-type strain could only be reinfected with the delta or 
 omicron strains. However, such individuals have a reduced risk of infection with these variants compared to 
-infection-naive individuals (68\% and 45\% reduction for delta and omicron, respectively) \textcolor{red}{[REF DOI: 10.1503/cmaj.211248]}.
+infection-naive individuals (82\% and 45\% reduction for delta and omicron, respectively) \cite{stein2023}.
 We assumed that individuals previously infected with the delta variant could only be reinfected with the omicron variant, 
-with an infection risk reduced by 45\% compared to infection-naive individuals \textcolor{red}{[REF DOI: 10.1503/cmaj.211248]}. 
+with an infection risk reduced by 45\% compared to infection-naive individuals \cite{stein2023}. 
 The other parameters used to represent strain-specific characteristics are presented in Table \ref{param_table}.
 
 Seeding of each new strain into the model was achieved through the importation of a small number (10 per million population) of new infectious persons with the relevant strain into the model.
-The seeding process was done over a ten-day period and the start of this period was set to the emergence date reported by GISAID.
+The seeding process was done over a ten-day period and the start of this period was set to the emergence date reported by GISAID for each country \cite{gisaid2023}.
diff --git a/docs/tex/tex_descriptions/projects/sm_covid/agespec_table.tex b/docs/tex/tex_descriptions/projects/sm_covid/agespec_table.tex
@@ -1,14 +1,10 @@
 \begin{table}
-   \begin{tikzpicture}[overlay,remember picture]
-      \node [yshift=-50ex,rotate=45] at ( $(pic cs:A) !.5! (pic cs:B)$ ){ \fontsize{30} 
-            {6}\selectfont\textbf{\color{black!40}DRAFT ONLY} };
-      \end{tikzpicture}
-\centering
+   \centering
 \caption{Age-specific parameters for wild-type COVID-19}
 \label{agespec_table}
 \begin{tabular}{p{2cm} p{3cm} p{3cm} p{3cm} p{3cm}}
 \toprule
-   Age group & Rel. suscepitbility to infection (ref. 15-69 y.o.)\cite{zhang-2020-a} & Proportion symptomatic\cite{sah-2021} & Proportion of symptomatic patients hospitalised [CURRENTLY USING Dutch GGD report from 4th August 2020, Table 3] & Infection fatality rate\cite{odriscoll-2021} \\
+   Age group & Rel. suscepitbility to infection (ref. 15-69 y.o.) \cite{zhang2020a} & Proportion symptomatic \cite{sah2021} & Proportion of symptomatic patients hospitalised \cite{rivm2020} & Infection fatality rate \cite{odriscoll2021} \\
 \midrule
 0-4 & 0.36 & 0.533 & 0.0777 & 0.00003 \\
 5-9 & 0.36 & 0.533 & 0.0069 & 0.00001 \\