diff --git a/autumn/projects/sm_covid2/common_school/calibration.py b/autumn/projects/sm_covid2/common_school/calibration.py index 4fc49193b..473ee86a2 100644 --- a/autumn/projects/sm_covid2/common_school/calibration.py +++ b/autumn/projects/sm_covid2/common_school/calibration.py @@ -66,25 +66,24 @@ def get_bcm_object(iso3, analysis="main"): est.TruncatedNormalTarget( "prop_ever_infected_age_matched", sero_target.data, - trunc_range=[0., 1.], # will reject runs with >10% attack rate before epidemic - stdev=.1 + trunc_range=[0., 1.], + stdev=sero_target.stdev ) - # est.BinomialTarget( - # "prop_ever_infected_age_matched", - # sero_target.data, - # sample_sizes=sero_target.sample_sizes - # ) ) # Add a safeguard target to prevent a premature epidemic occurring before the first reported death # Early calibrations sometimes produced a rapid epidemic reaching 100% attack rate before the true epidemic start - zero_attack_rate_series = pd.Series([0.], index=[death_target_data.index[0]]) + zero_series = pd.Series([0.], index=[death_target_data.index[0]]) # could be any value, only the time index matters + + def censored_func(modelled, data, parameters, time_weights): + # Returns a very large negative number if modelled value is greater than 1%. Returns 0 otherwise. + return jnp.where(modelled > 0.01, -1.e11, 0.)[0] + targets.append( - est.TruncatedNormalTarget( - "prop_ever_infected", - zero_attack_rate_series, - trunc_range=[0., 0.10], # will reject runs with >10% attack rate before epidemic - stdev=1000. # flat distribution + est.CustomTarget( + "prop_ever_infected", + zero_series, # could be any value, only the time index matters + censored_func ) ) diff --git a/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py b/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py index bf6c3c277..724fbd48c 100644 --- a/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py +++ b/autumn/projects/sm_covid2/common_school/output_plots/country_spec.py @@ -26,7 +26,7 @@ def update_rcparams(): 'axes.labelsize': "x-large", 'xtick.labelsize': 'large', 'ytick.labelsize': 'large', - 'legend.fontsize': 'large', + 'legend.fontsize': 'medium', 'legend.title_fontsize': 'large', 'lines.linewidth': 1., @@ -54,7 +54,8 @@ def update_rcparams(): "hospital_occupancy": "Hospital pressure", "icu_admissions": "daily ICU admissions", "icu_occupancy": "total ICU beds", - "prop_ever_infected": "ever infected with Delta or Omicron", + "prop_ever_infected": "ever infected", + "prop_ever_infected_age_matched": "Prop. ever infected\n(age-matched)", "transformed_random_process": "Transformed random process", @@ -70,8 +71,8 @@ def update_rcparams(): ) SCHOOL_COLORS = { - 'partial': 'azure', - 'full': 'thistle' + 'partial': 'silver', # 'azure', + 'full': 'gold' # 'thistle' } def y_fmt(tick_val, pos): @@ -109,7 +110,7 @@ def add_school_closure_patches(ax, iso3, ymax, school_colors=SCHOOL_COLORS): ax.vlines(closed_dates_str, ymin=0, ymax=ymax, lw=1, alpha=1, color=school_colors['full'], zorder = 1) -def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3): +def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3, include_legend=True): bcm = get_bcm_object(iso3, "main") @@ -120,7 +121,7 @@ def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3): if output_name in bcm.targets: t = bcm.targets[output_name].data t.index = ref_times_to_dti(REF_DATE, t.index) - axis.scatter(list(t.index), t, marker=".", color='black', label='observations', zorder=11, s=3.) + axis.scatter(list(t.index), t, marker=".", color='black', label='observations', zorder=11, s=5.) colour = unc_sc_colours[0] @@ -151,14 +152,18 @@ def plot_model_fit_with_uncertainty(axis, uncertainty_df, output_name, iso3): # axis.tick_params(axis="x", labelrotation=45) title = output_name if output_name not in title_lookup else title_lookup[output_name] + if output_name == "prop_ever_infected_age_matched" and output_name not in bcm.targets: + title = "Prop. ever infected" + axis.set_ylabel(title) plt.tight_layout() - plt.legend(markerscale=2.) + if include_legend: + plt.legend(markerscale=2.) axis.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt)) -def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=False): +def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=False, include_legend=True): # update_rcparams() ymax = 0. @@ -176,6 +181,7 @@ def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=Fals time, df[df["quantile"] == .25]['value'], df[df["quantile"] == .75]['value'], color=colour, alpha=0.7, + edgecolor=None, # label=interval_label, zorder=scenario_zorder ) @@ -194,7 +200,8 @@ def plot_two_scenarios(axis, uncertainty_df, output_name, iso3, include_unc=Fals # axis.set_xlim((model_start, model_end)) axis.set_ylim((0, plot_ymax)) - axis.legend() + if include_legend: + axis.legend() axis.yaxis.set_major_formatter(tick.FuncFormatter(y_fmt)) @@ -333,23 +340,25 @@ def make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_f right_grid = inner_grid[0, 1] # will contain final size plots #### Split left panel into 3 panels - inner_left_grid = gridspec.GridSpecFromSubplotSpec(3, 1, subplot_spec=left_grid, hspace=.15, height_ratios=(1, 1, 1)) - # calibration + inner_left_grid = gridspec.GridSpecFromSubplotSpec(4, 1, subplot_spec=left_grid, hspace=.15, height_ratios=(1, 1, 1, 1)) + # calibration, deaths ax2 = fig.add_subplot(inner_left_grid[0, 0]) plot_model_fit_with_uncertainty(ax2, uncertainty_df, "infection_deaths", iso3) format_date_axis(ax2) remove_axes_box(ax2) - # plt.setp(ax2.get_xticklabels(), visible=False) + # seropos prop over time + ax_sero = fig.add_subplot(inner_left_grid[1, 0]) + plot_model_fit_with_uncertainty(ax_sero, uncertainty_df, "prop_ever_infected_age_matched", iso3, include_legend=False) + format_date_axis(ax_sero) + remove_axes_box(ax_sero) # scenario compare deaths - ax3 = fig.add_subplot(inner_left_grid[1, 0]) #, sharex=ax2) + ax3 = fig.add_subplot(inner_left_grid[2, 0]) #, sharex=ax2) plot_two_scenarios(ax3, uncertainty_df, "infection_deaths", iso3, True) format_date_axis(ax3) remove_axes_box(ax3) - - # plt.setp(ax3.get_xticklabels(), visible=False) # scenario compare hosp - ax4 = fig.add_subplot(inner_left_grid[2, 0]) #, sharex=ax2) - plot_two_scenarios(ax4, uncertainty_df, "hospital_occupancy", iso3, True) + ax4 = fig.add_subplot(inner_left_grid[3, 0]) #, sharex=ax2) + plot_two_scenarios(ax4, uncertainty_df, "hospital_occupancy", iso3, True, include_legend=False) format_date_axis(ax4) remove_axes_box(ax4) @@ -419,3 +428,18 @@ def make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_f fig.savefig(os.path.join(output_folder, "tiling.pdf"), facecolor="white") plt.close() + + +def test_tiling_plot(): + from pathlib import Path + import pandas as pd + + directory = Path.cwd() / "autumn" / "projects" / "sm_covid2" / "common_school" / "output_plots" / "test_tiling_plot" + + iso3 = "FRA" + uncertainty_df = pd.read_parquet(directory / "uncertainty_df.parquet") + diff_quantiles_df = pd.read_parquet(directory / "diff_quantiles_df.parquet") + output_folder = directory + make_country_output_tiling(iso3, uncertainty_df, diff_quantiles_df, output_folder) + +# test_tiling_plot() \ No newline at end of file diff --git a/autumn/projects/sm_covid2/common_school/project_maker.py b/autumn/projects/sm_covid2/common_school/project_maker.py index 1b0f9d172..9d85be96b 100644 --- a/autumn/projects/sm_covid2/common_school/project_maker.py +++ b/autumn/projects/sm_covid2/common_school/project_maker.py @@ -14,7 +14,7 @@ ) from autumn.calibration import Calibration from autumn.calibration.priors import UniformPrior -from autumn.calibration.targets import NegativeBinomialTarget, BinomialTarget +from autumn.calibration.targets import NegativeBinomialTarget, BinomialTarget, TruncNormalTarget from autumn.models.sm_covid2 import get_base_params, build_model from autumn.model_features.jax.random_process import set_up_random_process from autumn.settings import Region, Models @@ -58,7 +58,7 @@ def get_school_project(iso3, analysis="main"): # read seroprevalence data (needed to specify the sero age range params and then to define the calibration targets) - positive_prop, adjusted_sample_size, midpoint_as_int, sero_age_min, sero_age_max = get_sero_estimate(iso3) + positive_prop, sero_target_sd, midpoint_as_int, sero_age_min, sero_age_max = get_sero_estimate(iso3) # Load timeseries timeseries = get_school_project_timeseries(iso3, sero_data={ @@ -97,10 +97,11 @@ def get_school_project(iso3, analysis="main"): if positive_prop is not None: targets.append( - BinomialTarget( - data=pd.Series(data=[positive_prop], index=[midpoint_as_int], name="prop_ever_infected_age_matched"), - sample_sizes = pd.Series(data=[adjusted_sample_size], index=[midpoint_as_int]) - ) + TruncNormalTarget( + data=pd.Series(data=[positive_prop], index=[midpoint_as_int], name="prop_ever_infected_age_matched"), + trunc_range=(0., 1.), + stdev=sero_target_sd + ) ) # set up random process if relevant @@ -392,13 +393,13 @@ def get_sero_estimate(iso3): country_data = df[df['alpha_3_code'] == iso3].to_dict(orient="records")[0] - # work out the adjusted sample size, accounting for survey sample size, risk of bias and geographic level (national/subnational) - bias_risk_adjustment = { - 0: 1./3., # high risk of bias - 1: 2./3., # moderate risk of bias - 2: 1., # low risk of bias - } - adjusted_sample_size = round(country_data['denominator_value'] * bias_risk_adjustment[country_data['overall_risk_of_bias']]) + # work out the target's standard deviation according to the risk of bias + sero_target_sd = { + 0: .2, # high risk of bias + 1: .1, # moderate risk of bias + 2: .05, # low risk of bias + }[country_data['overall_risk_of_bias']] + # adjusted_sample_size = round(country_data['denominator_value'] * bias_risk_adjustment[country_data['overall_risk_of_bias']]) # work out the survey midpoint start_date = datetime.fromisoformat(country_data['sampling_start_date']) @@ -407,4 +408,4 @@ def get_sero_estimate(iso3): midpoint = start_date + (end_date - start_date) / 2 midpoint_as_int = (midpoint - datetime(2019, 12, 31)).days - return country_data["serum_pos_prevalence"], adjusted_sample_size, midpoint_as_int, country_data['age_min'], country_data['age_max'] + return country_data["serum_pos_prevalence"], sero_target_sd, midpoint_as_int, country_data['age_min'], country_data['age_max'] diff --git a/data/inputs/school-closure/serodata_national.csv b/data/inputs/school-closure/serodata_national.csv index 1adf311ae..0bea38364 100644 --- a/data/inputs/school-closure/serodata_national.csv +++ b/data/inputs/school-closure/serodata_national.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2e9121456121f64b5729e55d5cb1222b61d587c337b346b5a21c738409c8c83 -size 52128 +oid sha256:e1d0b1e21959b80afc87beee88b1c4902eaadb3d4ae4a8bb6890976b3c8c81b3 +size 52142 diff --git a/docs/tex/tex_descriptions/models/sm_covid/code.tex b/docs/tex/tex_descriptions/models/sm_covid/code.tex new file mode 100644 index 000000000..6e15a3925 --- /dev/null +++ b/docs/tex/tex_descriptions/models/sm_covid/code.tex @@ -0,0 +1,9 @@ +\section{Software and code used to conduct the analyses} + +\subsection{Code} +The Python code and data used to perform the analyses is fully available on Github at the following link: +\href{https://github.com/monash-emu/AuTuMN}{https://github.com/monash-emu/AuTuMN}. In particular, \textcolor{red}{LIST LINKS TO SOME SUB-MODULES HERE}. + + +\subsection{Software implementation} +\input{../../tex_descriptions/models/sm_covid/runtime.tex} \ No newline at end of file diff --git a/docs/tex/tex_descriptions/models/sm_covid/deaths.tex b/docs/tex/tex_descriptions/models/sm_covid/deaths.tex index 4ec161b95..acc63db27 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/deaths.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/deaths.tex @@ -1,14 +1,14 @@ -We estimate the number of COVID-19 deaths over time using a similar approach -as for the hospital pressure indicator. We use the age-specific infection fatality rates reported in -ODriscoll et al. \cite{odriscoll-2021}, adjusted for vaccination status and for the infecting strain +We estimated the number of COVID-19 deaths over time using a similar approach +as for the hospital pressure indicator. We used the age-specific infection fatality rates reported in +O'Driscoll et al. \cite{odriscoll-2021}, adjusted for vaccination status and for the infecting strain to estimate COVID-19 mortality. Using the same notations as in Section \ref{hosp}, the number of COVID-19 -deaths observed at time $t$ is obtained by: +deaths observed at time $t$ was obtained by: \begin{equation} \mu(t) = m_C \sum_{a,v,s} ifr_{a,v,s} \int_{u \geq 0} i_{a,v,s}(t-u)g_{d}(u) du \quad, \end{equation} where $ifr_{a,v,s}$ is the risk of death given infection for age $a$, vaccination status $v$ and strain $s$, and $g_d$ is the probability density function of the statistical distribution chosen to represent the -time from symptom onset to death (Table \ref{param_table}). We use a country-specific adjuster $m_C$ to +time from symptom onset to death (Table \ref{param_table}). We used the country-specific adjuster $m_C$ to capture the fact that the infection fatality ratio is expected to vary by country, in part due to -differences in COVID-19 death definition and reporting standards. This adjustment is automatically calibrated +differences in COVID-19 death definition and reporting standards. This adjustment was automatically calibrated by the MCMC (Section \ref{calibration}). \ No newline at end of file diff --git a/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex b/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex index 3e95315f5..c658efd73 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/hospitalisations.tex @@ -1,19 +1,19 @@ -The risk of hospitalisation given infection is expected to vary dramatically by setting. +The risk of hospitalisation given infection was expected to vary markedly by setting. For example, different countries may have different criteria for whether or not a COVID-19 patient should be admitted to a hospital. This makes it difficult to provide accurate estimates of hospitalisation rates for multiple countries. -This is why we introduce a universal indicator named ``hospital pressure'' in our analysis. This indicator -is obtained by considering the age-specific risk of hospitalisation given infection observed in the first year +For this reason we introduced a universal indicator named ``hospital pressure'' in our analysis. This indicator +was obtained by considering the age-specific risk of hospitalisation given infection observed in the first year of the pandemic in the Netherlands, adjusted for vaccination status and for the infecting strain (Table \ref{param_table}). The ``hospital pressure'' indicator can therefore be interpreted as the level of hospital occupancy that would be observed in the analysed country if the rates of hospitalisation given infection in this country were the same -as in the Netherlands. This indicator is expected to be roughly proportional to the actual hospital occupancy level of the -studied country. Note that this indicator is used in order to make comparisons between scenarios such that one should interpret the relative +as for the Netherlands. This quantity is expected to vary proportionately with occupancy over time, providing an indicator of +hospital pressure. Note that this indicator was used in order to make comparisons between scenarios, such that one should interpret the relative differences between scenarios rather than the absolute values of the indicator. Let us denote $i_{a,v,s}(t)$ the number of new disease episodes estimated to start at time $t$ for people aged $a$ with vaccination status $v$ -and infected with strain $s$. The number of new hospital admissions occurring at time $t$ is calculated using the following +and infected with strain $s$. The number of new hospital admissions occurring at time $t$ was calculated using the following convolution product: \begin{equation} \eta(t) = \sum_{a,v,s} \kappa_{a,v,s} \int_{u \geq 0} i_{a,v,s}(t-u)g_{h}(u) du \quad, @@ -22,7 +22,7 @@ based on the Netherlands data, and $g_h$ is the probability density function of the statistical distribution chosen to represent the time from symptom onset to hospitalisation (Table \ref{param_table}). -We then compute the ``hospital pressure'' quantity $h$, which is an indicator of hospital occupancy level, by combining the number of new +We then computed the ``hospital pressure'' quantity $h$, which is an indicator of hospital occupancy level, by combining the number of new hospital admissions $\eta$ with the statistical distribution used to model hospital stay duration: \begin{equation} h(t) = \int_{u \geq 0} \eta(t-u) (1 - \tau(u)) du \quad, diff --git a/docs/tex/tex_descriptions/models/sm_covid/mixing.tex b/docs/tex/tex_descriptions/models/sm_covid/mixing.tex index 9f7934bc4..c4bd0d85f 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/mixing.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/mixing.tex @@ -1,5 +1,5 @@ The model captures changes in social interactions over time through a dynamic age-specific mixing matrix. The following -sections describe how this matrix is defined and how it captures the different non-pharmaceutical interventions implemented +sections describe how this matrix was defined and how it captures the different non-pharmaceutical interventions implemented in the analysed countries, including school closures. The overall approach is also illustrated by Figure \ref{fig:mixing}. \paragraph{Reference mixing matrices} @@ -10,7 +10,7 @@ The overall contact matrix (before adjustments for mobility changes) results from the summation of the four location-specific contact matrices: \(C_{0} = C_{H} + m_S C_{S} + C_{W} + C_{L}\), where \(C_{H}\), \(C_{S}\), \(C_{W}\) and \(C_{L}\) are the age-specific contact matrices associated with households, schools, workplaces and other locations, respectively. Note that the school contribution $C_{S}$ is -multiplied by the factor $m_S$ that is varied during model calibration (see Section \ref{calibration}), in order to include uncertainty around +multiplied by the factor $m_S$ that was varied during model calibration (see Section \ref{calibration}), in order to account for uncertainty around the relative contribution of school contacts to overall mixing. \paragraph{Modifications of contact rates over time} @@ -22,18 +22,18 @@ C(t)= h(t)^{2}C_{H}+ s(t)^{2} m_S C_{S}+ w(t)^{2}C_{W}+l(t)^{2}C_{L} \end{equation} -The modifying functions $h$ (for households), $s$ (for schools), $w$ (for work) and $l$ (for other-locations) are each squared to capture the effect of the mobility changes on +The modifying functions $h$ (for households), $s$ (for schools), $w$ (for work) and $l$ (for other-locations) were each squared to capture the effect of the mobility changes on both the infector and the infectee in any given interaction that could potentially result in transmission. \vspace{5pt} \textbf{School closure/re-opening} -Reduced attendance at schools is represented through the function $s$, which represents the proportion of all school students +Reduced attendance at schools was represented through the function $s$, which represents the proportion of all school students currently attending on-site teaching. If schools are fully closed at time $t$, \(s(t)=0\) and \(C_{S}\) does not contribute to the overall mixing matrix \(C(t)\). -The function $s$ is derived from the UNESCO database on school closures since the start of the COVID-19 pandemic (\textcolor{red}{ADD REF HERE}). +The function $s$ was derived from the UNESCO database on school closures from the start of the COVID-19 pandemic (\textcolor{red}{ADD REF HERE}). This database provides school opening status over time as a categorical variable taking the following values: ``Fully open'', ``Partially open'', ``Academic break'', ``Closed due to COVID-19''. -Table \ref{tab:unesco_categories} indicates how the different categorical values are converted into the numerical function $s$. +Table \ref{tab:unesco_categories} indicates how the different categorical values were converted into the numerical function $s$. \vspace{5pt} \begin{table}[ht] @@ -53,11 +53,11 @@ \end{center} \end{table} -We included uncertainty around the value associated with the partial closure category, as there is no quantitative data available to +We included uncertainty around the value associated with the partial closure category, as there was no quantitative data available to inform this parameter. The partial closure periods are likely to be periods where only a small fraction of students such as children of ``essential workers'' were -attending school. We assume that between 10 and 30\% of students attended on-site learning during these periods. +attending school. We assumed that between 10 and 30\% of students attended on-site learning during these periods. -To model the counterfactual ``no school closure'' scenario, we assume that the schools were ``Fully open'' during the +To model the counterfactual ``no school closure'' scenario, we assumed that the schools were ``Fully open'' during the periods reported as ``Partially open'' or ``Closed due to COVID-19''. Figure \ref{fig:unesco} summarises the UNESCO data on school closure for the analysed countries. @@ -74,19 +74,19 @@ \vspace{5pt} \textbf{Dynamic mobility outside of schools and homes} -Changes to people's mobility in places other than schools and homes are modelled using Google Mobility data, after applying a seven-day moving average smoothing. We use the ``Workplace'' +Changes to people's mobility in places other than schools and homes were modelled using Google Mobility data, after applying a seven-day moving average smoothing. We used the ``Workplace'' category of the Google data to scale the work-related matrix contribution $C_W$ to overall mixing over time, using the adjusting function -$w$. The ``other locations'' matrix $C_L$ is scaled through the adjusting function $l$ which is defined as the average of the Google mobility +$w$. The ``other locations'' matrix $C_L$ was scaled through the adjusting function $l$ which was defined as the average of the Google mobility indicators across the following Google categories: ``Retail and recreation'', ``Grocery and pharmacy'' and ``Transit stations''. \vspace{5pt} \textbf{Household contacts} -In the base case analysis, the contribution of household contacts to the overall mixing matrix is fixed over time +In the base case analysis, the contribution of household contacts to the overall mixing matrix was fixed over time (i.e. $h(t) = 1$ in Equation \ref{eq_mixing}). Although Google provides mobility estimates for residential contacts, the nature of these data is different from that of each of the other Google mobility -types. They represent the time spent in that location, as opposed to other categories, which measure a change in total visitors -rather than the duration. The daily frequency with which people attend their residence is likely to be close to one and we +types. They represent the time spent in that location, as opposed to other categories, which measure a change in total visits +rather than the duration. The daily frequency with which people attend their residence is likely to be close to one, and we considered that household members likely have a daily opportunity for infection with each other household member regardless of the background level of mobility. Therefore, we did not implement a function to scale the contribution of household contacts to the mixing matrix with time. @@ -98,12 +98,12 @@ \vspace{5pt} \label{sa_descriptions} \textbf{SA1: No Google mobility data} -In a first sensitivity analysis (SA1), we remove the contribution of the Google mobility data to the modelled social mixing (see Figure \ref{fig:mixing}). -In this configuration, the calibrated random process ($W(t)$) is expected to capture mobility changes implicitly. +In a first sensitivity analysis (SA1), we removed the contribution of the Google mobility data to the modelled social mixing (see Figure \ref{fig:mixing}). +In this configuration, the calibrated random process ($W(t)$) was used to capture mobility changes implicitly. \vspace{5pt} \textbf{SA2: School closures increase household contact rates} -In another sensitivity analysis (SA2), we consider an alternative assumption where the effective contact rates within households are increased during +In another sensitivity analysis (SA2), we considered an alternative assumption under which the effective contact rates within households were increased during periods of school closure. In that case, the household component of the mixing matrix is modified by the following function: $$ h(t) = 1 + 0.20(1 - s(t)) \quad ,$$ where $s$ is the function modifying school contacts as introduced in Section \ref{time_var_mixing}. This is equivalent to assuming diff --git a/docs/tex/tex_descriptions/models/sm_covid/model_description.tex b/docs/tex/tex_descriptions/models/sm_covid/model_description.tex index 9e1b1c473..568eed0ab 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/model_description.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/model_description.tex @@ -9,7 +9,7 @@ \subsection{General approach} Our model captures important factors of COVID-19 transmission and disease such as age-specific characteristics, heterogeneous mixing, vaccination and the emergence of different variants of concern. The ODE-based model is used to capture only states relevant to transmission, whereas hospitalisations and deaths are -estimated through a convolution process based on the ODE-based model's outputs. This process combines the model-estimated disease incidence with +estimated through a convolution process applied to the ODE-based model's outputs. This process combines the model-estimated disease incidence with statistical distributions modelling the time to hospitalisation, the hospital stay duration and the time to death. This approach presents two main advantages. First it reduces the complexity of the dynamic system relying on numerical solving of ODEs, which is computationally expensive. Second, the convolution approach allows for more flexibility @@ -38,7 +38,7 @@ \subsubsection{Compartment types and sequence} \begin{itemize} \item Susceptible \begin{itemize} - \item Persons never previously infected with SARS-CoV-2 during or before the model simulation period + \item Persons not previously infected with SARS-CoV-2 during or before the model simulation period \end{itemize} \item Latent \begin{itemize} @@ -70,7 +70,7 @@ \subsubsection{Compartment types and sequence} \end{figure} The main rationale for using four serial compartments for both the latent and active states is to achieve an Erlang distribution for the time spent in each of these states. This distribution is more realistic than -the exponential distribution that would have been associated with a single compartment, because the Erlang distribution does not have a large density mass around 0 and is not heavy-tailed. Figure \ref{fig:sojourn} +the exponential distribution which is the consequence of a single compartment assumption, because the Erlang distribution does not have a large density mass around 0 and is not heavy-tailed. Figure \ref{fig:sojourn} illustrates the modelled distributions of the incubation period and the active disease period. \begin{figure}[ht] @@ -82,7 +82,7 @@ \subsubsection{Compartment types and sequence} \label{fig:sojourn} \end{figure} -The four active disease compartments all have exactly the same characteristics. However, the last two latent compartments ($E^3$ and $E^4$) are infectious whereas the first two ($E^1$ and $E^2$) are not. We further assume that +The four active disease compartments all have identical characteristics. However, the last two latent compartments ($E^3$ and $E^4$) are infectious whereas the first two ($E^1$ and $E^2$) are not. We further assume that the infectious latent compartments are half as infectious as the active disease compartments. \subsubsection{Model stratification by age} @@ -94,6 +94,7 @@ \subsubsection{Capturing the effects of vaccination} \input{../../tex_descriptions/models/sm_covid/stratifications/immunity.tex} \subsubsection{Modelling multiple viral strains} +\label{strains} \input{../../tex_descriptions/models/sm_covid/stratifications/strains.tex} @@ -122,7 +123,7 @@ \subsubsection{Model parameters} %______________________________________________________ \subsection{Estimation of COVID-19-related hospital pressure and deaths} The transmission model described in Section \ref{trans} provides estimates of COVID-19 incidence over time, disaggregated by age, vaccination status and strain. -We combine these incidence estimates with the age-, vaccination- and strain-specific risks of hospitalisation and deaths as well as +We combine these incidence estimates with the age-, vaccination- and strain-specific risks of hospitalisation and deaths, as well as statistical distributions of time to events to compute COVID-19-related hospital pressure and deaths over time. \subsubsection{COVID-19-related hospital pressure} diff --git a/docs/tex/tex_descriptions/models/sm_covid/odes.tex b/docs/tex/tex_descriptions/models/sm_covid/odes.tex index 000ae3d9e..4083c5a34 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/odes.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/odes.tex @@ -1,12 +1,12 @@ -Let us first introduce some new notations. The different age groups are indicated by the subscript $a$ and $\mathcal{A}$ represents -the set of all modelled age groups (see Section \ref{age}). The vaccination status is represented by the subscript $v$ and $\mathcal{V}$ is the set of -vaccination statuses (i.e. $\mathcal{V}=$ \{``0'', ``1''\} where ``0'' represents unvaccinated people and ``1'' represents vaccinated people). The subscript $s$ is used to represent the different viral strains and $\mathcal{S}$ is the set +Let us first introduce some new notations. Modelled age groups are indicated by the subscript $a$, and $\mathcal{A}$ represents +the set of all modelled age groups (see Section \ref{age}). Vaccination status is represented by the subscript $v$, and $\mathcal{V}$ is the set of +vaccination statuses (i.e. $\mathcal{V}=$ \{``0'', ``1''\}, where ``0'' represents unvaccinated people and ``1'' represents vaccinated people). The subscript $s$ is used to represent the different viral strains, and $\mathcal{S}$ is the set of all strains (i.e. $\mathcal{S}=$ \{``wild-type'', ``delta'', ``omicron''\}). The average incubation period duration associated with strain $s$ is denoted $q_s$ and the average duration of active disease is denoted $w$. The relative susceptibility to infection with strain $s$ of individuals aged $a$ with -vaccination status $v$ is denoted $\rho_{a,v,s}$. The term $b_{a,v,s}(t)$ is used to designate the imported individuals of age $a$ and +vaccination status $v$ is denoted $\rho_{a,v,s}$. The term $b_{a,v,s}(t)$ designates the introduction of individuals of age $a$ and with vaccination status $v$ that are infected with strain $s$ (infection seeding). Vaccination is characterised by the age-specific and time-variant per-capita vaccination rate $w_a$. Finally, $\chi_{s,\sigma}$ represents the relative susceptibility to infection -with strain $\sigma$ for individuals whose most recent infection episode was with strain $s$. Using these new notations combined +with strain $\sigma$ for individuals whose most recent infection episode was with strain $s$. Using this new notation combined with those previously introduced, we can describe the model with the following set of ordinary differential equations: \begin{equation} @@ -22,13 +22,14 @@ \end{equation} where $\lambda_{a,s}$ represents the force of infection of strain $s$ affecting individuals of age $a$. The quantity $\Phi_v$ is a binary variable -used to switch between plus and minus signs depending on the vaccination status. It is equal to $1$ when +used to switch between a positive and negative multiplier depending on vaccination status. It is equal to $1$ when $v=$``1'' and $-1$ when $v=$``0''. In other words, $\Phi_v = 2\mathbbm{1}_{v=``1"} - 1$. -The force of infection is calculated as: +The force of infection was calculated as: \begin{equation} \label{foi} - \lambda_{a,s}(t) = \beta e^{2W(t)} \psi_{s} \sum_{\alpha \in \mathcal{A}} \sum_{v \in \mathcal{V}} c_{a,\alpha}(t) \Bigl( 0.5 \sum_{k=3}^{4} E_{\alpha,v,s}^{k} + \sum_{k=1}^{4} I_{\alpha,v,s}^{k} \Bigr) \quad . + \lambda_{a,s}(t) = \beta e^{2W(t)} \psi_{s} \sum_{\alpha \in \mathcal{A}} \sum_{v \in \mathcal{V}} \frac{c_{a,\alpha}(t)}{N_{\alpha, v}} \Bigl( 0.5 \sum_{k=3}^{4} E_{\alpha,v,s}^{k} + \sum_{k=1}^{4} I_{\alpha,v,s}^{k} \Bigr) \quad . \end{equation} In the previous equation, $\beta$ represents the unadjusted risk of transmission per contact, $\psi_{s}$ is the relative infectiousness of strain $s$, and $W(t)$ is -the random process introduced in Section \ref{random_process}. \ No newline at end of file +the random process introduced in Section \ref{random_process}. The size of the population of age $\alpha$ with vaccination status $v$ is denoted $N_{\alpha, v}$. The term $c_{a,\alpha}(t)$ is a single element of the contact matrix $C(t)$ introduced in Equation \ref{eq_mixing}. It +represents the average numbers of contacts per day that a individual of age $a$ has with individuals of age $\alpha$. \ No newline at end of file diff --git a/docs/tex/tex_descriptions/models/sm_covid/random_process.tex b/docs/tex/tex_descriptions/models/sm_covid/random_process.tex index f31db98e9..ed3fc40a2 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/random_process.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/random_process.tex @@ -1,20 +1,20 @@ -The risk of SARS-CoV2 transmission per contact is adjusted by a time-variant random process, making +The risk of SARS-CoV-2 transmission per contact was adjusted by a time-variant random process, making the model semi-mechanistic. This random process reflects the fact that all the variations observed in the transmission -risk in the real world cannot be explained solely by the factors that are included explicitly in the model such as vaccination, dynamic mobility or new variants' emergence. -We therefore allow for random perturbations to the risk of transmission over time, although these perturbations are highly auto-correlated +risk in the real world cannot be explained solely by the factors that are explicitly captured through model inputs such as vaccination, dynamic mobility or new variants' emergence. +We therefore allowed for random perturbations to the risk of transmission over time, although these perturbations were highly auto-correlated to avoid unrealistic changes over a short period of time. -We use a random walk with gaussian update defined by: +We used a random walk with Gaussian update defined by: \begin{equation} \label{eq:random_process} \begin{split} W(0) & = 0 \\ - W(t+1) & \sim \mathcal{N}(W(t), \epsilon) \quad , + W(t+1) & \sim \mathcal{N}(W(t), 0.5) \quad , \end{split} \end{equation} -where $\mathcal{N}$ denotes the normal distribution and where the standard deviation $\epsilon$ is automatically calibrated by the MCMC. -The random process $W$ is updated every two months and is transformed using the exponential function before being applied to the risk of transmission per contact (see Equation \ref{foi}). -Finally, the contribution of the random process to the risk of transmission is squared in order to capture its effect +where $\mathcal{N}$ denotes the normal distribution. +The random process $W$ was updated every two months and was transformed using the exponential function before being applied to the risk of transmission per contact (see Equation \ref{foi}). +Finally, the contribution of the random process to the risk of transmission was squared in order to capture its effect on both the susceptible and the infectious individuals (see Equation \ref{foi}). \ No newline at end of file diff --git a/docs/tex/tex_descriptions/models/sm_covid/runtime.tex b/docs/tex/tex_descriptions/models/sm_covid/runtime.tex index fa268403a..3ad7a37e8 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/runtime.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/runtime.tex @@ -1,17 +1,18 @@ -We use the summer2 Python package to implement the model. This is a domain specific library for +We used the \textit{summer2} Python package (v 1.2.5) to implement the model. This is a domain specific library for compartmental epidemiological models that addresses a few of the key concerns as follows. \subsubsection{Application Programming Interface (API)} -The model specification is done via a simple yet expressive Python API, -while the numerical implementation is largely autogenerated by the summer2 package at runtime. -This specification is composable via stratification classes and other reusable components (\textcolor{red}{maybe list some specific summer features with link to code}), -thus the complexity of the software is kept to a minimum, reducing cognitive overhead for the modeller, and greatly reducing the possibility for error. +The model specification was done via a simple yet expressive Python API, +while the numerical implementation was largely autogenerated by the \textit{summer2} package at runtime. +This specification is composable via stratification classes and other reusable components, +thus the complexity of the software is kept to a minimum, reducing cognitive overhead and greatly reducing the possibility for error. \subsubsection{Optimising compiler} -The summer2 package uses the jax library as its computational backend, meaning that while the specification of models is done largely in Python, +The \textit{summer2} package uses the jax library as its computational backend, meaning that while the specification of models is done largely in Python, the model execution itself is transformed via an optimising compiler into fast native code. -This brings the model runtime from several seconds (for a naive implementation) to under 50ms per iteration. +This brings the model runtime from several seconds (for a naive implementation) to under 50ms per iteration, which was necessary to perform the computationally-intensive +calibration tasks described in Section \ref{calibration}. diff --git a/docs/tex/tex_descriptions/models/sm_covid/stratifications/agegroup.tex b/docs/tex/tex_descriptions/models/sm_covid/stratifications/agegroup.tex index c8d2591cc..1afe57729 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/stratifications/agegroup.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/stratifications/agegroup.tex @@ -1,11 +1,11 @@ % Note that this will vary for every application, so will need to be edited - not sure of how best to manage this: -All compartments of the base compartmental structure are stratified by age into the following age bands: +All compartments of the base compartmental structure were stratified by age into the following age bands: zero to 14 years / 15 to 24 years / 25 to 49 years / 50 to 69 years / 70 years and above. -The initial population is distributed between the different age bands to reflect the age distributions reported by the United Nations Population Division. Demographic processes, including births, ageing and non-infection-related deaths are not simulated, given the timeframes considered in this simulation. +The initial population was distributed between the different age bands to reflect the age distributions reported by the United Nations Population Division. Demographic processes, including births, ageing and non-infection-related deaths are not simulated, given the timeframes considered in this simulation. -We assume heterogeneous mixing between the different age groups to account for the assortative nature of social interactions by age (see Section \ref{mixing}). -Age is assumed to affect: +We assumed heterogeneous mixing between the different age groups to account for the assortative nature of social interactions by age (see Section \ref{mixing}). +Age was assumed to affect: \begin{itemize} \item The susceptibility to infection \item The risk of COVID-19 hospitalisation diff --git a/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex b/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex index 62495628f..7a04623f4 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/stratifications/immunity.tex @@ -1,16 +1,15 @@ -% Referring to the immunity stratification as the vaccination stratification here, for the Bhutan application -History of vaccination is captured by stratifying all model compartments by vaccination status. -Two vaccination strata are included to represent those who have received at least two doses of a COVID-19 vaccine, +History of vaccination was captured by stratifying all model compartments by vaccination status. +Two vaccination strata were included to represent those who have received at least two doses of a COVID-19 vaccine, and those who have not. -We use data from \textit{Our World in Data} to inform the modelled dynamic vaccination coverage. In particular, we use the reported proportion of -people ``fully vaccinated'' to specify the time-variant proportion of vaccinated people in our model. We assume that older individuals are vaccinated +We used data from \textit{Our World in Data} to inform the modelled dynamic vaccination coverage. In particular, we used the reported proportion of +people ``fully vaccinated'' to specify the time-variant proportion of vaccinated people in our model. We assumed that older individuals are vaccinated first by prioritising the modelled age groups in descending order. That is, the oldest age group receives all available vaccines until a saturation coverage of 80\% is reached for this group. Then the next oldest category starts receiving vaccines and we repeat this process until all available vaccines -are allocated. Note that in the event that the population-level vaccine coverage exceeds 80\%, the saturation coverage is set equal to the population-level coverage. +are allocated. Note that in the event that the population-level vaccine coverage exceeds 80\%, the saturation coverage was set equal to the population-level coverage. -Let us consider two successive time points $t_i$ and $t_{i+1}$ for which vaccination data is available. Let us denote $r_{a, i}$ and $r_{a, i+1}$ the associated vaccine +Let us consider two successive time points $t_i$ and $t_{i+1}$ for which vaccination data are available. Let us denote $r_{a, i}$ and $r_{a, i+1}$ the associated vaccine coverage for age group $a$. The time-variant and age-specific vaccination rate per capita $w_a(t)$ verifies: \begin{equation} @@ -35,10 +34,9 @@ \label{fig:vaccination} \end{figure} -The effect of vaccination on transmission is to partially reduce the rate of infection for all persons at-risk of infection in the vaccinated stratum. +The effect of vaccination on transmission is to reduce the rate of infection partially for all persons at-risk of infection in the vaccinated stratum. This includes both fully susceptible (never previously infected) persons, as well as recovered persons who are at risk of reinfection. The model allows for hybrid immunity in the sense that the vaccination-induced relative reduction of transmission risk is multiplied with that induced by previous infection. Vaccination is also assumed to reduce the risk of hospitalisation and death -Emerging variants of concern (VoCs) may partially escape vaccine-induced (as well as infection-induced) immunity, as described further below (Table \ref{param_table} and \textcolor{red}{Table XXX for cross-immunity}). - +Emerging variants of concern (VoCs) may partially escape vaccine-induced (as well as infection-induced) immunity, as described further below (Table \ref{param_table} and Section \ref{strains}). \ No newline at end of file diff --git a/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex b/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex index 38abf6fa2..106af4363 100644 --- a/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex +++ b/docs/tex/tex_descriptions/models/sm_covid/stratifications/strains.tex @@ -1,17 +1,16 @@ -The model is stratified by ``strain'' to simulate the emergence of multiple variants of concern (VoC). -This approach explicitly represents multiple competing strains, each with a separate force of infection calculation. -We assume that VoCs can have different levels of transmissibility, incubation period's duration and disease severity -(hospitalisation and death risks) compared to the ancestral COVID-19 strain. In addition, VoCs are assumed to escape +The model was stratified by ``strain'' to simulate the emergence of multiple variants of concern (VoC). +This approach explicitly represents multiple competing strains, each with an independent force of infection calculation. +We assumed that VoCs can have different levels of transmissibility, incubation period and disease severity +(hospitalisation and death risks) compared to the ancestral COVID-19 strain. In addition, VoCs were assumed to escape immunity partially for both vaccination- and infection-related immunity. -We assume that individuals previously infected with the wild-type strain can only be reinfected with the delta or +We assumed that individuals previously infected with the wild-type strain could only be reinfected with the delta or omicron strains. However, such individuals have a reduced risk of infection with these variants compared to infection-naive individuals (68\% and 45\% reduction for delta and omicron, respectively) \textcolor{red}{[REF DOI: 10.1503/cmaj.211248]}. -We assume that individuals previously infected with the delta variant can only be reinfected with the omicron variant, +We assumed that individuals previously infected with the delta variant could only be reinfected with the omicron variant, with an infection risk reduced by 45\% compared to infection-naive individuals \textcolor{red}{[REF DOI: 10.1503/cmaj.211248]}. The other parameters used to represent strain-specific characteristics are presented in Table \ref{param_table}. -Seeding of each new strain into the model is achieved through the importation of a small number (10 per million population) of new infectious persons with the relevant strain into the model. -The seeding process is done over a ten-day period and the start of this period is set between 30 days before and 30 days after the emergence date -as reported by GISAID \textcolor{red}{Table XXX}. The exact start date is automatically calibrated by the MCMC algorithm. +Seeding of each new strain into the model was achieved through the importation of a small number (10 per million population) of new infectious persons with the relevant strain into the model. +The seeding process was done over a ten-day period and the start of this period was set to the emergence date reported by GISAID. diff --git a/docs/tex/tex_descriptions/projects/sm_covid/calibration_description.tex b/docs/tex/tex_descriptions/projects/sm_covid/calibration_description.tex index afe6e9272..ab8bdfac0 100644 --- a/docs/tex/tex_descriptions/projects/sm_covid/calibration_description.tex +++ b/docs/tex/tex_descriptions/projects/sm_covid/calibration_description.tex @@ -1,15 +1,17 @@ \section{Model calibration and uncertainty propagation} \label{calibration} -\textcolor{red}{REVISE THE SECTION BELOW WHEN FINALISED} -The model was calibrated using a Bayesian approach. In particular, we used the adaptive -Metropolis algorithm introduced by Haario \textit{et al.} to sample parameters from their -posterior distributions \cite{haario-2001}. For each country, we ran 8 independent Metropolis -chains initialised using Latin Hypercube Sampling based on the parameter priors. -We ran simulations for 1 hour per chain yielding at least 60,000 iterations per -chain. We discarded the first 30,000 iterations of each chain as burn-in and combined the -samples of the 8 chains to project epidemic trajectories over time. -For each modelled country, the epidemic projections presented in our analysis are associated with 1000 parameter sets -randomly sampled from the posterior distributions obtained from MCMC sampling. +The model was calibrated using a Bayesian approach. In particular, we used the Differential Evolution Metropolis +(DEMetropolis) algorithm implemented with the \textit{PyMC} Python package (v.5.2.0) to sample parameters from their +posterior distributions. For each country, we ran 32 independent DEMetropolis chains of 20,000 iterations, each +starting from a different starting point. The starting points were obtained after conducting 32 independent optimisation +searches performed with the Covariance Matrix Adaptation Evolution Strategy (CMA-ES) method, implemented with the +\textit{nevergrad} Python package (v.0.6.0) and with a budget of 20,000 model evaluations per search. +The 32 optimisation searches were initialised randomly using Latin Hypercube Sampling based on the parameter priors, +in order to promote broad dispersion across the parameter space. Our calibration approach required a total of 1,280,000 +model evaluations per country, which were completed in about one hour on a machine with 32 cpus and 128-GiB memory. + +For each country, the results presented in the manuscript are associated with 1000 parameter sets randomly sampled from +the posterior distributions obtained from DEMetropolis sampling (after discarding the first 10,000 iterations for each chain). The definitions of the prior distributions and the likelihood are detailed in the following sections. \subsection{Parameters varied during calibration} @@ -19,157 +21,57 @@ \subsection{Parameters varied during calibration} We used uniform prior distributions for all calibrated parameters. The primary parameters varied during calibration are the unadjusted risk of transmission per contact ($\beta$), the IFR multiplier ($m_C$), the infection seeding times of each strain and the proportion of students on-site during ``Partially open'' periods. Note that the values of the random -process $W_t$ described in Section \ref{random_process} are also treated like calibrated parameters by the MCMC. The Gaussian +process $W_t$ described in Section \ref{random_process} are also treated as calibrated parameters by the MCMC. The Gaussian auto-regressive component described in Equation \ref{eq:random_process} is incorporated in the posterior likelihood computation (Section \ref{likelihood}). \subsection{Calibration targets} \label{targets} -Model calibration is performed independently for each country. All models are fitted to the reported number of COVID-19 deaths over time. +Model calibration was performed independently for each country. All models were fitted to the reported number of COVID-19 deaths over time. We used the daily number of COVID-19 deaths reported by Our World in Data and applied a 7-day moving average to the observed data. -In addition, for countries where a nationally representative seroprevalence survey had been conducted (n=39), we include -seroprevalence data in the calibration likelihood. We used the online platform SeroTracker to extract country-specific seroprevalence estimates. - -\textcolor{red}{Use Angus' description here.} -\textcolor{red}{Also mention how we match age-specific estimates in the model} - -\begin{table}[!ht] - \footnotesize - \begin{center} - \caption{Seroprevalence data extracted from SeroTracker (national surveys).} - \label{tab:sero_national} - \begin{tabular}{p{2cm} | p{1.6cm} | p{1.6cm} | p{0.8cm} | p{0.8cm} | p{1cm} | p{1.3cm} | p{1.2cm} | p{1.2cm}} - \hline - - \textbf{country} & \textbf{sampling start date} & \textbf{sampling end date} & \textbf{age min} & \textbf{age max} & \textbf{denom. value} & \textbf{serum pos prevalence} & \textbf{estimate grade} & \textbf{overall risk of bias} \\ -\hline -\href{https://dx.doi.org/10.5694/mja2.51542}{\textcolor{blue}{Australia}} & 2020-11-03 & 2021-03-12 & & 19 & 1685 & 0.0023 & National & High \\ -\hline -\href{https://dx.doi.org/10.1007/s15010-021-01639-0}{\textcolor{blue}{Austria}} & 2020-06-05 & 2020-12-04 & 18 & 72 & 20228 & 0.025 & National & High \\ -\hline -\href{https://dx.doi.org/10.1371/journal.pone.0268093}{\textcolor{blue}{Bangladesh}} & 2020-10-15 & 2021-02-15 & 10 & & 3220 & 0.673 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.2807/1560-7917.ES.2022.27.9.2100419}{\textcolor{blue}{Belgium}} & 2020-05-18 & 2020-05-25 & & 101 & 3242 & 0.062 & National & Low \\ -\hline -\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8225319/}{\textcolor{blue}{Brazil}} & 2020-05-14 & 2020-06-23 & & & 89362 & 0.023 & National & Moderate \\ -\hline -\href{http://dx.doi.org/10.1111/trf.16296}{\textcolor{blue}{Canada}} & 2020-05-09 & 2020-07-21 & 17 & & 74642 & 0.007 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1186/s12879-022-07045-7}{\textcolor{blue}{Chile}} & 2020-09-25 & 2020-11-25 & 7 & 94 & 2493 & 0.104 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1016/j.lana.2022.100195}{\textcolor{blue}{Colombia}} & 2020-09-21 & 2020-12-11 & 5 & 80 & 17863 & 0.3253 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.3390/pathogens10060774}{\textcolor{blue}{Croatia}} & 2020-12-15 & 2021-02-15 & & & 1436 & 0.251 & National & High \\ -\hline -\href{https://doi.org/10.1038/s43856-022-00080-0}{\textcolor{blue}{Czechia}} & 2021-02-01 & 2021-03-31 & 18 & & 19548 & 0.51 & National & High \\ -\hline -\href{https://dx.doi.org/10.1007/s10654-021-00796-8}{\textcolor{blue}{Denmark}} & 2020-08-15 & 2020-09-30 & 12 & & 11478 & 0.02 & National & Low \\ -\hline -\href{https://dx.doi.org/10.3389/fmed.2022.933260}{\textcolor{blue}{Ecuador}} & 2020-10-12 & 2020-10-19 & & & 1250 & 0.1168 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1080/23744235.2021.1974540}{\textcolor{blue}{Estonia}} & 2021-02-08 & 2021-03-25 & & & 2517 & 0.201 & National & High \\ -\hline -\href{http://dx.doi.org/10.1016/j.ijid.2021.08.028}{\textcolor{blue}{Ethiopia}} & 2020-06-24 & 2020-07-08 & 15 & & 16932 & 0.035 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1038/s41467-021-23233-6}{\textcolor{blue}{France}} & 2020-05-11 & 2020-05-17 & & & 3592 & 0.0493 & National & Low \\ -\hline -\href{https://revistas.ucr.ac.cr/index.php/psm/article/view/43261/46175}{\textcolor{blue}{Honduras}} & 2020-06-16 & 2020-06-23 & 5 & & 792 & 0.062 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1007/s11357-020-00226-9}{\textcolor{blue}{Hungary}} & 2020-05-01 & 2020-05-16 & 14 & & 10474 & 0.0068 & National & Low \\ -\hline -\href{http://dx.doi.org/10.1016/j.cmi.2021.06.002}{\textcolor{blue}{Iran}} & 2020-08-03 & 2020-10-31 & 6 & 109 & 11256 & 0.142 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1007/s10654-021-00749-1}{\textcolor{blue}{Israel}} & 2020-06-28 & 2020-09-14 & & & 54357 & 0.046 & National & Moderate \\ -\hline -\href{https://www.istat.it/it/files//2020/08/ReportPrimiRisultatiIndagineSiero.pdf}{\textcolor{blue}{Italy}} & 2020-05-25 & 2020-07-15 & & & 64660 & 0.025 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.3201/eid2702.204088}{\textcolor{blue}{Japan}} & 2020-06-01 & 2020-06-07 & 20 & & 7950 & 0.001 & National & Moderate \\ -\hline -\href{http://dx.doi.org/10.1016/j.onehlt.2021.100292}{\textcolor{blue}{Jordan}} & 2020-10-01 & 2020-10-31 & & & 5470 & 0.07 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1001/jama.2021.15265}{\textcolor{blue}{Kenya}} & 2021-01-03 & 2021-03-15 & 16 & 64 & 3018 & 0.485 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1186/s12879-022-07031-z}{\textcolor{blue}{Lebanon}} & 2020-12-07 & 2021-01-15 & & & 2058 & 0.185 & National & Low \\ -\hline -\href{https://www.journals.vu.lt/AML/article/view/22344}{\textcolor{blue}{Lithuania}} & 2020-08-10 & 2020-09-10 & 18 & 92 & 3089 & 0.014 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.3201/eid2813.212348}{\textcolor{blue}{Malawi}} & 2020-10-14 & 2020-12-08 & & & 4261 & 0.078 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1016/j.lanwpc.2021.100317}{\textcolor{blue}{Mongolia}} & 2020-10-13 & 2020-12-04 & & & 5000 & 0.0136 & National & Low \\ -\hline -\href{https://mohp.gov.np/attachments/article/708/First%20Sero-prevalence\_final\_report\_04-04-2021.pdf}{\textcolor{blue}{Nepal}} & 2020-10-09 & 2020-10-22 & & & 3040 & 0.144 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1111/irv.12932}{\textcolor{blue}{Norway}} & 2020-12-27 & 2021-02-13 & & & 1912 & 0.032 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1016/j.ijid.2021.09.062}{\textcolor{blue}{Oman}} & 2020-09-13 & 2020-09-24 & 5 & & 4780 & 0.164 & National & Moderate \\ -\hline -\href{https://dx.doi.org/10.1007/s15010-021-01629-2}{\textcolor{blue}{Pakistan}} & 2020-07-15 & 2020-07-31 & & & 15390 & 0.424 & National & Moderate \\ -\hline -\href{https://europepmc.org/article/PPR/PPR359402}{\textcolor{blue}{Portugal}} & 2021-03-01 & 2021-03-17 & & & 2435 & 0.173 & National & Low \\ -\hline -\href{https://papers.ssrn.com/sol3/papers.cfm?abstract\_id=3826194}{\textcolor{blue}{Singapore}} & 2020-11-15 & 2020-12-15 & 23 & 83 & 937 & 0.0016 & National & High \\ -\hline -\href{https://dx.doi.org/10.1016/j.cmi.2021.03.009}{\textcolor{blue}{Slovenia}} & 2020-10-17 & 2020-11-10 & & 99 & 1211 & 0.0429 & National & Low \\ -\hline -\href{https://dx.doi.org/10.1093/cid/ciac198}{\textcolor{blue}{South Africa}} & 2020-11-15 & 2021-04-15 & & & 7577 & 0.4523 & National & Moderate \\ -\hline -\href{https://bmjopen.bmj.com/content/11/4/e049837.abstract}{\textcolor{blue}{Rep. of Korea}} & 2020-09-24 & 2020-12-09 & 18 & 86 & 4085 & 0.0039 & National & Moderate \\ -\hline -\href{https://www.mscbs.gob.es/ciudadanos/ene-covid/docs/ESTUDIO\_ENE-COVID19\_INFORME\_FINAL.pdf}{\textcolor{blue}{Spain}} & 2020-06-08 & 2020-06-22 & & & 62167 & 0.052 & National & Low \\ -\hline -\href{https://www.folkhalsomyndigheten.se/contentassets/376f9021a4c84da08de18ac597284f0c/pavisning-antikroppar-genomgangen-covid-19-blodgivare-delrapport-2.pdf}{\textcolor{blue}{Sweden}} & 2020-11-23 & 2020-12-04 & & & 3183 & 0.07 & National & Moderate \\ -\hline -\href{https://www.thelancet.com/journals/langlo/article/PIIS2214-109X(21)00053-X/fulltext}{\textcolor{blue}{Zambia}} & 2020-07-04 & 2020-07-27 & & & 2704 & 0.021 & National & Low \\ -\hline - - \end{tabular} - \end{center} -\end{table} - +\input{../../tex_descriptions/projects/sm_covid/serodata.tex} \subsection{Likelihood definition} \label{likelihood} Let $d_w$ denote the rounded average daily number of COVID-19 deaths during week $w$, and $\hat{d_w}^\theta$ the associated predicted number of deaths according to the model with parameter set $\theta$. -For countries with seroprevalence data, let us denote $m$ the sample size associated with the seroprevalence survey selected from SeroTracker (Section \ref{targets}), -and $k$ the number of seropositive individuals observed in the survey. -Let $\hat{\pi}^\theta$ denote the modelled age-matched proportion ever infected by the time the survey was conducted (using midpoint date) +For countries with seroprevalence data, let us denote $\pi$ the measured seroprevalence proportion extracted from SeroTracker (Section \ref{targets}). +Let $\hat{\pi}^\theta$ denote the modelled age-matched proportion ever infected by the time the survey was conducted (using the midpoint date) associated with the parameter set $\theta$. -The likelihood was defined as follows for coutnries with seroprevalence data: +The likelihood was defined as follows for countries with seroprevalence data: \begin{equation} \label{eq:likelihood} - \mathcal{L}(\theta) := f_{m,\hat{\pi}^\theta}(k) \times \prod_w g_r(d_w | \:\hat{d_w}^\theta) \quad , + \mathcal{L}(\theta) := f_{\sigma}(\pi | \hat{\pi}^\theta) \times \prod_w g_r(d_w | \:\hat{d_w}^\theta) \quad , \end{equation} -where $f_{n,p}(.)$ is the probability mass function of a binomial distribution $\mathcal{B}(n,p)$, and +where $f_{\sigma}( . | \mu )$ is the probability density function of a $[0, 1]$-truncated normal distribution with mean $\mu$ and standard deviation $\sigma$; and $g_r(. | \mu)$ is the probability mass function of a negative binomial distribution with mean $\mu$ and -overdispersion parameter $r$. The parameter $r$ is automatically estimated by the MCMC algorithm. +overdispersion parameter $r$. The overdispersion parameter $r$ was automatically estimated by the MCMC algorithm, while the standard deviation $\sigma$ was set to different +values depending on the SeroTracker-reported risk of bias associated with the seroprevalence estimate ($\sigma=0.05$ if ``Low'', $\sigma=0.1$ if ``Moderate'', $\sigma=0.2$ if ``High''). -For countries without seroprevalence data, the likelihood equation is reduced to: +For countries without seroprevalence data, the likelihood equation reduces to: \begin{equation} \label{eq:likelihood_nosero} \mathcal{L}(\theta) := \prod_w g_r(d_w | \:\hat{d_w}^\theta) \quad. \end{equation} -The likelihood described above represents the goodness of fit of a particular model parameterisation with regards to the targeted data. +The likelihood functions described above represent the goodness of fit of a particular model parameterisation with regards to the targeted data. This quantity needs to be adjusted for the prior likelihood of the parameter set in order to compute the MCMC acceptance quantity $\mathcal{Q}(\theta)$. As we used uniform priors for all the parameters, the inclusion of the individual parameters' priors in the acceptance quantity is not necessary. Indeed, their respective contributions would cancel out as the same quantity would appear in the numerator and the denominator of the MCMC acceptance quantity ratio. However, the auto-regressive relationship described in Equation \ref{eq:random_process} -must be accounted for as part of the combined prior likelihood of a parameter set. This is what prevents unrealistic fluctuations of the random process. +must be accounted for as part of the combined prior likelihood of a parameter set. This prevents unrealistically large fluctuations of the random process. If $W^\theta$ represents the random process associated with the parameter set $\theta$, the overall MCMC acceptance quantity is obtained by: \begin{equation} \label{eq:acc_qtt} \begin{split} - \mathcal{Q}(\theta) & = \mathcal{L}(\theta) \times \prod_{i=1}^{n} z_{W^\theta_{i-1},\sigma}(W^\theta_i) \\ - & = f_{m,\hat{\pi}^\theta}(k) \times \prod_w g_r(d_w | \:\hat{d_w}^\theta) \times \prod_{i=1}^{n} z_{W^\theta_{i-1},\sigma}(W^\theta_i) \quad , + \mathcal{Q}(\theta) & = \mathcal{L}(\theta) \times \prod_{i=1}^{n} z_{W^\theta_{i-1},\epsilon}(W^\theta_i) \quad , \end{split} \end{equation} -where $z_{\mu,\sigma}(.)$ represents the probability density function of the normal distribution $\mathcal{N}(\mu, \sigma)$, and $n$ is the number -of random process updates. The standard deviation $\sigma$ is automatically estimated by the MCMC. +where $z_{\mu,\epsilon}(.)$ represents the probability density function of the normal distribution $\mathcal{N}(\mu, \epsilon)$, and $n$ is the number +of random process updates. + -\subsection{Software implementation} -\input{../../tex_descriptions/models/sm_covid/runtime.tex} diff --git a/docs/tex/tex_descriptions/projects/sm_covid/serodata.tex b/docs/tex/tex_descriptions/projects/sm_covid/serodata.tex new file mode 100644 index 000000000..70858800f --- /dev/null +++ b/docs/tex/tex_descriptions/projects/sm_covid/serodata.tex @@ -0,0 +1,133 @@ + +In addition, for countries where a nationally representative seroprevalence survey had been conducted \textcolor{red}{(n=44)}, we include +seroprevalence data in the calibration likelihood. We used the online platform SeroTracker to extract country-specific seroprevalence estimates. +The estimates had to verify the following conditions to be included in the analysis as calibration targets: +\begin{itemize} + \item Be aligned with the World Health Organization's Unity protocol (WHO Unity) for general population seroepidemiological studies. + \item Have a sample size greater than 599 (minimum sample size recommended in \textcolor{blue}{\href{https://docs.google.com/document/d/1NYpszkr-u__aZspFDFa_fa4VBzjAAAAxNxM1rZ1txWU/edit}{WHO sero-surveys protocol}}) + \item Have a sampling start date later than 1 May 2020 (to avoid very early surveys that may be less accurate) + \item Be nationally representative (as classified by SeroTracker) +\end{itemize} + +We further excluded studies that focused on specific population subgroups presenting a risk of selection bias for seroprevalence (e.g. pregnant women, slum population, healthcare workers, quarantine workers). +Finally, to minimise the risk of interference with vaccination, we only included studies for which the vaccination coverage at the time of the survey (midpoint date) +was lower than 10\% of the measured seroprevalence. + +All the criteria listed above were first verified systematically using the SeroTracker database, and the extracted studies were then analysed individually by one author (RR) to check +that all inclusion criteria were verified. + +When a seroprevalence study was restricted to a specific age-group, we matched the survey estimate to the modelled seroprevalence measured in the +closest modelled age-group. For example, as the age-group reported in the Kenya survey was 16-64 years old, we used the modelled seroprevalence +in the age-group 15-69 years-old to inform model calibration. + +When multiple estimates were available for a country, we selected the highest ranked estimate after ordering by the following preference criteria (applied in the presented order): +\begin{enumerate} + \item Lowest risk of bias (according to SeroTracker), + \item Latest sampling start date (when a greater number of infections have occurred, and to avoid bias due to early geographic heterogeneity), + \item Largest sample size. +\end{enumerate} + +The estimates used to inform the models are summarised in Table \ref{tab:sero_national}, with the associated original reports accessible by clicking on the countries' names. + +\begin{table}[!ht] + \footnotesize + \begin{center} + \caption{Seroprevalence data extracted from SeroTracker (national surveys).} + \label{tab:sero_national} + \begin{tabular}{p{2cm} | p{1.6cm} | p{1.6cm} | p{0.8cm} | p{0.8cm} | p{1cm} | p{1.3cm} | p{1.2cm} | p{1.2cm}} + \hline + + \textbf{country} & \textbf{sampling start date} & \textbf{sampling end date} & \textbf{age min} & \textbf{age max} & \textbf{denom. value} & \textbf{serum pos prevalence} & \textbf{estimate grade} & \textbf{overall risk of bias} \\ +\hline +\href{https://dx.doi.org/10.5694/mja2.51542}{\textcolor{blue}{Australia}} & 2020-11-03 & 2021-03-12 & & 19 & 1685 & 0.0023 & National & High \\ +\hline +\href{https://dx.doi.org/10.1007/s15010-021-01639-0}{\textcolor{blue}{Austria}} & 2020-06-05 & 2020-12-04 & 18 & 72 & 20228 & 0.025 & National & High \\ +\hline +\href{https://dx.doi.org/10.2807/1560-7917.ES.2022.27.9.2100419}{\textcolor{blue}{Belgium}} & 2020-10-12 & 2020-10-17 & & 101 & 2966 & 0.0418 & National & Low \\ +\hline +\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8225319/}{\textcolor{blue}{Brazil}} & 2020-05-14 & 2020-06-23 & & & 89362 & 0.023 & National & Moderate \\ +\hline +\href{https://serotracker.com/en/Explore}{\textcolor{blue}{Canada}} & 2021-04-13 & 2021-04-30 & 17 & & 16931 & 0.2692 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1186/s12879-022-07045-7}{\textcolor{blue}{Chile}} & 2020-09-25 & 2020-11-25 & 7 & 94 & 2493 & 0.104 & National & Low \\ +\hline +\href{https://dx.doi.org/10.1016/j.lana.2022.100195}{\textcolor{blue}{Colombia}} & 2020-09-21 & 2020-12-11 & 5 & 80 & 17863 & 0.3253 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.3390/pathogens10060774}{\textcolor{blue}{Croatia}} & 2020-12-15 & 2021-02-15 & & & 1436 & 0.251 & National & High \\ +\hline +\href{https://doi.org/10.1038/s43856-022-00080-0}{\textcolor{blue}{Czechia}} & 2021-02-01 & 2021-03-31 & 18 & & 19548 & 0.51 & National & High \\ +\hline +\href{https://dx.doi.org/10.1007/s10654-021-00796-8}{\textcolor{blue}{Denmark}} & 2020-12-01 & 2020-12-31 & 12 & & 4044 & 0.043 & National & Low \\ +\hline +\href{https://dx.doi.org/10.3389/fmed.2022.933260}{\textcolor{blue}{Ecuador}} & 2020-10-12 & 2020-10-19 & & & 1250 & 0.1168 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1186/s41182-022-00448-x}{\textcolor{blue}{Egypt}} & 2021-01-15 & 2021-06-15 & & & 2360 & 0.463 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1038/s41467-021-23233-6}{\textcolor{blue}{France}} & 2020-05-11 & 2020-05-17 & & & 3592 & 0.0493 & National & Low \\ +\hline +\href{https://www.ifo.de/en/publikationen/2020/monograph-authorship/die-deutschen-und-corona}{\textcolor{blue}{Germany}} & 2020-10-26 & 2020-11-18 & 18 & & 9929 & 0.011 & National & Moderate \\ +\hline +\href{https://revistas.ucr.ac.cr/index.php/psm/article/view/43261/46175}{\textcolor{blue}{Honduras}} & 2020-06-16 & 2020-06-23 & 5 & & 792 & 0.062 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1007/s11357-020-00226-9}{\textcolor{blue}{Hungary}} & 2020-05-01 & 2020-05-16 & 14 & & 10474 & 0.0068 & National & Low \\ +\hline +\href{http://dx.doi.org/10.1016/j.ijid.2021.05.040}{\textcolor{blue}{India}} & 2020-12-18 & 2021-01-06 & 10 & & 28598 & 0.241 & National & Low \\ +\hline +\href{https://dx.doi.org/10.1186/s12889-022-13464-7}{\textcolor{blue}{Iran}} & 2021-01-15 & 2021-03-15 & 10 & 90 & 7411 & 0.342 & National & Low \\ +\hline +\href{https://dx.doi.org/10.1007/s10654-021-00749-1}{\textcolor{blue}{Israel}} & 2020-06-28 & 2020-09-14 & & & 54357 & 0.046 & National & Moderate \\ +\hline +\href{https://www.istat.it/it/files//2020/08/ReportPrimiRisultatiIndagineSiero.pdf}{\textcolor{blue}{Italy}} & 2020-05-25 & 2020-07-15 & & & 64660 & 0.025 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.3201/eid2702.204088}{\textcolor{blue}{Japan}} & 2020-06-01 & 2020-06-07 & 20 & & 7950 & 0.001 & National & Moderate \\ +\hline +\href{http://dx.doi.org/10.1016/j.onehlt.2021.100292}{\textcolor{blue}{Jordan}} & 2020-12-27 & 2021-01-06 & & & 5044 & 0.342 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.3390/ijerph19042263}{\textcolor{blue}{Kazakhstan}} & 2020-07-16 & 2021-07-07 & & & 85346 & 0.63 & National & High \\ +\hline +\href{https://dx.doi.org/10.1001/jama.2021.15265}{\textcolor{blue}{Kenya}} & 2021-01-03 & 2021-03-15 & 16 & 64 & 3018 & 0.485 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1186/s12879-022-07031-z}{\textcolor{blue}{Lebanon}} & 2020-12-07 & 2021-01-15 & & & 2058 & 0.185 & National & Low \\ +\hline +\href{https://www.journals.vu.lt/AML/article/view/22344}{\textcolor{blue}{Lithuania}} & 2020-08-10 & 2020-09-10 & 18 & 92 & 3089 & 0.014 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.3201/eid2813.212348}{\textcolor{blue}{Malawi}} & 2020-10-14 & 2020-12-08 & & & 4261 & 0.078 & National & Low \\ +\hline +\href{https://saludpublica.mx/index.php/spm/article/view/12847}{\textcolor{blue}{Mexico}} & 2020-08-15 & 2020-11-15 & 3 & 12 & 944 & 0.187 & National & Low \\ +\hline +\href{https://mohp.gov.np/attachments/article/708/First%20Sero-prevalence\_final\_report\_04-04-2021.pdf}{\textcolor{blue}{Nepal}} & 2020-10-09 & 2020-10-22 & & & 3040 & 0.144 & National & Low \\ +\hline +\href{https://dx.doi.org/10.1001/jamanetworkopen.2022.36053}{\textcolor{blue}{Nigeria}} & 2021-06-29 & 2021-08-21 & & & 4904 & 0.789 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1111/irv.12932}{\textcolor{blue}{Norway}} & 2020-12-27 & 2021-02-13 & & & 1912 & 0.032 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1016/j.ijid.2021.09.062}{\textcolor{blue}{Oman}} & 2020-11-08 & 2020-11-19 & 5 & & 4064 & 0.22 & National & Moderate \\ +\hline +\href{https://bmjopen.bmj.com/content/12/4/e055381.abstract}{\textcolor{blue}{Pakistan}} & 2020-10-21 & 2020-11-08 & & & 4998 & 0.0702 & National & Moderate \\ +\hline +\href{https://wwwnc.cdc.gov/eid/article/27/11/21-0636\_article}{\textcolor{blue}{Portugal}} & 2020-09-08 & 2020-10-14 & & & 13398 & 0.022 & National & Moderate \\ +\hline +\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8702669/}{\textcolor{blue}{Senegal}} & 2020-06-15 & 2020-10-15 & & & 3231 & 0.204 & National & High \\ +\hline +\href{https://papers.ssrn.com/sol3/papers.cfm?abstract\_id=3826194}{\textcolor{blue}{Singapore}} & 2020-11-15 & 2020-12-15 & 23 & 83 & 937 & 0.0016 & National & High \\ +\hline +\href{https://dx.doi.org/10.1016/j.cmi.2021.03.009}{\textcolor{blue}{Slovenia}} & 2020-10-17 & 2020-11-10 & & 99 & 1211 & 0.0429 & National & Low \\ +\hline +\href{https://assets.researchsquare.com/files/rs-690372/v2\_covered.pdf?c=1627923426}{\textcolor{blue}{South Africa}} & 2021-01-15 & 2021-05-15 & 15 & 69 & 16762 & 0.474 & National & Moderate \\ +\hline +\href{https://bmjopen.bmj.com/content/11/4/e049837.abstract}{\textcolor{blue}{Rep. of Korea}} & 2020-09-24 & 2020-12-09 & 18 & 86 & 4085 & 0.0039 & National & Moderate \\ +\hline +\href{https://www.mscbs.gob.es/ciudadanos/ene-covid/docs/ESTUDIO\_ENE-COVID19\_INFORME\_FINAL.pdf}{\textcolor{blue}{Spain}} & 2020-06-08 & 2020-06-22 & & & 62167 & 0.052 & National & Low \\ +\hline +\href{https://www.folkhalsomyndigheten.se/contentassets/376f9021a4c84da08de18ac597284f0c/pavisning-antikroppar-genomgangen-covid-19-blodgivare-delrapport-2.pdf}{\textcolor{blue}{Sweden}} & 2020-11-23 & 2020-12-04 & & & 3183 & 0.07 & National & Moderate \\ +\hline +\href{https://dx.doi.org/10.1093/cid/ciab626}{\textcolor{blue}{USA}} & 2020-08-09 & 2020-12-08 & 18 & & 4654 & 0.0471 & National & Low \\ +\hline +\href{https://www.gov.uk/government/publications/national-covid-19-surveillance-reports}{\textcolor{blue}{UK}} & 2020-08-24 & 2020-09-18 & 17 & & 8230 & 0.061 & National & Low \\ +\hline +\href{https://www.thelancet.com/journals/langlo/article/PIIS2214-109X(21)00053-X/fulltext}{\textcolor{blue}{Zambia}} & 2020-07-04 & 2020-07-27 & & & 2704 & 0.021 & National & Low \\ +\hline + + \end{tabular} + \end{center} +\end{table} \ No newline at end of file diff --git a/docs/tex/user/rragonnet/sm_covid.pdf b/docs/tex/user/rragonnet/sm_covid.pdf index 136a22306..115a91fd6 100644 Binary files a/docs/tex/user/rragonnet/sm_covid.pdf and b/docs/tex/user/rragonnet/sm_covid.pdf differ diff --git a/docs/tex/user/rragonnet/sm_covid.tex b/docs/tex/user/rragonnet/sm_covid.tex index d45e6a6af..cb2d3b03a 100644 --- a/docs/tex/user/rragonnet/sm_covid.tex +++ b/docs/tex/user/rragonnet/sm_covid.tex @@ -48,8 +48,8 @@ \maketitle \tableofcontents \newpage - \input{../../tex_descriptions/models/sm_covid/model_description.tex} +\input{../../tex_descriptions/models/sm_covid/code.tex} \input{../../tex_descriptions/projects/sm_covid/calibration_description.tex} diff --git a/notebooks/assignment/modify_model.ipynb b/notebooks/assignment/modify_model.ipynb new file mode 100644 index 000000000..0777b141e --- /dev/null +++ b/notebooks/assignment/modify_model.ipynb @@ -0,0 +1,151 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If we are running in google colab, pip install the required packages, \n", + "# but do not modify local environments\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " !pip install summerepi2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from summer2 import CompartmentalModel\n", + "from summer2.parameters import Parameter as param" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set pandas plotting backend to plotly (interactive)\n", + "import pandas as pd\n", + "pd.options.plotting.backend = \"plotly\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def build_model():\n", + " # build an SIR compartmental model\n", + " m = CompartmentalModel(\n", + " times=[0, 100], \n", + " compartments=[\"S\",\"I\",\"R\"],\n", + " infectious_compartments= [\"I\"]\n", + " )\n", + " \n", + " # define starting population distribution\n", + " m.set_initial_population({\"S\": 900, \"I\": 1})\n", + " \n", + " # add flows\n", + " m.add_infection_frequency_flow(\n", + " name=\"infection\", \n", + " contact_rate=param(\"contact_rate\"), \n", + " source=\"S\", \n", + " dest=\"I\"\n", + " )\n", + " m.add_transition_flow(\n", + " name=\"recovery\", \n", + " fractional_rate=param(\"recovery_rate\"), \n", + " source=\"I\", \n", + " dest=\"R\"\n", + " )\n", + "\n", + " # track disease incidence\n", + " m.request_output_for_flow(name=\"incidence\", flow_name=\"infection\")\n", + "\n", + " return m" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sir_model = build_model()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parameters = {\n", + " \"contact_rate\": .25,\n", + " \"recovery_rate\": .1\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sir_model.run(parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sir_model.get_derived_outputs_df().plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment\n", + "Modify the model above in order to capture the following features:\n", + "- An incubation period that has an average duration of 3 days\n", + "- Waning immunity, such that recovered individuals lose their full protection against infection after 30 days (in average), and then become partially protected against reinfection (50% reduction compared to infection-naive individuals)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "summer2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/user/rragonnet/project_specific/School_Closure/country_inclusion.ipynb b/notebooks/user/rragonnet/project_specific/School_Closure/country_inclusion.ipynb index 8d360f488..35f391856 100644 --- a/notebooks/user/rragonnet/project_specific/School_Closure/country_inclusion.ipynb +++ b/notebooks/user/rragonnet/project_specific/School_Closure/country_inclusion.ipynb @@ -164,15 +164,6 @@ "## UNESCO (N=210)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_db.table_names()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -298,6 +289,7 @@ " \"221212_USA_RTIInternational_Guatemala\", # pregnant women\n", " \"220523_Bangladesh_icddrb\", # actually subnational\n", " \"221212_USA_RTIInternational_Bangladesh\", # pregnant women\n", + " \"210727_Canada_CBS\", # cannot find reports of this study. Also, no URL in SeroTracker \n", "]\n", "redflag_filter = ~sero_data[\"study_name\"].isin(red_flags)" ] diff --git a/notebooks/user/rragonnet/project_specific/School_Closure/make_serotable.ipynb b/notebooks/user/rragonnet/project_specific/School_Closure/make_serotable.ipynb index df76ce189..61ff5e75b 100644 --- a/notebooks/user/rragonnet/project_specific/School_Closure/make_serotable.ipynb +++ b/notebooks/user/rragonnet/project_specific/School_Closure/make_serotable.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -46,18 +46,23 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "def make_href_code(url):\n", + " if type(url) == float: # url is nan \n", + " return \"\\href{https://serotracker.com/en/Explore}\"\n", + " else:\n", + " return \"\\href{\" + url + \"}\"\n", + "\n", "def make_latex_table(level):\n", " df = copy(dfs[level])\n", - " df[\"url\"] = df[\"url\"].transform(lambda s: \"\\href{\" + s + \"}\")\n", + " df[\"url\"] = df[\"url\"].transform(make_href_code)\n", " df[\"country\"] = df[\"country\"].transform(lambda s: \"{\\\\textcolor{blue}{\" + s + \"}}\")\n", "\n", - " # df[\"country\"] = \"\\\\textcolor{blue}{\" + df[\"url\"] + df[\"country\"] + \"}\"\n", " df[\"country\"] = df[\"url\"] + df[\"country\"]\n", - " # df[\"country\"] = df[\"country\"].transform(lambda s: \"\\\\underline{\" + s + \"}\")\n", + "\n", "\n", " df = df[columns]\n", " df['serum_pos_prevalence'] = round(df['serum_pos_prevalence'], 4)\n", @@ -100,101 +105,22 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\\textbf{country} & \\textbf{sampling start date} & \\textbf{sampling end date} & \\textbf{age min} & \\textbf{age max} & \\textbf{denom. value} & \\textbf{serum pos prevalence} & \\textbf{estimate grade} & \\textbf{overall risk of bias} \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.5694/mja2.51542}{\\textcolor{blue}{Australia}} & 2020-11-03 & 2021-03-12 & & 19 & 1685 & 0.0023 & National & High \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1007/s15010-021-01639-0}{\\textcolor{blue}{Austria}} & 2020-06-05 & 2020-12-04 & 18 & 72 & 20228 & 0.025 & National & High \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1371/journal.pone.0268093}{\\textcolor{blue}{Bangladesh}} & 2020-10-15 & 2021-02-15 & 10 & & 3220 & 0.673 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.2807/1560-7917.ES.2022.27.9.2100419}{\\textcolor{blue}{Belgium}} & 2020-05-18 & 2020-05-25 & & 101 & 3242 & 0.062 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8225319/}{\\textcolor{blue}{Brazil}} & 2020-05-14 & 2020-06-23 & & & 89362 & 0.023 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{http://dx.doi.org/10.1111/trf.16296}{\\textcolor{blue}{Canada}} & 2020-05-09 & 2020-07-21 & 17 & & 74642 & 0.007 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1186/s12879-022-07045-7}{\\textcolor{blue}{Chile}} & 2020-09-25 & 2020-11-25 & 7 & 94 & 2493 & 0.104 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1016/j.lana.2022.100195}{\\textcolor{blue}{Colombia}} & 2020-09-21 & 2020-12-11 & 5 & 80 & 17863 & 0.3253 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.3390/pathogens10060774}{\\textcolor{blue}{Croatia}} & 2020-12-15 & 2021-02-15 & & & 1436 & 0.251 & National & High \\\\ \n", - "\\hline \n", - "\\href{https://doi.org/10.1038/s43856-022-00080-0}{\\textcolor{blue}{Czechia}} & 2021-02-01 & 2021-03-31 & 18 & & 19548 & 0.51 & National & High \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1007/s10654-021-00796-8}{\\textcolor{blue}{Denmark}} & 2020-08-15 & 2020-09-30 & 12 & & 11478 & 0.02 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.3389/fmed.2022.933260}{\\textcolor{blue}{Ecuador}} & 2020-10-12 & 2020-10-19 & & & 1250 & 0.1168 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1080/23744235.2021.1974540}{\\textcolor{blue}{Estonia}} & 2021-02-08 & 2021-03-25 & & & 2517 & 0.201 & National & High \\\\ \n", - "\\hline \n", - "\\href{http://dx.doi.org/10.1016/j.ijid.2021.08.028}{\\textcolor{blue}{Ethiopia}} & 2020-06-24 & 2020-07-08 & 15 & & 16932 & 0.035 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1038/s41467-021-23233-6}{\\textcolor{blue}{France}} & 2020-05-11 & 2020-05-17 & & & 3592 & 0.0493 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://revistas.ucr.ac.cr/index.php/psm/article/view/43261/46175}{\\textcolor{blue}{Honduras}} & 2020-06-16 & 2020-06-23 & 5 & & 792 & 0.062 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1007/s11357-020-00226-9}{\\textcolor{blue}{Hungary}} & 2020-05-01 & 2020-05-16 & 14 & & 10474 & 0.0068 & National & Low \\\\ \n", - "\\hline \n", - "\\href{http://dx.doi.org/10.1016/j.cmi.2021.06.002}{\\textcolor{blue}{Iran}} & 2020-08-03 & 2020-10-31 & 6 & 109 & 11256 & 0.142 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1007/s10654-021-00749-1}{\\textcolor{blue}{Israel}} & 2020-06-28 & 2020-09-14 & & & 54357 & 0.046 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://www.istat.it/it/files//2020/08/ReportPrimiRisultatiIndagineSiero.pdf}{\\textcolor{blue}{Italy}} & 2020-05-25 & 2020-07-15 & & & 64660 & 0.025 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.3201/eid2702.204088}{\\textcolor{blue}{Japan}} & 2020-06-01 & 2020-06-07 & 20 & & 7950 & 0.001 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{http://dx.doi.org/10.1016/j.onehlt.2021.100292}{\\textcolor{blue}{Jordan}} & 2020-10-01 & 2020-10-31 & & & 5470 & 0.07 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1001/jama.2021.15265}{\\textcolor{blue}{Kenya}} & 2021-01-03 & 2021-03-15 & 16 & 64 & 3018 & 0.485 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1186/s12879-022-07031-z}{\\textcolor{blue}{Lebanon}} & 2020-12-07 & 2021-01-15 & & & 2058 & 0.185 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://www.journals.vu.lt/AML/article/view/22344}{\\textcolor{blue}{Lithuania}} & 2020-08-10 & 2020-09-10 & 18 & 92 & 3089 & 0.014 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.3201/eid2813.212348}{\\textcolor{blue}{Malawi}} & 2020-10-14 & 2020-12-08 & & & 4261 & 0.078 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1016/j.lanwpc.2021.100317}{\\textcolor{blue}{Mongolia}} & 2020-10-13 & 2020-12-04 & & & 5000 & 0.0136 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://mohp.gov.np/attachments/article/708/First%20Sero-prevalence\\_final\\_report\\_04-04-2021.pdf}{\\textcolor{blue}{Nepal}} & 2020-10-09 & 2020-10-22 & & & 3040 & 0.144 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1111/irv.12932}{\\textcolor{blue}{Norway}} & 2020-12-27 & 2021-02-13 & & & 1912 & 0.032 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1016/j.ijid.2021.09.062}{\\textcolor{blue}{Oman}} & 2020-09-13 & 2020-09-24 & 5 & & 4780 & 0.164 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1007/s15010-021-01629-2}{\\textcolor{blue}{Pakistan}} & 2020-07-15 & 2020-07-31 & & & 15390 & 0.424 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://europepmc.org/article/PPR/PPR359402}{\\textcolor{blue}{Portugal}} & 2021-03-01 & 2021-03-17 & & & 2435 & 0.173 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://papers.ssrn.com/sol3/papers.cfm?abstract\\_id=3826194}{\\textcolor{blue}{Singapore}} & 2020-11-15 & 2020-12-15 & 23 & 83 & 937 & 0.0016 & National & High \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1016/j.cmi.2021.03.009}{\\textcolor{blue}{Slovenia}} & 2020-10-17 & 2020-11-10 & & 99 & 1211 & 0.0429 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://dx.doi.org/10.1093/cid/ciac198}{\\textcolor{blue}{South Africa}} & 2020-11-15 & 2021-04-15 & & & 7577 & 0.4523 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://bmjopen.bmj.com/content/11/4/e049837.abstract}{\\textcolor{blue}{Rep. of Korea}} & 2020-09-24 & 2020-12-09 & 18 & 86 & 4085 & 0.0039 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://www.mscbs.gob.es/ciudadanos/ene-covid/docs/ESTUDIO\\_ENE-COVID19\\_INFORME\\_FINAL.pdf}{\\textcolor{blue}{Spain}} & 2020-06-08 & 2020-06-22 & & & 62167 & 0.052 & National & Low \\\\ \n", - "\\hline \n", - "\\href{https://www.folkhalsomyndigheten.se/contentassets/376f9021a4c84da08de18ac597284f0c/pavisning-antikroppar-genomgangen-covid-19-blodgivare-delrapport-2.pdf}{\\textcolor{blue}{Sweden}} & 2020-11-23 & 2020-12-04 & & & 3183 & 0.07 & National & Moderate \\\\ \n", - "\\hline \n", - "\\href{https://www.thelancet.com/journals/langlo/article/PIIS2214-109X(21)00053-X/fulltext}{\\textcolor{blue}{Zambia}} & 2020-07-04 & 2020-07-27 & & & 2704 & 0.021 & National & Low \\\\ \n", - "\\hline \n", - "\n" - ] - } - ], + "outputs": [], "source": [ "s = make_latex_table(\"national\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(dfs['national']['url'][3]) == float" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/notebooks/user/rragonnet/utils/sojourn_distribution.ipynb b/notebooks/user/rragonnet/utils/sojourn_distribution.ipynb new file mode 100644 index 000000000..2d5d92be6 --- /dev/null +++ b/notebooks/user/rragonnet/utils/sojourn_distribution.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import stats\n", + "from numpy import linspace\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Average sojourn time\n", + "mean_period = 5.\n", + "\n", + "# number of serial compartments to compare (list can be extended)\n", + "n_comps_to_compare = [1, 4]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(1, len(n_comps_to_compare), figsize=(5.*len(n_comps_to_compare), 4))\n", + "x_min, x_max = 0., 14\n", + "x = linspace(x_min, x_max, 1000)\n", + "\n", + "for i, n_comps in enumerate(n_comps_to_compare):\n", + " ax = axes[i]\n", + " distri = stats.gamma(a=n_comps, scale=mean_period / n_comps)\n", + " ax.fill_between(x, distri.pdf(x), alpha=.5, color='blue')\n", + " ax.set_xlabel(\"days\")\n", + " ax.set_title(f\"{n_comps} compartment(s)\")\n", + "\n", + " # Print some percentages\n", + " print(f\"Using {n_comps} compartment(s):\")\n", + " for quantile in [7., 10., 14.]:\n", + " perc_greater = round(100* (1 - distri.cdf(quantile)))\n", + " print(f\"\\t{perc_greater}% have sojourn time > {quantile} days\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "summer2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}