The goal of mrsim
is to simulate mendelian randomization (MR) data
using meaningful hyper parameters. You can check the theory behind the
package at vignette("data_generating_process")
.
You can install the development version of mrsim
from
GitHub with
# install.packages("devtools")
devtools::install_github("GiuseppeTT/mrsim")
This is a basic example which shows you how to use mrsim
to simulate
MR data and estimate the causal effect of the exposure (X) on the
outcome (Y) with the MendelianRandomization
package.
# install.packages("MendelianRandomization")
library(mrsim)
set.seed(42)
hyper_parameters <- define_hyper_parameters(
d = 1e3,
s = 50 / 100,
p = 25 / 100,
r2_g_x = 0.01 / 100,
r2_u_x = 30 / 100,
r2_u_y = 30 / 100,
beta_x_y = 20 / 100
)
print(hyper_parameters)
#> # Hyper parameters
#>
#> (Dimension) Number of G's (d): 1000
#> (Sparsity) Proportion of zero effect G's (s): 0.5
#> (Skewness) Minor allele frequency of G' (p): 0.25
#> (Instrument strength) Variance in X explained per non-zero effect G (r2_g_x): 1e-04
#> (Confouding level) Variance in X explained by U (r2_u_x): 0.3
#> (Confouding level) Variance in Y explained by U (r2_u_y): 0.3
#> (Target causal effect) Causal effect of X on Y (beta_x_y): 0.2
restrictions <- define_restrictions()
print(restrictions)
#> # Restrictions
#>
#> G mean (mean_g): 0
#> G variance (var_g): 1
#> U mean (mean_u): 0
#> U variance (var_u): 1
#> X mean (mean_x): 0
#> X variance (var_X): 1
#> Y mean (mean_y): 0
#> Y variance (var_Y): 1
parameters <- calculate_parameters(hyper_parameters, restrictions)
print(parameters)
#> # Parameters
#>
#> Number of non-zero effect G's (m): 500
#> Number of zero effect G's (k): 500
#> Minor allele frequency of G's (p): 0.25
#> U intercept (alpha_u): 0
#> U noise variance (sigma2_u): 1
#> X intercept (alpha_x): 0
#> Causal effect of G's on X (beta_g_x): [0.01, ..., 0] (1000 sized vector)
#> Causal effect of U on X (beta_u_x): 0.5477226
#> X noise variance (sigma2_x): 0.65
#> Y intercept (alpha_y): 0
#> Causal effect of U on Y (beta_u_y): 0.438178
#> Causal effect of X on Y (beta_x_y): 0.2
#> Y noise variance (sigma2_y): 0.672
sample <- generate_sample(parameters, n = 10e3)
print(sample)
#> # Sample
#>
#> Sample size (n): 10000
#>
#> ## Exogenous variables
#>
#> Raw G data (gprime): [[1, ..., 0]] (10000 x 1000 sized matrix)
#> U noise (e_u): [[-1.19680097216348, ..., -0.890468699587795]] (10000 x 1 sized matrix)
#> X noise (e_x): [[0.460947832126287, ..., -2.41969073735133]] (10000 x 1 sized matrix)
#> Y noise (e_y): [[0.527904124270486, ..., 0.388857577263968]] (10000 x 1 sized matrix)
#>
#> ## Endogenous variables
#>
#> G data (g): [[0.816496580927726, ..., -0.816496580927726]] (10000 x 1000 sized matrix)
#> U data (u): [[-1.19680097216348, ..., -0.890468699587795]] (10000 x 1 sized matrix)
#> X data (x): [[-0.545165772082768, ..., -2.1609379955708]] (10000 x 1 sized matrix)
#> Y data (y): [[-0.20069246021126, ..., -0.503603077991844]] (10000 x 1 sized matrix)
summary_statistics <- calculate_summary_statistics(sample)
print(head(summary_statistics))
#> snp n beta_g_x beta_se_g_x p_value_g_x f_statistic_g_x r2_g_x
#> 1 1 10000 0.010238784 0.009974030 0.3046599 1.05379325 1.053893e-04
#> 2 2 10000 -0.001569459 0.009940290 0.8745478 0.02492881 2.493374e-06
#> 3 3 10000 -0.007923938 0.010041294 0.4300517 0.62273429 6.228201e-05
#> 4 4 10000 0.006464692 0.009949239 0.5158574 0.42219769 4.222643e-05
#> 5 5 10000 0.003288470 0.009979881 0.7417772 0.10857682 1.085974e-05
#> 6 6 10000 0.009602536 0.010089301 0.3412446 0.90583630 9.059354e-05
#> beta_g_y beta_se_g_y p_value_g_y f_statistic_g_y r2_g_y
#> 1 -0.0110235115 0.010005966 0.27062129 1.21372946 1.213825e-04
#> 2 0.0015893525 0.009972198 0.87337401 0.02540146 2.540648e-06
#> 3 -0.0007303149 0.010073837 0.94220848 0.00525570 5.256748e-07
#> 4 0.0078155899 0.009981081 0.43362190 0.61315236 6.132374e-05
#> 5 -0.0094842732 0.010011520 0.34349052 0.89744539 8.975444e-05
#> 6 0.0176732200 0.010120602 0.08079619 3.04943011 3.049110e-04
filtered_summary_statistics <- summary_statistics[summary_statistics$f_statistic_g_x > 10, ]
print(head(filtered_summary_statistics))
#> snp n beta_g_x beta_se_g_x p_value_g_x f_statistic_g_x r2_g_x
#> 15 15 10000 0.03241914 0.009957096 1.134168e-03 10.60077 0.001059166
#> 44 44 10000 0.03422637 0.009885209 5.376580e-04 11.98809 0.001197612
#> 228 228 10000 0.03357739 0.009982621 7.722630e-04 11.31370 0.001130317
#> 244 244 10000 0.03956636 0.009937673 6.898075e-05 15.85195 0.001583002
#> 257 257 10000 0.03805379 0.010059046 1.558269e-04 14.31141 0.001429381
#> 259 259 10000 0.03705894 0.009865990 1.734711e-04 14.10927 0.001409221
#> beta_g_y beta_se_g_y p_value_g_y f_statistic_g_y r2_g_y
#> 15 0.010508500 0.009993799 0.2930535 1.1056565 1.105755e-04
#> 44 0.009588403 0.009922421 0.3338989 0.9338074 9.339069e-05
#> 228 0.009723977 0.010019857 0.3318355 0.9418132 9.419128e-05
#> 244 0.013624680 0.009976543 0.1720728 1.8650588 1.865084e-04
#> 257 0.005742709 0.010098391 0.5695891 0.3233920 3.234462e-05
#> 259 0.016103133 0.009903332 0.1039744 2.6439796 2.643809e-04
mr_data <- MendelianRandomization::mr_input(
bx = filtered_summary_statistics$beta_g_x,
bxse = filtered_summary_statistics$beta_se_g_x,
by = filtered_summary_statistics$beta_g_y,
byse = filtered_summary_statistics$beta_se_g_y
)
model_fit <- MendelianRandomization::mr_ivw(mr_data)
estimated_beta_x_y <- model_fit@Estimate
print(estimated_beta_x_y)
#> [1] 0.316981
real_beta_x_y <- get_beta_x_y(parameters)
print(real_beta_x_y)
#> [1] 0.2