diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..2eed539 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,93 @@ + + +
+ + + + +To install this github version type (in R):
+##if devtools is not installed yet:
+## install.packages("devtools")
+library(devtools)
+install_github("livioivil/flipscores")
J Hemerik, JJ Goeman and L Finos (2019) Robust testing in generalized linear models by sign-flipping score contributions. Journal of the Royal Statistical Society Series B: Statistical Methodology, Volume 82, Issue 3, July 2020, Pages 841–864.
https://doi.org/10.1111/rssb.12369
R De Santis, J Goeman, J Hemerik, L Finos (2022) Inference in generalized linear models with robustness to misspecified variances arXiv: 2209.13918.
https://arxiv.org/abs/2209.13918
+library(flipscores)
+set.seed(1)
+dt=data.frame(X=rnorm(20),
+ Z=factor(rep(LETTERS[1:3],length.out=20)))
+dt$Y=rpois(n=20,lambda=exp(dt$X))
+mod=flipscores(Y~Z+X,data=dt,family="poisson",x=TRUE)
+summary(mod)
+#>
+#> Call:
+#> flipscores(formula = Y ~ Z + X, family = "poisson", data = dt,
+#> x = TRUE)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -1.6910 -0.5792 0.1012 0.4900 1.0440
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|z|)
+#> (Intercept) -0.1026 -0.7229 2.7127 -0.2665 -0.088 0.7460
+#> ZB -0.1501 -0.7125 2.1789 -0.3270 -0.104 0.6564
+#> ZC 0.1633 0.8106 2.2232 0.3646 0.117 0.6924
+#> X 0.9439 16.2062 4.7272 3.4283 0.671 0.0098 **
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for poisson family taken to be 1)
+#>
+#> Null deviance: 27.135 on 19 degrees of freedom
+#> Residual deviance: 12.888 on 16 degrees of freedom
+#> AIC: 57.459
+#>
+#> Number of Fisher Scoring iterations: 5
+
+# Anova test
+anova(mod)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Z 2 0.77184 0.6984
+#> X 1 0.02977 0.0098 **
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+# or
+mod0=flipscores(Y~Z,data=dt,family="poisson",x=TRUE)
+anova(mod0,mod)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model 1: Y ~ Z
+#> Model 2: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Model 2 vs Model 1 1 0.029586 0.0108 *
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+# and
+mod0=flipscores(Y~X,data=dt,family="poisson")
+anova(mod0,mod)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model 1: Y ~ X
+#> Model 2: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Model 2 vs Model 1 2 1.4245 0.5244
+set.seed(1)
+D=data.frame(x=(1:40)/20, z=rnorm(40))
+D$y=rnbinom(40,mu=exp(D$x),size=3)
+
+library(MASS)
+mod_par=glm.nb(y~x+z,data=D, link="log")
+summary(mod_par)
+#>
+#> Call:
+#> glm.nb(formula = y ~ x + z, data = D, link = "log", init.theta = 7.972747099)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -2.0746 -0.7748 -0.1086 0.4617 2.0435
+#>
+#> Coefficients:
+#> Estimate Std. Error z value Pr(>|z|)
+#> (Intercept) -0.15365 0.29358 -0.523 0.601
+#> x 0.92089 0.21481 4.287 1.81e-05 ***
+#> z -0.01282 0.13606 -0.094 0.925
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for Negative Binomial(7.9727) family taken to be 1)
+#>
+#> Null deviance: 61.217 on 39 degrees of freedom
+#> Residual deviance: 41.312 on 37 degrees of freedom
+#> AIC: 154.29
+#>
+#> Number of Fisher Scoring iterations: 1
+#>
+#>
+#> Theta: 7.97
+#> Std. Err.: 6.95
+#>
+#> 2 x log-likelihood: -146.286
+mod=flipscores(y~x+z, data=D, family = "negbinom")
+summary(mod)
+#>
+#> Call:
+#> flipscores(formula = y ~ x + z, family = "negbinom", data = D)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -2.0746 -0.7748 -0.1086 0.4617 2.0435
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|t|)
+#> (Intercept) -0.15365 -1.84162 3.42399 -0.53786 -0.087 0.5808
+#> x 0.92089 14.93128 4.42610 3.37346 0.547 0.0014 **
+#> z -0.01282 -0.72457 7.24491 -0.10001 -0.016 0.9612
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for Negative Binomial(7.9727) family taken to be 0.9960228)
+#>
+#> Null deviance: 61.217 on 39 degrees of freedom
+#> Residual deviance: 41.312 on 37 degrees of freedom
+#> AIC: 154.29
+#>
+#> Number of Fisher Scoring iterations: 1
anova.flipscores.Rd
This is the anova
method for flipscores
object. Remark: it performs type III deviance decomposition as in car::Anova
.
# S3 method for flipscores
+anova(object, model1 = NULL, score_type = NULL, n_flips = 5000, id = NULL, ...)
(the object) glm
(or flipscores
) object with the model under the null hypothesis (i.e. the covariates, the nuisance parameters).
a glm
(or flipscores
) or a matrix
(or vector
). If it is a glm
object, it has the model under the alternative hypothesis. The variables in model1
are the same variables in object
plus one or more variables to be tested. Alternatively, if
+model1
is a matrix
, it contains the tested variables column-wise.
The type of score that is computed. It can be "orthogonalized", "effective" or "basic".
+Default is "orthogonalized". "effective" and "orthogonalized" take into account the nuisance estimation. The default is NULL
, in this case the value is taken from object
.
The number of random flips of the score contributions.
+When n_flips
is equal or larger than the maximum number of possible flips (i.e. n^2), all possible flips are performed.
+Default is 5000.
a vector
identifying the clustered observations. If NULL
(default) observations are assumed to be independent. NOTE: if object
is a flipscores
and model$flip_param_call$id
is not NULL
, this is considered in the inference.
other parameters allowed in stats::anova
.
set.seed(1)
+dt=data.frame(X=scale(rnorm(50)),
+ Z=factor(rep(LETTERS[1:3],length.out=50)))
+dt$Y=rpois(n=nrow(dt),lambda=exp(dt$X*(dt$Z=="C")))
+mod0=flipscores(Y~Z+X,data=dt,family="poisson")
+summary(mod0)
+#>
+#> Call:
+#> flipscores(formula = Y ~ Z + X, family = "poisson", data = dt)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -2.0810 -1.0541 -0.1611 0.3069 2.9506
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|z|)
+#> (Intercept) -0.3513 -5.4234 4.2404 -1.2790 -0.169 0.1136
+#> ZB 0.2276 1.8049 2.8140 0.6414 0.079 0.4564
+#> ZC 0.9034 10.3392 3.4527 2.9945 0.348 0.0218 *
+#> X 0.6381 33.1170 8.0470 4.1155 0.478 0.0092 **
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for poisson family taken to be 1)
+#>
+#> Null deviance: 82.225 on 49 degrees of freedom
+#> Residual deviance: 52.798 on 46 degrees of freedom
+#> AIC: 143.12
+#>
+#> Number of Fisher Scoring iterations: 5
+#>
+anova(mod0)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Z 2 4.4082 0.0964 .
+#> X 1 0.0341 0.0092 **
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+mod1=flipscores(Y~Z*X,data=dt,family="poisson")
+summary(mod1)
+#>
+#> Call:
+#> flipscores(formula = Y ~ Z * X, family = "poisson", data = dt)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -1.94518 -0.88566 -0.00768 0.56160 1.70791
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|z|)
+#> (Intercept) -0.3549 -4.3865 3.8867 -1.1286 -0.189 0.1008
+#> ZB 0.3570 2.3488 2.6093 0.9002 0.145 0.3038
+#> ZC 0.6023 3.3952 2.3099 1.4698 0.235 0.0716 .
+#> X 0.6439 8.1773 3.6816 2.2211 0.331 0.1260
+#> ZB:X -0.5979 -3.8934 2.5112 -1.5504 -0.238 0.3410
+#> ZC:X 0.4856 2.8440 2.4290 1.1709 0.184 0.2216
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for poisson family taken to be 1)
+#>
+#> Null deviance: 82.225 on 49 degrees of freedom
+#> Residual deviance: 44.750 on 44 degrees of freedom
+#> AIC: 139.07
+#>
+#> Number of Fisher Scoring iterations: 5
+#>
+anova(mod0,model1 = mod1)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model 1: Y ~ Z + X
+#> Model 2: Y ~ Z * X
+#> Df Score Pr(>Score)
+#> Model 2 vs Model 1 2 4.589 0.0628 .
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+
+
compute_scores.Rd
Same usage as anova.glm
.
+The parameter id
is used too,
+if present in model0
(with priority) or in model1
.
compute_scores(model0, model1, score_type = "standardized")
a glm
object with the model under the null hypothesis (i.e. the covariates, the nuisance parameters).
a glm
or a matrix
(or vector
). If it is a glm
object, it has the model under the alternative hypothesis. The variables in model1
are the same variables in model0
plus one or more variables to be tested. Alternatively, if
+model1
is a matrix
, it contains the tested variables column-wise.
The type of score that is computed. It is "orthogonalized", "effective" or "basic". +"effective" and "orthogonalized" take into account the nuisance estimation.
flipscores-method.Rd
Methods for flipscores
objects.
+The following are methods to extract and manipulate relevant information from
+a flipscores
object.
a flipscores object
additional arguments to be passed
a flipscores object
flipscores-package.Rd
It provides robust tests for testing in GLMs, by sign-flipping score contributions. The tests are often robust against overdispersion, heteroscedasticity and, in some cases, ignored nuisance variables.
+set.seed(1)
+dt=data.frame(X=rnorm(20),
+ Z=factor(rep(LETTERS[1:3],length.out=20)))
+dt$Y=rpois(n=20,lambda=exp(dt$X))
+mod=flipscores(Y~Z+X,data=dt,family="poisson",x=TRUE)
+summary(mod)
+#>
+#> Call:
+#> flipscores(formula = Y ~ Z + X, family = "poisson", data = dt,
+#> x = TRUE)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -1.6910 -0.5792 0.1012 0.4900 1.0440
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|z|)
+#> (Intercept) -0.1026 -0.7229 2.7127 -0.2665 -0.088 0.7472
+#> ZB -0.1501 -0.7125 2.1789 -0.3270 -0.104 0.6514
+#> ZC 0.1633 0.8106 2.2232 0.3646 0.117 0.6964
+#> X 0.9439 16.2062 4.7272 3.4283 0.671 0.0104 *
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for poisson family taken to be 1)
+#>
+#> Null deviance: 27.135 on 19 degrees of freedom
+#> Residual deviance: 12.888 on 16 degrees of freedom
+#> AIC: 57.459
+#>
+#> Number of Fisher Scoring iterations: 5
+#>
+
+# Anova test
+anova(mod)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Z 2 0.72827 0.7152
+#> X 1 0.02949 0.0104 *
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+# or
+mod0=flipscores(Y~Z,data=dt,family="poisson",x=TRUE)
+anova(mod0,mod)
+#> Analysis of Deviance Table (Type III test)
+#>
+#> Model: poisson, link: log
+#>
+#> Inference is provided by FlipScores approach (5000 sign flips).
+#>
+#> Model 1: Y ~ Z
+#> Model 2: Y ~ Z + X
+#> Df Score Pr(>Score)
+#> Model 2 vs Model 1 1 0.029203 0.0098 **
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+# and
+mod0=flipscores(Y~X,data=dt,family="poisson")
+anova(mod0,mod)
+#> Error in model.frame.default(formula = Y ~ X, data = dt, drop.unused.levels = TRUE): 'data' must be a data.frame, environment, or list
+
flipscores.Rd
Provides robust tests for testing in GLMs, by sign-flipping score contributions. The tests are often robust against overdispersion, heteroscedasticity and, in some cases, ignored nuisance variables.
+flipscores(formula, family, data, score_type = "standardized",
+n_flips = 5000, alternative = "two.sided", id = NULL,
+seed = NULL, to_be_tested = NULL, flips = NULL,
+precompute_flips = TRUE, output_flips = FALSE, ...)
see glm
function. It can also be a model (usually generated by a call to glm
); in this case, any other glm-related parameter (e.g. family, data, etc.
) are discarded, the function will make use of the ones used to generate the model.
+(i.e. formula
, family
, data
, etc) are not considered. It is NULL
by default (i.e. not used).
see glm
function.
see glm
function.
The type of score that is computed. It can be "standardized" "orthogonalized", "effective" or "basic". +Both "orthogonalized" and "effective" take into account the nuisance estimation and they provide the same +test statistic. In case of small samples "effective score" might have a slight anti-conservative behaviour. +"standardized effective score" gives a solution for this issue. +"orthogonalized" has a similar intent, note however that in case of a big model matrix, it may be slow.
The number of random flips of the score contributions. Overwritten with the nrow(flips)
when flips
is not NULL
(see parameter flips
for more details).
+When n_flips
is equal or larger than the maximum number of possible flips (i.e. n^2), all possible flips are performed.
It can be "greater", "less" or "two.sided" (default)
a vector
identifying the clustered observations. If NULL
(default) observations are assumed to be independent. If id
is not NULL
, only score_type=="effective"
is allowed, yet.
NULL
by default.
vector of indices or names of coefficients of the glm model to be tested (it is faster than computing every scores and p-values of course).
matrix fo +1 or -1, the matrix has n_flips
rows and n (number of observations) columns
TRUE
by default. Overwritten if flips
is not NULL
. If FALSE
the matrix of flips is not computed and the flips are made 'on-the-fly' before computing the test statistics; it may be usefull when flips
is very large (see parameter flips
for more details).
FALSE
by default. If TRUE
the flips
matrix is returned. Useful when the same flips are needed for more glms, for example in the case of multivariate glms where the joint distribution of test statistis if used for multivariate inference.
see glm
function.
an object of class flipscores
.
+See also its methods (summary.flipscores
, anova.flipscores
, print.flipscores
).
flipscores
borrows the same parameters from function glm
(and glm.nb
). See these helps for more details about parameters such as formula
,
+data
, family
. Note: in order to use Negative Binomial family, family
reference must have quotes (i.e. family="negbinom"
).
+ Furthermore, flipscores
object contains two extra elements: scores
-- i.e. a matrix of n score contributions, one column for each tested coefficient -- and Tspace
-- i.e. a matrix of size n_flips
times ncol(scores)
. The fist row of Tspace
contains column-wise the test statistics generated by randomly flipping the score contributions, each column refers to the same column of scores
, the vector of observed test statistics (i.e. no flips) is in the first row of Tspace
.
"Robust testing in generalized linear models by sign-flipping score contributions" by J.Hemerik, J.Goeman and L.Finos.
+set.seed(1)
+dt=data.frame(X=rnorm(20),
+ Z=factor(rep(LETTERS[1:3],length.out=20)))
+dt$Y=rpois(n=20,lambda=exp(dt$Z=="C"))
+mod=flipscores(Y~Z+X,data=dt,family="poisson",n_flips=1000)
+summary(mod)
+#>
+#> Call:
+#> flipscores(formula = Y ~ Z + X, family = "poisson", data = dt,
+#> n_flips = 1000)
+#>
+#> Deviance Residuals:
+#> Min 1Q Median 3Q Max
+#> -1.4796 -0.4363 0.1579 0.3358 0.9899
+#>
+#> Coefficients:
+#> Estimate Score Std. Error z value Part. Cor Pr(>|z|)
+#> (Intercept) -0.14256 -0.91360 2.62144 -0.34851 -0.127 0.744
+#> ZB -0.18558 -0.50868 1.65785 -0.30683 -0.108 0.657
+#> ZC 1.40981 8.55380 2.58950 3.30326 0.765 0.008 **
+#> X -0.06964 -1.56935 4.70999 -0.33320 -0.117 0.665
+#> ---
+#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#>
+#> (Dispersion parameter for poisson family taken to be 1)
+#>
+#> Null deviance: 28.649 on 19 degrees of freedom
+#> Residual deviance: 11.218 on 16 degrees of freedom
+#> AIC: 58.102
+#>
+#> Number of Fisher Scoring iterations: 5
+#>
+
+# Equivalent to:
+model=glm(Y~Z+X,data=dt,family="poisson")
+mod2=flipscores(model)
+#> Error in model.frame.default(formula = Y ~ Z + X, data = dt, drop.unused.levels = TRUE): 'data' must be a data.frame, environment, or list
+summary(mod2)
+#> Error in summary(mod2): object 'mod2' not found
+
+
+ All functions+ + |
+ |
---|---|
+ + | +anova.flipscores |
+
+ + | +compute_scores |
+
+ + | +Methods for flipscores objects |
+
+ + | +Robust Score Testing in GLMs, by Sign-Flip Contributions |
+
+ + | +Robust testing in GLMs, by sign-flipping score contributions |
+