Skip to content

Commit

Permalink
Prepare get_df() to replace parameters::degrees_of_freedom() (#907)
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored Jul 23, 2024
1 parent 144cdc1 commit 1824bb8
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 55 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: insight
Title: Easy Access to Model Information for Various Model Objects
Version: 0.20.2.3
Version: 0.20.2.4
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Generated by roxygen2: do not edit by hand

S3method(.degrees_of_freedom_residual,BBmm)
S3method(.degrees_of_freedom_residual,BBreg)
S3method(as.data.frame,get_predicted)
S3method(as.data.frame,insight_residuals)
S3method(as.matrix,get_predicted)
Expand Down Expand Up @@ -511,6 +509,7 @@ S3method(get_df,default)
S3method(get_df,emmGrid)
S3method(get_df,emm_list)
S3method(get_df,fixest)
S3method(get_df,fixest_multi)
S3method(get_df,lme)
S3method(get_df,lmerMod)
S3method(get_df,lmerModTest)
Expand All @@ -525,6 +524,7 @@ S3method(get_df,mmrm_tmb)
S3method(get_df,model_fit)
S3method(get_df,negbinirr)
S3method(get_df,negbinmfx)
S3method(get_df,nestedLogit)
S3method(get_df,phyloglm)
S3method(get_df,phylolm)
S3method(get_df,poissonirr)
Expand Down
8 changes: 6 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# insight 0.20.3

## Changes

* `get_df()` now supports more model classes.

## Bug fixes

* Fixed issues in `find_response()` for *brms* models with `mi()` function in
Expand All @@ -8,8 +12,8 @@
* Fixed issue in `get_variance()` that could lead to recursive calls for
*brms* models, resulting in "infinite" resampling of the model.

* Fixed issue in `check_if_installed()` that errornously tried to guess the
minimum reqired package version based on the SUGGEST field of the _insight_
* Fixed issue in `check_if_installed()` that errorneously tried to guess the
minimum required package version based on the SUGGEST field of the _insight_
package, instead of the package that was calling the function.

# insight 0.20.2
Expand Down
2 changes: 1 addition & 1 deletion R/check_if_installed.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ print.check_if_installed <- function(x, ...) {
minframe <- 1L
n <- sys.nframe()
pkg <- NULL
while(n > minframe && is.null(pkg)) {
while (n > minframe && is.null(pkg)) {
pkg <- utils::packageName(env = sys.frame(n))
}
# if still NULL, or if "insight", return
Expand Down
125 changes: 108 additions & 17 deletions R/get_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@
#' + `"normal"` always returns `Inf`.
#' + `"model"` returns model-based degrees of freedom, i.e. the number of
#' (estimated) parameters.
#' + For mixed models, can also be `"ml1"` (approximation of degrees of freedom
#' based on a "m-l-1" heuristic as suggested by _Elff et al. 2019_) or
#' `"betwithin"`, and for models of class `merMod`, `type` can also be
#' `"satterthwaite"` or `"kenward-roger"`. See 'Details'.
#' + For mixed models, can also be `"ml1"` (or `"m-l-1"`, approximation of
#' degrees of freedom based on a "m-l-1" heuristic as suggested by _Elff et
#' al. 2019_) or `"between-within"` (or `"betwithin"`).
#' + For mixed models of class `merMod`, `type` can also be `"satterthwaite"`
#' or `"kenward-roger"` (or `"kenward"`). See 'Details'.
#'
#' Usually, when degrees of freedom are required to calculate p-values or
#' confidence intervals, `type = "wald"` is likely to be the best choice in
Expand Down Expand Up @@ -100,10 +101,20 @@ get_df <- function(x, ...) {
#' @rdname get_df
#' @export
get_df.default <- function(x, type = "residual", verbose = TRUE, ...) {
# check for valid model-object
if (missing(x) || is.null(x)) {
format_error(
"You must provide a model-object. Argument cannot be missing or `NULL`."
)
}

# check valid options
type <- match.arg(
tolower(type),
choices = c("residual", "model", "analytical", "wald", "normal", "ml1", "betwithin")
choices = c(
"residual", "model", "analytical", "wald", "normal", "ml1", "betwithin",
"between-within", "profile", "boot", "uniroot", "likelihood", "m-l-1"
)
)

# check if user already passed "statistic" argument, to
Expand All @@ -114,11 +125,8 @@ get_df.default <- function(x, type = "residual", verbose = TRUE, ...) {
statistic <- find_statistic(x)
}

# handle aliases
if (type == "analytical") {
type <- "residual"
}

# handle aliases, resp. mixing type and ci_method
type <- .check_df_type(type)

if (type == "normal") { # nolint
# Wald normal approximation - always Inf -----
Expand All @@ -141,11 +149,11 @@ get_df.default <- function(x, type = "residual", verbose = TRUE, ...) {
dof <- .get_residual_df(x, verbose)

# ml1 - only for certain mixed models -----
} else if (type == "ml1") {
} else if (type %in% c("ml1", "m-l-1")) {
dof <- .degrees_of_freedom_ml1(x)

# between-within - only for certain mixed models -----
} else if (type == "betwithin") {
} else if (type %in% c("betwithin", "between-within")) {
dof <- .degrees_of_freedom_betwithin(x)

# remaining option is model-based df, i.e. number of estimated parameters
Expand Down Expand Up @@ -271,6 +279,22 @@ get_df.fixest <- function(x, type = "residual", ...) {
}


#' @export
get_df.fixest_multi <- function(x, ...) {
out <- do.call(rbind, lapply(x, get_df, ...))

# add response and group columns
id_columns <- .get_fixest_multi_columns(x)

# add response column
out$Response <- id_columns$Response
out$Group <- id_columns$Group

row.names(out) <- NULL
out
}



# Mixed models - special treatment --------------

Expand All @@ -280,10 +304,12 @@ get_df.lmerMod <- function(x, type = "residual", ...) {
tolower(type),
choices = c(
"residual", "model", "analytical", "satterthwaite", "kenward",
"kenward-roger", "kr", "normal", "wald", "ml1", "betwithin"
"kenward-roger", "kr", "normal", "wald", "ml1", "m-l-1", "betwithin",
"between-within"
)
)

# hidden gem - required for get_predicted_ci(), where we have per-observation DF
dots <- list(...)

if (type %in% c("satterthwaite", "kr", "kenward", "kenward-roger") && isTRUE(dots$df_per_obs)) {
Expand Down Expand Up @@ -338,6 +364,36 @@ get_df.betaor <- get_df.logitor
get_df.betamfx <- get_df.logitor


#' @export
get_df.nestedLogit <- function(x, type = NULL, component = "all", verbose = TRUE, ...) {
if (is.null(type)) {
type <- "wald"
}
if (tolower(type) == "residual") {
cf <- as.data.frame(stats::coef(x))
dof <- rep(vapply(x$models, stats::df.residual, numeric(1)), each = nrow(cf))
if (!is.null(component) && !identical(component, "all")) {
comp <- intersect(names(dof), component)
if (length(comp)) {
dof <- dof[comp]
} else {
if (verbose) {
format_alert(paste0(
"No matching model found. Possible values for `component` are ",
toString(paste0("'", names(x$models), "'")),
"."
))
}
dof <- Inf
}
}
} else {
dof <- Inf
}
dof
}


#' @export
get_df.mira <- function(x, type = "residual", verbose = TRUE, ...) {
# installed?
Expand Down Expand Up @@ -382,17 +438,22 @@ get_df.mediate <- function(x, ...) {


#' @keywords internal
.degrees_of_freedom_analytical <- function(model) {
.degrees_of_freedom_analytical <- function(model, kenward = TRUE) {
nparam <- .model_df(model)
n <- n_obs(model)

if (is.null(n)) {
return(Inf)
}

n - nparam
}
if (isTRUE(kenward) && inherits(model, "lmerMod")) {
dof <- as.numeric(.degrees_of_freedom_kr(model))
} else {
dof <- n - nparam
}

dof
}


# Model approach (model-based / logLik df) ------------------------------
Expand Down Expand Up @@ -438,8 +499,38 @@ get_df.mediate <- function(x, ...) {

.check_df_type <- function(type) {
# handle mixing of ci_method and type arguments
if (tolower(type) %in% c("profile", "uniroot", "quantile", "eti", "hdi", "bci", "boot", "spi")) {
if (tolower(type) %in% c("profile", "uniroot", "quantile", "likelihood", "eti", "hdi", "bci", "boot", "spi", "analytical", "nokr")) {
type <- "residual"
}
type
}


.get_fixest_multi_columns <- function(model) {
# add response and group columns
s <- summary(model)
l <- lengths(lapply(s, stats::coef))
parts <- strsplit(names(l), ";", fixed = TRUE)

id_columns <- Map(function(i, j) {
if (length(j) == 1 && startsWith(j, "rhs")) {
data.frame(
Group = rep(trim_ws(sub("rhs:", "", j, fixed = TRUE)), i),
stringsAsFactors = FALSE
)
} else if (length(j) == 1 && startsWith(j, "lhs")) {
data.frame(
Response = rep(trim_ws(sub("lhs:", "", j, fixed = TRUE)), i),
stringsAsFactors = FALSE
)
} else {
data.frame(
Response = rep(trim_ws(sub("lhs:", "", j[1], fixed = TRUE)), i),
Group = rep(trim_ws(sub("rhs:", "", j[2], fixed = TRUE)), i),
stringsAsFactors = FALSE
)
}
}, unname(l), parts)

do.call(rbind, id_columns)
}
4 changes: 2 additions & 2 deletions R/get_df_ml1.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@
var.within <- stats::var(x - x.bar)
var.between <- stats::var(x.bar)
if (var.within >= var.between) {
return(n)
n
} else {
return(m)
m
}
}

Expand Down
41 changes: 23 additions & 18 deletions R/get_df_residual.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#' @keywords internal
.degrees_of_freedom_residual.default <- function(x, verbose = TRUE, ...) {
if (.is_bayesian_model(x) && !inherits(x, c("bayesx", "blmerMod", "bglmerMod"))) {
if (.is_bayesian_model(x, exclude = c("bmerMod", "bayesx", "blmerMod", "bglmerMod"))) {
if (check_if_installed("bayestestR", quietly = TRUE)) {
x <- .safe(bayestestR::bayesian_as_frequentist(x))
if (is.null(x)) {
Expand Down Expand Up @@ -53,14 +53,15 @@

#' @keywords internal
.degrees_of_freedom_residual.gls <- function(x, verbose = TRUE, ...) {
# we don't call ".degrees_of_freedom_analytical()" here, because that
# function relies on `.model_df()` to estimate the number of parameters,
# which returns results that are not in line with the "summary()" for gls
nparam <- n_parameters(x)
n <- n_obs(x)

if (is.null(n) || is.null(nparam)) {
return(Inf)
}

return(n - nparam)
n - nparam
}

#' @keywords internal
Expand Down Expand Up @@ -141,10 +142,10 @@
x$rdf
}

#' @export
#' @keywords internal
.degrees_of_freedom_residual.BBmm <- .degrees_of_freedom_residual.glht

#' @export
#' @keywords internal
.degrees_of_freedom_residual.BBreg <- .degrees_of_freedom_residual.glht

#' @keywords internal
Expand All @@ -162,18 +163,7 @@
}

#' @keywords internal
.degrees_of_freedom_residual.rqs <- function(x, verbose = TRUE, ...) {
tryCatch(
{
s <- suppressWarnings(summary(x, covariance = TRUE))
cs <- lapply(s, function(i) i$rdf)
unique(unlist(cs, use.names = FALSE))
},
error = function(e) {
NULL
}
)
}
.degrees_of_freedom_residual.rqs <- .degrees_of_freedom_residual.rq

#' @keywords internal
.degrees_of_freedom_residual.rqss <- function(x, verbose = TRUE, ...) {
Expand Down Expand Up @@ -226,3 +216,18 @@

dof
}


# helper ----------------------

.dof_fit_gam <- function(model, dof) {
params <- find_parameters(model)
if (!is.null(params$conditional)) {
dof <- rep(dof, length(params$conditional))
}
if (!is.null(params$smooth_terms)) {
s <- summary(model)
dof <- c(dof, s$s.table[, "Ref.df"])
}
dof
}
20 changes: 12 additions & 8 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -473,14 +473,18 @@
}


.is_bayesian_model <- function(x) {
inherits(x, c(
"brmsfit", "stanfit", "MCMCglmm", "stanreg",
"stanmvreg", "bmerMod", "BFBayesFactor", "bamlss",
"bayesx", "mcmc", "bcplm", "bayesQR", "BGGM",
"meta_random", "meta_fixed", "meta_bma", "blavaan",
"blrm"
))
.is_bayesian_model <- function(x, exclude = NULL) {
bayes_classes <- c(
"brmsfit", "stanfit", "MCMCglmm", "stanreg", "stanmvreg", "bmerMod",
"BFBayesFactor", "bamlss", "bayesx", "mcmc", "bcplm", "bayesQR", "BGGM",
"meta_random", "meta_fixed", "meta_bma", "blavaan", "blrm", "blmerMod",
"bglmerMod"
)
# if exclude is not NULL, remove elements in exclude from bayes_class
if (!is.null(exclude)) {
bayes_classes <- bayes_classes[!bayes_classes %in% exclude]
}
inherits(x, bayes_classes)
}


Expand Down
Loading

0 comments on commit 1824bb8

Please sign in to comment.