Skip to content

Commit

Permalink
get_variance() and null_model() functions not working depending o…
Browse files Browse the repository at this point in the history
…n the way you call the objects of your model (#919)

* `get_variance()` and `null_model()`  functions not working depending on the way you call the objects of your model
Fixes #837

* lintr
  • Loading branch information
strengejacke authored Aug 28, 2024
1 parent a5f21e7 commit 7d68ac3
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 11 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: insight
Title: Easy Access to Model Information for Various Model Objects
Version: 0.20.3.2
Version: 0.20.3.3
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# insight 0.20.4

## Changes

* `null_model()` and `formula_ok()` now warn when indexed data frames, such as
`df[, 5]`, are used as response variable in the formula, as this can lead to
unexpected results.

## Bug fixes

* Fixed regression from latest fix related to `get_variance()` for *brms* models.
Expand Down
35 changes: 30 additions & 5 deletions R/find_formula.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,11 @@ formula_ok <- function(x, verbose = TRUE, ...) {
# all.vars() returns "T" as variable, which is not intended
check_2 <- .check_formula_for_T(f, verbose = verbose)

all(check_1 && check_2)
# check if formula contains index data frames as response variable
# this may result in unexpected behaviour, and we should warn users
check_3 <- .check_formula_index_df(f, x, verbose = verbose)

all(check_1 && check_2 && check_3)
}


Expand Down Expand Up @@ -526,7 +530,7 @@ find_formula.afex_aov <- function(x, verbose = TRUE, ...) {
id <- attr(x, "id")

within_variables <- names(attr(x, "within"))
within_variables <- paste0(within_variables, collapse = "*")
within_variables <- paste(within_variables, collapse = "*")
within_variables <- paste0("(", within_variables, ")")
e <- paste0("Error(", id, "/", within_variables, ")")

Expand Down Expand Up @@ -1596,13 +1600,13 @@ find_formula.model_fit <- function(x, verbose = TRUE, ...) {
)

# check if any further pforms exist
if (!all(names(f$pforms) %in% auxiliary_names)) {
if (all(names(f$pforms) %in% auxiliary_names)) {
f_custom <- NULL
} else {
custom_names <- setdiff(names(f$pforms), auxiliary_names)
if (length(custom_names)) {
f_custom <- f$pforms[custom_names]
}
} else {
f_custom <- NULL
}

f_sigmarandom <- NULL
Expand Down Expand Up @@ -1870,6 +1874,27 @@ find_formula.model_fit <- function(x, verbose = TRUE, ...) {
}


# formulas with an index data frame, like "lm(mtcars[, "mpg"] ~ mtcars$hp), may
# cause problems in various functions throughout the easystats packages. We
# warn the user here...

.check_formula_index_df <- function(f, x, verbose = TRUE) {
if (is_empty_object(f)) {
return(TRUE)
}
resp <- .safe(safe_deparse(f$conditional[[2]]))
if (!is.null(resp) && any(grepl("\\b\\w+\\[.*?,.*?\\]", resp))) {
if (verbose) {
format_warning(
"Using indexed data frames, such as `df[, 5]`, as model response can produce unexpected results. Specify your model using the literal name of the response variable instead." # nolint
)
}
return(FALSE)
}
return(TRUE)
}


.formula_clean <- function(f) {
fc <- as.character(f)
LHS <- fc[2]
Expand Down
14 changes: 9 additions & 5 deletions R/null_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@
#'
#' @export
null_model <- function(model, verbose = TRUE, ...) {
# check if model formula is ok
formula_ok(model)

model_formula <- find_formula(model, verbose = FALSE)
offset_term <- tryCatch(
{
f <- safe_deparse(find_formula(model)$conditional)
f <- safe_deparse(model_formula$conditional)
if (grepl("offset(", f, fixed = TRUE)) {
out <- gsub("(.*)offset\\((.*)\\)(.*)", "\\2", f)
} else {
Expand All @@ -34,7 +38,7 @@ null_model <- function(model, verbose = TRUE, ...) {
)

if (is_mixed_model(model)) {
.null_model_mixed(model, offset_term, verbose)
.null_model_mixed(model, offset_term, model_formula, verbose)
} else if (inherits(model, "clm2")) {
stats::update(model, location = ~1, scale = ~1)
} else if (inherits(model, "multinom")) {
Expand All @@ -60,16 +64,16 @@ null_model <- function(model, verbose = TRUE, ...) {
}


.null_model_mixed <- function(model, offset_term = NULL, verbose = TRUE) {
.null_model_mixed <- function(model, offset_term = NULL, model_formula = NULL, verbose = TRUE) {
if (inherits(model, "MixMod")) {
nullform <- stats::as.formula(paste(find_response(model), "~ 1"))
null.model <- suppressWarnings(stats::update(model, fixed = nullform))
# fix fixed effects formula
null.model$call$fixed <- nullform
} else if (inherits(model, "cpglmm")) {
nullform <- find_formula(model, verbose = FALSE)[["random"]]
nullform <- model_formula[["random"]]
null.model <- suppressWarnings(stats::update(model, nullform))
} else if (inherits(model, "glmmTMB") && !is.null(find_formula(model)$zero_inflated)) {
} else if (inherits(model, "glmmTMB") && !is.null(model_formula$zero_inflated)) {
insight::check_if_installed("glmmTMB")
# for zero-inflated models, we need to create the NULL model for the
# zero-inflation part as well. Since "update()" won't work here, we need
Expand Down
7 changes: 7 additions & 0 deletions tests/testthat/test-find_formula.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,10 @@ test_that("`find_formula` works with `mgcv::gam()`", {
expect_identical(f$conditional, formula("y ~ s(x0) + s(x1) + s(x2)"))
expect_identical(f$scale, formula("~s(x3)"))
})


test_that("formula warns for badly formulated response", {
data(iris)
model <- lm(iris[, 2] ~ Species, data = iris)
expect_warning(formula_ok(model), regex = "Using indexed")
})
8 changes: 8 additions & 0 deletions tests/testthat/test-null_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ test_that("null_model zero-inflated", {
expect_equal(glmmTMB::fixef(out), glmmTMB::fixef(m0), tolerance = 1e-4)
})


test_that("null_model warns for badly formulated response", {
data(iris)
model <- lm(iris[, 2] ~ Species, data = iris)
expect_warning(null_model(model), regex = "Using indexed")
})


# set.seed(123)
# N <- 100 # Samples
# x <- runif(N, 0, 10) # Predictor
Expand Down

0 comments on commit 7d68ac3

Please sign in to comment.