diff --git a/DESCRIPTION b/DESCRIPTION index a7a01fceb..94c9630f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: insight Title: Easy Access to Model Information for Various Model Objects -Version: 0.19.7 +Version: 0.19.7.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 73f13e399..e89b11dc5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# insight 0.19.8 + +## Bug fixes + +* Fixed issue in `get_loglikelihood()` for glm-models with binary outcome, where + levels were defined in reversed order. + # insight 0.19.7 ## General diff --git a/R/get_data.R b/R/get_data.R index f4c0bd54b..f48c71bd2 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -67,7 +67,10 @@ get_data <- function(x, ...) { # handle arguments effects <- match.arg(effects, choices = c("all", "fixed", "random")) - component <- match.arg(component, choices = c("all", "conditional", "zero_inflated", "zi", "smooth_terms", "dispersion")) + component <- match.arg( + component, + choices = c("all", "conditional", "zero_inflated", "zi", "smooth_terms", "dispersion") + ) # we want to add the variable for subsettig, too model_call <- get_call(x) @@ -156,10 +159,10 @@ get_data <- function(x, ...) { # complete cases only, as in model frames, need to filter attributes # only use model variables in complete.cases() - if (!is.null(vars)) { - cc <- stats::complete.cases(dat[, intersect(vars, colnames(dat))]) - } else { + if (is.null(vars)) { cc <- stats::complete.cases(dat) + } else { + cc <- stats::complete.cases(dat[, intersect(vars, colnames(dat))]) } # only preserve random effects @@ -284,16 +287,12 @@ get_data.default <- function(x, source = "environment", verbose = TRUE, ...) { # fall back to extract data from model frame if (is.null(model_data)) { mf <- tryCatch( - { - if (inherits(x, "Zelig-relogit")) { - .get_zelig_relogit_frame(x) - } else { - stats::model.frame(x) - } + if (inherits(x, "Zelig-relogit")) { + .get_zelig_relogit_frame(x) + } else { + stats::model.frame(x) }, - error = function(x) { - NULL - } + error = function(x) NULL ) # process arguments, check whether data should be recovered from # environment or model frame @@ -725,7 +724,7 @@ get_data.merMod <- function(x, switch(effects, fixed = stats::model.frame(x, fixed.only = TRUE), all = stats::model.frame(x, fixed.only = FALSE), - random = stats::model.frame(x, fixed.only = FALSE)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] + random = stats::model.frame(x, fixed.only = FALSE)[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] # nolint ) }) .prepare_get_data(x, mf, effects, verbose = verbose) @@ -745,16 +744,12 @@ get_data.mmrm <- function(x, # data from model frame mf <- .prepare_get_data(x, stats::model.frame(x, full = TRUE), effects, verbose = verbose) tryCatch( - { - switch(effects, - fixed = mf[intersect(colnames(mf), fixed_vars)], - all = mf[intersect(colnames(mf), unique(c(fixed_vars, random_vars)))], - random = mf[intersect(colnames(mf), random_vars)] - ) - }, - error = function(x) { - NULL - } + switch(effects, + fixed = mf[intersect(colnames(mf), fixed_vars)], + all = mf[intersect(colnames(mf), unique(c(fixed_vars, random_vars)))], + random = mf[intersect(colnames(mf), random_vars)] + ), + error = function(x) NULL ) } @@ -820,16 +815,12 @@ get_data.cpglmm <- function(x, dat <- stats::model.frame(x) mf <- tryCatch( - { - switch(effects, - fixed = dat[, find_predictors(x, effects = "fixed", flatten = TRUE, verbose = FALSE), drop = FALSE], - all = dat, - random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] - ) - }, - error = function(x) { - NULL - } + switch(effects, + fixed = dat[, find_predictors(x, effects = "fixed", flatten = TRUE, verbose = FALSE), drop = FALSE], + all = dat, + random = dat[, find_random(x, split_nested = TRUE, flatten = TRUE), drop = FALSE] + ), + error = function(x) NULL ) .prepare_get_data(x, mf, effects, verbose = verbose) } @@ -899,16 +890,12 @@ get_data.mixor <- function(x, effects <- match.arg(effects, choices = c("all", "fixed", "random")) mf <- tryCatch( - { - switch(effects, - fixed = stats::model.frame(x), - all = cbind(stats::model.frame(x), x$id), - random = data.frame(x$id) - ) - }, - error = function(x) { - NULL - } + switch(effects, + fixed = stats::model.frame(x), + all = cbind(stats::model.frame(x), x$id), + random = data.frame(x$id) + ), + error = function(x) NULL ) fix_cn <- which(colnames(mf) %in% c("x.id", "x$id")) colnames(mf)[fix_cn] <- safe_deparse(x$call$id) @@ -999,10 +986,10 @@ get_data.mixed <- function(x, #' @param shape Return long or wide data? Only applicable in repeated measures #' designs. get_data.afex_aov <- function(x, shape = c("long", "wide"), ...) { - if (!length(attr(x, "within"))) { - shape <- "long" - } else { + if (length(attr(x, "within"))) { shape <- match.arg(shape) + } else { + shape <- "long" } x$data[[shape]] } @@ -1484,7 +1471,12 @@ get_data.ivreg <- function(x, source = "environment", verbose = TRUE, ...) { mf <- .safe(stats::model.frame(x)) ft <- find_variables(x, flatten = TRUE) - if (!insight::is_empty_object(mf)) { + if (is_empty_object(mf)) { + final_mf <- .safe({ + dat <- .recover_data_from_environment(x) + dat[, ft, drop = FALSE] + }) + } else { cn <- clean_names(colnames(mf)) remain <- setdiff(ft, cn) if (is_empty_object(remain)) { @@ -1495,11 +1487,6 @@ get_data.ivreg <- function(x, source = "environment", verbose = TRUE, ...) { cbind(mf, dat[, remain, drop = FALSE]) }) } - } else { - final_mf <- .safe({ - dat <- .recover_data_from_environment(x) - dat[, ft, drop = FALSE] - }) } .prepare_get_data(x, stats::na.omit(final_mf), verbose = verbose) @@ -1559,7 +1546,15 @@ get_data.brmsfit <- function(x, effects = "all", component = "all", source = "en # verbose is FALSE by default because `get_call()` often does not work on # `brmsfit` objects, so we typically default to the `data` held in the object. data_name <- attr(x$data, "data_name") - model_data <- .get_data_from_environment(x, effects = effects, component = component, source = source, verbose = verbose, data_name = data_name, ...) + model_data <- .get_data_from_environment( + x, + effects = effects, + component = component, + source = source, + verbose = verbose, + data_name = data_name, + ... + ) if (!is.null(model_data)) { return(model_data) @@ -1651,15 +1646,15 @@ get_data.MCMCglmm <- function(x, effects = "all", source = "environment", verbos all(pv %in% colnames(dat)) })) mf <- env_dataframes[matchframe][1] - if (!is.na(mf)) { + if (is.na(mf)) { + NULL + } else { dat <- get(mf) switch(effects, fixed = dat[, setdiff(colnames(dat), find_random(x, flatten = TRUE)), drop = FALSE], all = dat, random = dat[, find_random(x, flatten = TRUE), drop = FALSE] ) - } else { - NULL } }, error = function(x) { @@ -1889,21 +1884,17 @@ get_data.vglm <- function(x, source = "environment", verbose = TRUE, ...) { # fall back to extract data from model frame mf <- tryCatch( - { - if (!length(x@model)) { - env <- environment(x@terms$terms) - if (is.null(env)) env <- parent.frame() - fcall <- x@call - fcall$method <- "model.frame" - fcall$smart <- FALSE - eval(fcall, env, parent.frame()) - } else { - x@model - } + if (length(x@model)) { + x@model + } else { + env <- environment(x@terms$terms) + if (is.null(env)) env <- parent.frame() + fcall <- x@call + fcall$method <- "model.frame" + fcall$smart <- FALSE + eval(fcall, env, parent.frame()) }, - error = function(x) { - NULL - } + error = function(x) NULL ) .prepare_get_data(x, mf) @@ -2030,7 +2021,7 @@ get_data.clmm2 <- function(x, source = "environment", verbose = TRUE, ...) { } data_complete <- cbind(data_complete, x$grFac) - colnames(data_complete)[ncol(data_complete)] <- unlist(.find_random_effects(x, f = find_formula(x, verbose = FALSE), split_nested = TRUE)) + colnames(data_complete)[ncol(data_complete)] <- unlist(.find_random_effects(x, f = find_formula(x, verbose = FALSE), split_nested = TRUE)) # nolint data_complete }, @@ -2183,14 +2174,14 @@ get_data.rma <- function(x, if (!is.function(transf)) { format_error("`transf` must be a function.") } - if (!is.null(transf_args)) { - mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf, transf_args) - mf$CI_low <- sapply(mf$CI_low, transf, transf_args) - mf$CI_high <- sapply(mf$CI_high, transf, transf_args) - } else { + if (is.null(transf_args)) { mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf) mf$CI_low <- sapply(mf$CI_low, transf) mf$CI_high <- sapply(mf$CI_high, transf) + } else { + mf[[find_response(x)]] <- sapply(mf[[find_response(x)]], transf, transf_args) + mf$CI_low <- sapply(mf$CI_low, transf, transf_args) + mf$CI_high <- sapply(mf$CI_high, transf, transf_args) } } } @@ -2283,7 +2274,7 @@ get_data.htest <- function(x, ...) { .check_data_source_arg <- function(source) { source <- match.arg(source, choices = c("environment", "mf", "modelframe", "frame")) switch(source, - "environment" = "environment", + environment = "environment", "frame" ) } diff --git a/R/helper_functions.R b/R/helper_functions.R index 9616fa9e5..bffff6c3e 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -6,9 +6,9 @@ # remove values from vector .remove_values <- function(x, values) { - remove <- x %in% values - if (any(remove)) { - x <- x[!remove] + to_remove <- x %in% values + if (any(to_remove)) { + x <- x[!to_remove] } x } @@ -24,7 +24,7 @@ # is string empty? .is_empty_string <- function(x) { x <- x[!is.na(x)] - length(x) == 0 || all(nchar(x) == 0) + length(x) == 0 || all(nchar(x) == 0) # nolint } @@ -53,12 +53,11 @@ # checks if a brms-models is a multi-membership-model .is_multi_membership <- function(x) { - if (inherits(x, "brmsfit")) { - re <- find_random(x, split_nested = TRUE, flatten = TRUE) - any(grepl("^(mmc|mm)\\(", re)) - } else { + if (!inherits(x, "brmsfit")) { return(FALSE) } + re <- find_random(x, split_nested = TRUE, flatten = TRUE) + any(grepl("^(mmc|mm)\\(", re)) } @@ -115,7 +114,7 @@ # if there are any chars left, these come from further terms that come after # random effects... .formula_empty_after_random_effect <- function(f) { - nchar(gsub("(~|\\+|\\*|-|/|:)", "", gsub(" ", "", gsub("\\((.*)\\)", "", f), fixed = TRUE))) == 0 + nchar(gsub("(~|\\+|\\*|-|/|:)", "", gsub(" ", "", gsub("\\((.*)\\)", "", f), fixed = TRUE))) == 0 # nolint } @@ -558,6 +557,13 @@ } x <- droplevels(x) levels(x) <- 1:nlevels(x) + } else if (is.unsorted(levels(x))) { + # for numeric factors, we need to check the order of levels + x_inverse <- rep(NA_real_, length(x)) + for (i in 1:nlevels(x)) { + x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1]) + } + x <- factor(x_inverse) } out <- as.numeric(as.character(x)) diff --git a/inst/WORDLIST b/inst/WORDLIST index 1c38f711a..e9ca0ae20 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -93,6 +93,7 @@ funcionts gam ggeffects github +glm glmm glmmTMB hglm diff --git a/tests/testthat/test-factor_to_numeric.R b/tests/testthat/test-factor_to_numeric.R new file mode 100644 index 000000000..56cf0e173 --- /dev/null +++ b/tests/testthat/test-factor_to_numeric.R @@ -0,0 +1,34 @@ +test_that(".factor_to_numeric", { + f <- c(0, 0, 1, 1, 1, 0) + x1 <- factor(f, levels = c(0, 1), labels = c("a", "b")) + x2 <- factor(f, levels = c(1, 0), labels = c("b", "a")) + x3 <- factor(f, levels = c(1, 0), labels = c("a", "b")) + x4 <- factor(f, levels = c(0, 1), labels = c("b", "a")) + out1 <- insight:::.factor_to_numeric(x1, lowest = 0) + out2 <- insight:::.factor_to_numeric(x2, lowest = 0) + out3 <- insight:::.factor_to_numeric(x3, lowest = 0) + out4 <- insight:::.factor_to_numeric(x4, lowest = 0) + expect_identical(out1, c(0, 0, 1, 1, 1, 0)) + expect_identical(out2, c(1, 1, 0, 0, 0, 1)) + expect_identical(out1, out4) + expect_identical(out2, out3) + + x1 <- factor(f, levels = c(0, 1)) + x2 <- factor(f, levels = c(1, 0)) + out1 <- insight:::.factor_to_numeric(x1, lowest = 0) + out2 <- insight:::.factor_to_numeric(x2, lowest = 0) + expect_identical(out1, c(0, 0, 1, 1, 1, 0)) + expect_identical(out2, c(1, 1, 0, 0, 0, 1)) + + f <- c(1, 1, 2, 2, 2, 1) + x1 <- factor(f, levels = c(1, 2)) + x2 <- factor(f, levels = c(2, 1)) + out1 <- insight:::.factor_to_numeric(x1, lowest = 0) + out2 <- insight:::.factor_to_numeric(x2, lowest = 0) + expect_identical(out1, c(0, 0, 1, 1, 1, 0)) + expect_identical(out2, c(1, 1, 0, 0, 0, 1)) + out1 <- insight:::.factor_to_numeric(x1) + out2 <- insight:::.factor_to_numeric(x2) + expect_identical(out1, c(1, 1, 2, 2, 2, 1)) + expect_identical(out2, c(2, 2, 1, 1, 1, 2)) +}) diff --git a/tests/testthat/test-get_loglikelihood.R b/tests/testthat/test-get_loglikelihood.R index f0715b3df..95e9ef9d0 100644 --- a/tests/testthat/test-get_loglikelihood.R +++ b/tests/testthat/test-get_loglikelihood.R @@ -4,20 +4,20 @@ test_that("get_loglikelihood - lm", { x <- lm(Sepal.Length ~ Petal.Width + Species, data = iris) ll <- loglikelihood(x, estimator = "ML") ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) - expect_equal(sum(attributes(ll)$per_obs - nonnest2::llcont(x)), 0) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(sum(attributes(ll)$per_obs - nonnest2::llcont(x)), 0, tolerance = 1e-4, ignore_attr = TRUE) # REML ll <- loglikelihood(x, estimator = "REML") ll2 <- stats::logLik(x, REML = TRUE) - expect_equal(as.numeric(ll), as.numeric(ll2)) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) # With weights x <- lm(Sepal.Length ~ Petal.Width + Species, data = iris, weights = Petal.Length) ll <- loglikelihood(x, estimator = "ML") ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) # log-response x <- lm(mpg ~ wt, data = mtcars) @@ -47,15 +47,15 @@ test_that("get_loglikelihood - glm", { x <- glm(vs ~ mpg * disp, data = mtcars, family = "binomial") ll <- loglikelihood(x) ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) - expect_equal(sum(attributes(ll)$per_obs - nonnest2::llcont(x)), 0) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(sum(attributes(ll)$per_obs - nonnest2::llcont(x)), 0, tolerance = 1e-4, ignore_attr = TRUE) x <- glm(cbind(cyl, gear) ~ mpg, data = mtcars, weights = disp, family = binomial) ll <- loglikelihood(x) ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) # Nonnest2 seems to be giving diffenrent results, # which sums doesn't add up to base R's result... so commenting off # expect_equal(sum(attributes(ll)$per_obs - nonnest2::llcont(x)), 0) @@ -69,39 +69,59 @@ test_that("get_loglikelihood - (g)lmer", { # REML ll <- loglikelihood(x, estimator = "REML") ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) # ML ll <- loglikelihood(x, estimator = "ML") ll2 <- stats::logLik(x, REML = FALSE) - expect_equal(as.numeric(ll), as.numeric(ll2)) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) # default ll <- loglikelihood(x) ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) x <- lme4::glmer(vs ~ mpg + (1 | cyl), data = mtcars, family = "binomial") ll <- loglikelihood(x, estimator = "REML") # no REML for glmer ll2 <- stats::logLik(x) - expect_equal(as.numeric(ll), as.numeric(ll2)) - expect_equal(attributes(ll)$df, attributes(ll2)$df) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(attributes(ll)$df, attributes(ll2)$df, tolerance = 1e-4, ignore_attr = TRUE) ll <- loglikelihood(x, estimator = "ML") ll2 <- stats::logLik(x, REML = FALSE) - expect_equal(as.numeric(ll), as.numeric(ll2)) + expect_equal(as.numeric(ll), as.numeric(ll2), tolerance = 1e-4, ignore_attr = TRUE) model <- download_model("lmerMod_1") skip_if(is.null(model)) - expect_equal(get_loglikelihood(model, estimator = "REML"), logLik(model, REML = TRUE), tolerance = 0.01, ignore_attr = TRUE) - expect_equal(get_loglikelihood(model, estimator = "ML"), logLik(model, REML = FALSE), tolerance = 0.01, ignore_attr = TRUE) + expect_equal( + get_loglikelihood(model, estimator = "REML"), + logLik(model, REML = TRUE), + tolerance = 0.01, + ignore_attr = TRUE + ) + expect_equal( + get_loglikelihood(model, estimator = "ML"), + logLik(model, REML = FALSE), + tolerance = 0.01, + ignore_attr = TRUE + ) model <- download_model("merMod_1") skip_if(is.null(model)) - expect_equal(get_loglikelihood(model, estimator = "REML"), logLik(model, REML = FALSE), tolerance = 0.01, ignore_attr = TRUE) - expect_equal(get_loglikelihood(model, estimator = "ML"), logLik(model, REML = FALSE), tolerance = 0.01, ignore_attr = TRUE) + expect_equal( + get_loglikelihood(model, estimator = "REML"), + logLik(model, REML = FALSE), + tolerance = 0.01, + ignore_attr = TRUE + ) + expect_equal( + get_loglikelihood(model, estimator = "ML"), + logLik(model, REML = FALSE), + tolerance = 0.01, + ignore_attr = TRUE + ) }) test_that("get_loglikelihood - stanreg ", { @@ -165,3 +185,16 @@ test_that("get_loglikelihood - gamm4", { # It works, but it's quite diferent from the mgcv result expect_equal(as.numeric(ll), -101.1107, tolerance = 1e-3) }) + +test_that("get_loglikelihood - Bernoulli with inversed levels", { + d <- mtcars + d$zero <- factor(d$vs, levels = c(0, 1)) + d$ones <- factor(d$vs, levels = c(1, 0)) + + ml_zero <- glm(zero ~ mpg, family = binomial, data = d) + ml_ones <- glm(ones ~ mpg, family = binomial, data = d) + + expect_equal(logLik(ml_zero), get_loglikelihood(ml_zero), ignore_attr = TRUE) + expect_equal(logLik(ml_ones), get_loglikelihood(ml_ones), ignore_attr = TRUE) + expect_equal(get_loglikelihood(ml_zero), get_loglikelihood(ml_ones), ignore_attr = TRUE) +})