From e30984534296665118b53b088aa29b7996977099 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 27 Oct 2024 11:06:02 +0100 Subject: [PATCH] Avoid "number of items to replace is not a multiple" (#950) * Avoid "number of items to replace is not a multiple" * add test * desc * cleanup --- DESCRIPTION | 2 +- R/clean_names.R | 91 ++++++++++++------------------- tests/testthat/test-clean_names.R | 60 ++++++++++++-------- 3 files changed, 72 insertions(+), 81 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2a838c231..57009cabc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: insight Title: Easy Access to Model Information for Various Model Objects -Version: 0.99.0 +Version: 0.99.0.1 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/R/clean_names.R b/R/clean_names.R index 323ca5b8c..1a8da8563 100644 --- a/R/clean_names.R +++ b/R/clean_names.R @@ -132,98 +132,75 @@ clean_names.character <- function(x, include_names = FALSE, ...) { # do we have a "log()" pattern here? if yes, get capture region # which matches the "cleaned" variable name - cleaned <- unlist(lapply(seq_along(x), function(i) { + cleaned <- unlist(lapply(x, function(i) { # check if we have special patterns like 100 * log(xy), and remove it - if (isFALSE(is_emmeans) && grepl("^([0-9]+)", x[i])) { - x[i] <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", x[i]) + if (isFALSE(is_emmeans) && grepl("^([0-9]+)", i)) { + i <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", i) } # for brms multimembership, multiple elements might be returned # need extra handling multimembership <- NULL for (j in seq_along(pattern)) { # check if we find pattern at all - if (grepl(pattern[j], x[i], fixed = TRUE)) { + if (grepl(pattern[j], i, fixed = TRUE)) { # remove possible namespace - if (grepl("::", x[i], fixed = TRUE)) { - x[i] <- sub("(.*)::(.*)", "\\2", x[i]) + if (grepl("::", i, fixed = TRUE)) { + i <- sub("(.*)::(.*)", "\\2", i) } if (pattern[j] == "offset") { # nolint - x[i] <- trim_ws(unique(sub("^offset\\(([^-+ )]*).*", "\\1", x[i]))) + i <- trim_ws(unique(sub("^offset\\(([^-+ )]*).*", "\\1", i))) } else if (pattern[j] == "I") { - if (!ignore_asis && grepl("I\\((.*)\\)", x[i])) { - # x[i] <- trim_ws(unique(sub("I\\(((\\w|\\.)*).*", "\\1", x[i]))) - x[i] <- all.vars(stats::as.formula(paste("~", x[i]))) + if (!ignore_asis && grepl("I\\((.*)\\)", i)) { + # i <- trim_ws(unique(sub("I\\(((\\w|\\.)*).*", "\\1", i))) + i <- all.vars(stats::as.formula(paste("~", i))) } } else if (pattern[j] == "asis") { - if (!ignore_asis && grepl("asis\\((.*)\\)", x[i])) { - # x[i] <- trim_ws(unique(sub("asis\\(((\\w|\\.)*).*", "\\1", x[i]))) - x[i] <- all.vars(stats::as.formula(paste("~", x[i]))) + if (!ignore_asis && grepl("asis\\((.*)\\)", i)) { + # i <- trim_ws(unique(sub("asis\\(((\\w|\\.)*).*", "\\1", i))) + i <- all.vars(stats::as.formula(paste("~", i))) } } else if (pattern[j] == "log(log") { - x[i] <- trim_ws(unique(sub("^log\\(log\\(((\\w|\\.)*).*", "\\1", x[i]))) + i <- trim_ws(unique(sub("^log\\(log\\(((\\w|\\.)*).*", "\\1", i))) } else if (pattern[j] == "relevel(as.factor") { - x[i] <- trim_ws(unique(sub("^relevel\\(as.factor\\(((\\w|\\.)*).*", "\\1", x[i]))) + i <- trim_ws(unique(sub("^relevel\\(as.factor\\(((\\w|\\.)*).*", "\\1", i))) } else if (pattern[j] == "scale(log") { - x[i] <- trim_ws(unique(sub("^scale\\(log\\(((\\w|\\.)*).*", "\\1", x[i]))) - x[i] <- trim_ws(unique(sub("^scale\\(log1p\\(((\\w|\\.)*).*", "\\1", x[i]))) - x[i] <- trim_ws(unique(sub("^scale\\(log2\\(((\\w|\\.)*).*", "\\1", x[i]))) - x[i] <- trim_ws(unique(sub("^scale\\(log10\\(((\\w|\\.)*).*", "\\1", x[i]))) + i <- trim_ws(unique(sub("^scale\\(log\\(((\\w|\\.)*).*", "\\1", i))) + i <- trim_ws(unique(sub("^scale\\(log1p\\(((\\w|\\.)*).*", "\\1", i))) + i <- trim_ws(unique(sub("^scale\\(log2\\(((\\w|\\.)*).*", "\\1", i))) + i <- trim_ws(unique(sub("^scale\\(log10\\(((\\w|\\.)*).*", "\\1", i))) } else if (pattern[j] == "scale(poly") { - x[i] <- trim_ws(unique(sub("^scale\\(poly\\(((\\w|\\.)*).*", "\\1", x[i]))) + i <- trim_ws(unique(sub("^scale\\(poly\\(((\\w|\\.)*).*", "\\1", i))) } else if (pattern[j] %in% c("mmc", "mm")) { - # # detect mm-pattern - # p <- paste0("^", pattern[j], "\\((.*)\\).*") - # # extract terms from mm() / mmc() functions - # g <- trim_ws(sub(p, "\\1", x[i])) - # # split terms, but not if comma inside parentheses - # g <- trim_ws(unlist(strsplit(g, ",(?![^()]*\\))", perl = TRUE), use.names = FALSE)) - # # we might have additional arguments, like scale or weights. handle these here - # g <- g[!startsWith(g, "scale")] - # # clean weights - # gweights <- g[startsWith(g, "weights")] - # if (length(gweights)) { - # g <- g[!startsWith(g, "weights")] - # # this regular pattern finds "weights=" or "weights =", possibly followed - # # by "cbind()", e.g. "weights = cbind(w, w)". We extract the variable names, - # # create a formula, so "all.vars()" will only extract variable names if - # # we really have "cbind()" in the weights argument - # g <- c(g, .safe(all.vars(as.formula(paste0("~", trim_ws(gsub("weights\\s?=(.*)", "\\1", "weights = cbind(w, w)"))))))) # nolint - # } - # multimembership <- as.vector(trim_ws(g)) - if (grepl(paste0("^", pattern[j], "\\((.*)\\).*"), x[i])) { - multimembership <- all.vars(stats::as.formula(paste("~", x[i]))) + if (grepl(paste0("^", pattern[j], "\\((.*)\\).*"), i)) { + i <- all.vars(stats::as.formula(paste("~", i))) } - } else if (pattern[j] == "s" && startsWith(x[i], "s(")) { - x[i] <- gsub("^s\\(", "", x[i]) - x[i] <- gsub("\\)$", "", x[i]) - if (grepl("=|[[:digit:]]", x[i])) { - new_x <- trim_ws(unlist(strsplit(x[i], ",", fixed = TRUE), use.names = FALSE)) + } else if (pattern[j] == "s" && startsWith(i, "s(")) { + i <- gsub("^s\\(", "", i) + i <- gsub("\\)$", "", i) + if (grepl("=|[[:digit:]]", i)) { + new_x <- trim_ws(unlist(strsplit(i, ",", fixed = TRUE), use.names = FALSE)) to_remove <- which(!grepl("\\D", new_x)) to_remove <- c(to_remove, grep("=", new_x, fixed = TRUE)) if (length(to_remove) == 0) { - x[i] <- toString(new_x) + i <- toString(new_x) } else { - x[i] <- toString(new_x[-to_remove]) + i <- toString(new_x[-to_remove]) } } } else { # p <- paste0("^", pattern[j], "\\(([^,/)]*).*") # this one should be more generic... p <- paste0("^", pattern[j], "\\(((\\w|\\.)*).*") - x[i] <- unique(sub(p, "\\1", x[i])) + i <- unique(sub(p, "\\1", i)) } } } # for coxme-models, remove random-effect things... - if (grepl("|", x[i], fixed = TRUE)) { - x[i] <- sub("^(.*)\\|(.*)", "\\2", x[i]) - } - # either return regular term, or mm term for brms - if (is.null(multimembership)) { - trim_ws(x[i]) - } else { - multimembership + if (grepl("|", i, fixed = TRUE)) { + i <- sub("^(.*)\\|(.*)", "\\2", i) } + + trim_ws(i) }), use.names = FALSE) # remove for random intercept only models diff --git a/tests/testthat/test-clean_names.R b/tests/testthat/test-clean_names.R index 027f77f7a..f963a6177 100644 --- a/tests/testthat/test-clean_names.R +++ b/tests/testthat/test-clean_names.R @@ -65,6 +65,7 @@ test_that("clean_names", { expect_identical(clean_names("s(x1, x2, x3)"), "x1, x2, x3") }) + test_that("clean_names, model", { m_rel1 <- lm(mpg ~ relevel(as.factor(cyl), "8") + gear, data = mtcars) expect_identical(insight::clean_names(m_rel1), c("mpg", "cyl", "gear")) @@ -74,11 +75,35 @@ test_that("clean_names, model", { expect_identical(insight::clean_names(m_rel2), c("mpg", "cyl", "gear")) }) -skip_on_cran() -skip_if_offline() -skip_if_not_installed("httr2") test_that("clean_names, multimembership", { + skip_if_not_installed("gamlss") + set.seed(123) + dat <- data.frame( + Y = sample(20:50, 100, replace = TRUE), + date = sample(seq(as.Date("1999/01/01"), as.Date("2000/01/01"), by = "day"), 10), + cont1 = rchisq(100, df = 2), + cont2 = runif(100), + cat1 = sample(LETTERS[1:3], 100, replace = TRUE) + ) + junk <- capture.output({ + mod1 <- suppressWarnings(gamlss::gamlss( + Y ~ date + scale(cont1) + scale(cont2) + I(scale(cont2)^2) * cat1, + data = dat + )) + }) + expect_identical( + clean_names(find_terms(mod1)$conditional), + c("date", "cont1", "cont2", "cont2", "cat1") + ) +}) + + +test_that("clean_names, multimembership", { + skip_on_cran() + skip_if_offline() + skip_if_not_installed("httr2") + m1 <- suppressWarnings(insight::download_model("brms_mm_1")) skip_if(is.null(m1)) out <- clean_names(m1) @@ -106,25 +131,14 @@ test_that("clean_names, multimembership", { ) }) -skip_if_not_installed("gamlss") -test_that("clean_names, multimembership", { - set.seed(123) - dat <- data.frame( - Y = sample(20:50, 100, replace = TRUE), - date = sample(seq(as.Date("1999/01/01"), as.Date("2000/01/01"), by = "day"), 10), - cont1 = rchisq(100, df = 2), - cont2 = runif(100), - cat1 = sample(LETTERS[1:3], 100, replace = TRUE) - ) - junk <- capture.output({ - mod1 <- suppressWarnings(gamlss::gamlss( - Y ~ date + scale(cont1) + scale(cont2) + I(scale(cont2)^2) * cat1, - data = dat - )) - }) - expect_identical( - clean_names(find_terms(mod1)$conditional), - c("date", "cont1", "cont2", "cont2", "cat1") - ) +test_that("clean_names, division in I()", { + skip_if_not_installed("betareg") + data("GasolineYield", package = "betareg") + data("FoodExpenditure", package = "betareg") + + m1 <- betareg::betareg(yield ~ batch + temp, data = GasolineYield) + m2 <- betareg::betareg(I(food / income) ~ income + persons, data = FoodExpenditure) + expect_identical(clean_names(m1), c("yield", "batch", "temp")) + expect_identical(clean_names(m1), c("food", "income", "persons")) })