Merge branch 'main' into strengejacke/issue937

easystats · Oct 21, 2024 · 3d3936f · 3d3936f
2 parents 856e9ad + 93b2ad7
commit 3d3936f
Show file tree

Hide file tree

Showing 12 changed files with 109 additions and 41 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,11 @@
 
 * `get_dispersion()` is now an exported function.
 
+* `get_transformation()` can now deal with any power-transformation.
+
+* `find_transformation()` and `get_transformation()` now also detects use of
+  divisions, like `x/3`.
+
 * Updates `get_varcov()` (and related documentation) to support new covariance
   matrix estimation methods from the **sandwich** package.
 

diff --git a/R/find_transformation.R b/R/find_transformation.R
@@ -5,8 +5,8 @@
 #'   or exp-transforming, was applied to the response variable (dependent
 #'   variable) in a regression formula. Currently, following patterns are
 #'   detected: `log`, `log1p`, `log2`, `log10`, `exp`, `expm1`, `sqrt`,
-#'   `log(x+<number>)`, `log-log`, `power` (to 2nd power, like `I(x^2)`), and
-#'   `inverse` (like `1/y`).
+#'   `log(x+<number>)`, `log-log`, `power` (to 2nd power, like `I(x^2)`),
+#'   `inverse` (like `1/y`), and `scale` (e.g., `x/3`).
 #'
 #' @param x A regression model or a character string of the response value.
 #' @param ... Currently not used.
@@ -51,7 +51,22 @@ find_transformation.default <- function(x, ...) {
     })
     unlist(result)
   } else {
+    # "raw" response
     rv <- find_terms(x)[["response"]]
+    # for divisions, like x/3, `find_response()` returns a character vector
+    # of length 2, one with the nominator and the denominator. In this case,
+    # check against original response
+    original_response <- safe_deparse(find_formula(x)$conditional[[2]])
+    # check if we have the pattern (x/<number)
+    if (.is_division(original_response)) { # nolint
+      # if so, check if the pattern really match
+      nominator <- gsub("/.*", "\\1", original_response)
+      denominator <- gsub(".*\\/(.*)", "\\1", original_response)
+      # and if so again, then reconstruct division string
+      if (all(rv == c(nominator, denominator))) {
+        rv <- paste(nominator, denominator, sep = "/")
+      }
+    }
     find_transformation(rv)
   }
 }
@@ -111,6 +126,9 @@ find_transformation.character <- function(x, ...) {
   } else if (any(startsWith(x, "1/"))) {
     # inverse-transformation
     transform_fun <- "inverse"
+  } else if (.is_division(x)) {
+    # scale
+    transform_fun <- "scale"
   } else if (any(grepl("(.*)(\\^|\\*\\*)\\s?-?(\\d+|[()])", x))) {
     # power-transformation
     transform_fun <- "power"
@@ -121,3 +139,10 @@ find_transformation.character <- function(x, ...) {
 
   transform_fun
 }
+
+
+# helper -----------------------------
+
+.is_division <- function(x) {
+  any(grepl("(\\w)/(\\d)", x)) && !any(grepl("(.*)(\\^|\\*\\*)\\((.*)/(.*)\\)", x))
+}
diff --git a/R/get_loglikelihood.R b/R/get_loglikelihood.R
@@ -490,6 +490,16 @@ get_loglikelihood.phyloglm <- get_loglikelihood.phylolm
         .weighted_sum((get_response(x, as_proportion = TRUE) - 1), w = model_weights)
       } else if (trans == "sqrt") {
         .weighted_sum(log(0.5 / sqrt(get_response(x, as_proportion = TRUE))), w = model_weights)
+      } else if (trans == "inverse") {
+        # first derivative of 1/x is -1/x^2 - we cannot take the log from negative
+        # values, so this won't work here, and we return NULL
+        NULL
+      } else if (trans == "scale") {
+        scale_denominator <- .extract_scale_denominator(x)
+        .weighted_sum(log(1 / scale_denominator), w = model_weights)
+      } else if (trans == "power") {
+        trans_power <- .extract_power_transformation(x)
+        .weighted_sum(log(trans_power * (get_response(x, as_proportion = TRUE)^(trans_power - 1))), w = model_weights) # nolint
       } else if (is.null(model_weights)) {
         .ll_log_adjustment(x)
       } else {

diff --git a/R/get_transformation.R b/R/get_transformation.R
@@ -6,7 +6,8 @@
 #' This functions checks whether any transformation, such as log- or
 #' exp-transforming, was applied to the response variable (dependent variable)
 #' in a regression formula, and returns the related function that was used for
-#' transformation.
+#' transformation. See [`find_transformation()`] for an overview of supported
+#' transformations that are detected.
 #'
 #' @param x A regression model.
 #' @param verbose Logical, if `TRUE`, prints a warning if the transformation
@@ -62,18 +63,20 @@ get_transformation <- function(x, verbose = TRUE) {
     out <- list(transformation = sqrt, inverse = function(x) x^2)
   } else if (transform_fun == "inverse") {
     out <- list(transformation = function(x) 1 / x, inverse = function(x) x^-1)
+  } else if (transform_fun == "scale") {
+    denominator <- .extract_scale_denominator(x)
+    out <- list(
+      transformation = eval(parse(text = paste0("function(x) x / ", as.character(denominator)))), # nolint
+      inverse = eval(parse(text = paste0("function(x) x * ", as.character(denominator))))
+    )
   } else if (transform_fun == "power") {
-    ## TODO: detect power - can we turn this into a generic function?
-    trans_power <- .safe(gsub("\\(|\\)", "", gsub("(.*)(\\^|\\*\\*)\\s*(\\d+|[()])", "\\3", find_terms(x)[["response"]]))) # nolint
+    trans_power <- .extract_power_transformation(x)
     if (is.null(trans_power)) {
-      trans_power <- "2"
+      trans_power <- 2
     }
-    out <- switch(trans_power,
-      `0.5` = list(transformation = function(x) x^0.5, inverse = function(x) x^2),
-      `3` = list(transformation = function(x) x^3, inverse = function(x) x^(1 / 3)),
-      `4` = list(transformation = function(x) x^4, inverse = function(x) x^0.25),
-      `5` = list(transformation = function(x) x^5, inverse = function(x) x^0.2),
-      list(transformation = function(x) x^2, inverse = sqrt)
+    out <- list(
+      transformation = eval(parse(text = paste0("function(x) x^", as.character(trans_power)))), # nolint
+      inverse = eval(parse(text = paste0("function(x) x^(", as.character(trans_power), "^-1)")))
     )
   } else if (transform_fun == "expm1") {
     out <- list(transformation = expm1, inverse = log1p)
@@ -93,3 +96,22 @@ get_transformation <- function(x, verbose = TRUE) {
 
   out
 }
+
+
+# helper ------------------------------
+
+
+.extract_power_transformation <- function(model) {
+  .safe(as.numeric(gsub("\\(|\\)", "", gsub("(.*)(\\^|\\*\\*)\\s*(\\d+|[()])", "\\3", find_terms(model)[["response"]])))) # nolint
+}
+
+
+.extract_scale_denominator <- function(model) {
+  resp_term <- find_terms(model)[["response"]]
+  # more complicated case: scale is inside `I()`
+  if (startsWith(resp_term[1], "I(")) {
+    as.numeric(gsub("(.*)/(.*)\\)", "\\2", resp_term[1]))
+  } else {
+    as.numeric(resp_term[2])
+  }
+}
diff --git a/man/find_transformation.Rd b/man/find_transformation.Rd
diff --git a/man/get_predicted.Rd b/man/get_predicted.Rd
diff --git a/man/get_predicted_ci.Rd b/man/get_predicted_ci.Rd
diff --git a/man/get_transformation.Rd b/man/get_transformation.Rd
diff --git a/man/get_varcov.Rd b/man/get_varcov.Rd
diff --git a/tests/testthat/test-find_transformation.R b/tests/testthat/test-find_transformation.R
@@ -44,10 +44,17 @@ test_that("find_transformation - power-1", {
 })
 
 test_that("find_transformation - power-2", {
-  model <- lm(I(Sepal.Length^2) ~ Species, data = iris)
+  model <- lm(I(Sepal.Length^2.5) ~ Species, data = iris)
   expect_identical(find_transformation(model), "power")
 })
 
+test_that("find_transformation - scale", {
+  model <- lm(mpg / 0.7 ~ hp, data = mtcars)
+  expect_identical(find_transformation(model), "scale")
+  model <- lm(I(mpg / 0.7) ~ hp, data = mtcars)
+  expect_identical(find_transformation(model), "scale")
+})
+
 test_that("find_transformation - unknown", {
   model <- lm(I(2 * mpg + 3) ~ hp, data = mtcars)
   expect_null(find_transformation(model))

diff --git a/tests/testthat/test-get_loglikelihood.R b/tests/testthat/test-get_loglikelihood.R
@@ -36,11 +36,16 @@ test_that("get_loglikelihood - lm", {
   expect_equal(as.numeric(get_loglikelihood(x)), 18.4205, tolerance = 1e-3)
   expect_equal(as.numeric(get_loglikelihood(x, check_response = TRUE)), -75.58669, tolerance = 1e-3)
 
-
   # sqrt-response
   x <- lm(sqrt(mpg) ~ wt, data = mtcars)
   expect_equal(as.numeric(get_loglikelihood(x)), -7.395031, tolerance = 1e-3)
   expect_equal(as.numeric(get_loglikelihood(x, check_response = TRUE)), -76.89597, tolerance = 1e-3)
+
+  # power to x
+  x <- lm(mpg^3.5 ~ wt, weights = wg, data = mtcars)
+  expect_equal(as.numeric(get_loglikelihood(x)), -385.6256, tolerance = 1e-3)
+  expect_equal(as.numeric(get_loglikelihood(x, check_response = TRUE)), -110.5192, tolerance = 1e-3)
+
 })
 
 test_that("get_loglikelihood - glm", {

diff --git a/tests/testthat/test-get_transformation.R b/tests/testthat/test-get_transformation.R
@@ -8,4 +8,21 @@ test_that("get_transformation - detect powers", {
   fun <- get_transformation(m)
   expect_identical(fun$transformation(2), 8)
   expect_identical(fun$inverse(8), 2)
+  m <- lm(Sepal.Length^4.7 ~ Species, data = iris)
+  fun <- get_transformation(m)
+  expect_equal(fun$transformation(2), 25.99208, tolerance = 1e-3)
+  expect_equal(fun$inverse(25.99208), 2, tolerance = 1e-3)
+})
+
+
+test_that("get_transformation - detect scale", {
+  data(mtcars)
+  m <- lm(mpg / 0.7 ~ hp, data = mtcars)
+  fun <- get_transformation(m)
+  expect_equal(fun$transformation(2), 2 / 0.7, tolerance = 1e-3)
+  expect_equal(fun$inverse(2 / 0.7), 2, tolerance = 1e-3)
+  m <- lm(I(mpg / 0.7) ~ hp, data = mtcars)
+  fun <- get_transformation(m)
+  expect_equal(fun$transformation(2), 2 / 0.7, tolerance = 1e-3)
+  expect_equal(fun$inverse(2 / 0.7), 2, tolerance = 1e-3)
 })