Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check if we can simplify clean_names() #949

Closed
wants to merge 7 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 16 additions & 68 deletions R/clean_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,79 +130,27 @@ clean_names.character <- function(x, include_names = FALSE, ...) {
if (length(lag_pattern)) pattern <- pattern[-lag_pattern]
}

if (ignore_asis) {
asis_pattern <- which(pattern %in% c("I", "asis"))
if (length(asis_pattern)) pattern <- pattern[-asis_pattern]
}

# do we have a "log()" pattern here? if yes, get capture region
# which matches the "cleaned" variable name
cleaned <- unlist(lapply(seq_along(x), function(i) {
# copy value(s)
out <- x[i]
# check if we have special patterns like 100 * log(xy), and remove it
if (isFALSE(is_emmeans) && grepl("^([0-9]+)", out)) {
out <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", out)
}
for (j in seq_along(pattern)) {
# check if we find pattern at all
if (any(grepl(pattern[j], out, fixed = TRUE))) {
# remove possible namespace
if (grepl("::", out, fixed = TRUE)) {
out <- sub("(.*)::(.*)", "\\2", out)
}
if (pattern[j] == "offset") { # nolint
out <- trim_ws(unique(sub("^offset\\(([^-+ )]*).*", "\\1", out)))
} else if (pattern[j] == "I") {
if (!ignore_asis && grepl("I\\((.*)\\)", out)) {
# out <- trim_ws(unique(sub("I\\(((\\w|\\.)*).*", "\\1", out)))
# for compatibility with `get_data(source = "mf")`, we return only
# the first value for "I()". But in some examples, like `I(food/income)`,
# we would actutually have two names - but this breaks get_data()
out <- all.vars(stats::as.formula(paste("~", out)))[1]
}
} else if (pattern[j] == "asis") {
if (!ignore_asis && grepl("asis\\((.*)\\)", out)) {
# out <- trim_ws(unique(sub("asis\\(((\\w|\\.)*).*", "\\1", out)))
out <- all.vars(stats::as.formula(paste("~", out)))[1]
}
} else if (pattern[j] == "log(log") {
out <- trim_ws(unique(sub("^log\\(log\\(((\\w|\\.)*).*", "\\1", out)))
} else if (pattern[j] == "relevel(as.factor") {
out <- trim_ws(unique(sub("^relevel\\(as.factor\\(((\\w|\\.)*).*", "\\1", out)))
} else if (pattern[j] == "scale(log") {
out <- trim_ws(unique(sub("^scale\\(log\\(((\\w|\\.)*).*", "\\1", out)))
out <- trim_ws(unique(sub("^scale\\(log1p\\(((\\w|\\.)*).*", "\\1", out)))
out <- trim_ws(unique(sub("^scale\\(log2\\(((\\w|\\.)*).*", "\\1", out)))
out <- trim_ws(unique(sub("^scale\\(log10\\(((\\w|\\.)*).*", "\\1", out)))
} else if (pattern[j] == "scale(poly") {
out <- trim_ws(unique(sub("^scale\\(poly\\(((\\w|\\.)*).*", "\\1", out)))
} else if (pattern[j] %in% c("mmc", "mm")) {
if (grepl(paste0("^", pattern[j], "\\((.*)\\).*"), out)) {
out <- all.vars(stats::as.formula(paste("~", out)))
}
} else if (pattern[j] == "s" && startsWith(out, "s(")) {
out <- gsub("^s\\(", "", out)
out <- gsub("\\)$", "", out)
if (grepl("=|[[:digit:]]", out)) {
new_x <- trim_ws(unlist(strsplit(out, ",", fixed = TRUE), use.names = FALSE))
to_remove <- which(!grepl("\\D", new_x))
to_remove <- c(to_remove, grep("=", new_x, fixed = TRUE))
if (length(to_remove) == 0) {
out <- toString(new_x)
} else {
out <- toString(new_x[-to_remove])
}
}
} else {
# p <- paste0("^", pattern[j], "\\(([^,/)]*).*")
# this one should be more generic...
p <- paste0("^", pattern[j], "\\(((\\w|\\.)*).*")
out <- unique(sub(p, "\\1", out))
}
cleaned <- unlist(lapply(x, function(i) {
if (!grepl("(Intercept)", i)) {
# check if we have special patterns like 100 * log(xy), and remove it
if (isFALSE(is_emmeans) && grepl("^([0-9]+)", i)) {
i <- gsub("^([0-9]+)[^(\\.|[:alnum:])]+(.*)", "\\2", i)
}
if (any(startsWith(i, pattern))) {
i <- all.vars(stats::as.formula(paste("~", i)))
}
}
# for coxme-models, remove random-effect things...
if (any(grepl("|", out, fixed = TRUE))) {
out <- sub("^(.*)\\|(.*)", "\\2", out)
if (any(grepl("|", i, fixed = TRUE))) {
i <- sub("^(.*)\\|(.*)", "\\2", i)
}

trim_ws(out)
trim_ws(i)
}), use.names = FALSE)

# remove for random intercept only models
Expand Down
Loading