From 5d713158aeb206ce6effce07976ba5ed881b046e Mon Sep 17 00:00:00 2001 From: Max Kuhn Date: Mon, 25 Nov 2024 13:07:40 -0500 Subject: [PATCH] Changes mandated by CRAN (#1374) * subselect no longer on CRAN * re-document * GHA update * GHA updates to make test coverage work * doc update * add missing argument --- .Rbuildignore | 1 + .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 21 +++++---- .github/workflows/pr-commands.yaml | 12 +++-- .github/workflows/test-coverage.yaml | 29 ++++++++---- README.md | 4 +- pkg/caret/DESCRIPTION | 3 +- pkg/caret/R/calibration.R | 4 +- pkg/caret/R/findCorrelation.R | 70 +++++++++++++--------------- pkg/caret/R/findLinearCombos.R | 23 ++++----- pkg/caret/inst/NEWS.Rd | 2 +- pkg/caret/man/calibration.Rd | 4 +- pkg/caret/man/findCorrelation.Rd | 10 +--- pkg/caret/man/findLinearCombos.Rd | 3 -- 14 files changed, 96 insertions(+), 91 deletions(-) create mode 100644 .github/.gitignore diff --git a/.Rbuildignore b/.Rbuildignore index c503c4f64..763b34098 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1 +1,2 @@ ^\.github$ +^codecov\.yml$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 000000000..2d19fc766 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 91509fddf..59a28fb9f 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -8,9 +8,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -24,20 +25,20 @@ jobs: config: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} - # use 4.1 to check with rtools40's older compiler - - {os: windows-latest, r: '4.1'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 
'release'} + - {os: ubuntu-latest, r: 'release'} + # use 4.0 or 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: 'oldrel-4'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -46,6 +47,7 @@ jobs: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true + working-directory: pkg/caret - uses: r-lib/actions/setup-r-dependencies@v2 with: @@ -56,4 +58,5 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' working-directory: pkg/caret diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 49ca6c6f5..b6ba22a17 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -4,7 +4,9 @@ on: issue_comment: types: [created] -name: Commands +name: pr-commands.yaml + +permissions: read-all jobs: document: @@ -13,8 +15,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -51,8 +55,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 45959c6b1..bf3ceb7c0 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,9 +4,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: test-coverage +name: test-coverage.yaml + +permissions: 
read-all jobs: test-coverage: @@ -15,38 +16,50 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true + working-directory: pkg/caret - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage working-directory: pkg/caret - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, path = "pkg/caret", - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v4 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + working-directory: pkg/caret + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/README.md b/README.md index e0b186937..7c4cd074f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -[![R-CMD-check](https://github.com/topepo/caret/workflows/R-CMD-check/badge.svg)](https://github.com/topepo/caret/actions) -[![Coverage Status](https://coveralls.io/repos/topepo/caret/badge.svg?branch=master)](https://coveralls.io/r/topepo/caret?branch=master) 
+[![R-CMD-check](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml) +[![Codecov test coverage](https://codecov.io/gh/topepo/caret/graph/badge.svg)](https://app.codecov.io/gh/topepo/caret) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/caret)](http://cran.r-project.org/web/packages/caret) [![Downloads](http://cranlogs.r-pkg.org/badges/caret)](http://cran.rstudio.com/package=caret) diff --git a/pkg/caret/DESCRIPTION b/pkg/caret/DESCRIPTION index 373b2e158..62fc9d86d 100644 --- a/pkg/caret/DESCRIPTION +++ b/pkg/caret/DESCRIPTION @@ -107,11 +107,10 @@ Suggests: rmarkdown, rpart, spls, - subselect, superpc, testthat (>= 0.9.1), themis (>= 0.1.3) VignetteBuilder: knitr Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/pkg/caret/R/calibration.R b/pkg/caret/R/calibration.R index ef58de348..3a37d1bd5 100644 --- a/pkg/caret/R/calibration.R +++ b/pkg/caret/R/calibration.R @@ -16,8 +16,8 @@ #' \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any #' variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in #' \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the -#' formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data} -#' should be an object of class "\code{calibration}"." +#' formula. This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data} +#' should be an object of class "\code{calibration}". 
#' #' @param class a character string for the class of interest #' diff --git a/pkg/caret/R/findCorrelation.R b/pkg/caret/R/findCorrelation.R index 981c43e4e..f6b98b9ed 100644 --- a/pkg/caret/R/findCorrelation.R +++ b/pkg/caret/R/findCorrelation.R @@ -6,13 +6,13 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){ averageCorr <- as.numeric(as.factor(averageCorr)) x[lower.tri(x, diag = TRUE)] <- NA combsAboveCutoff <- which(abs(x) > cutoff) - + colsToCheck <- ceiling(combsAboveCutoff / nrow(x)) rowsToCheck <- combsAboveCutoff %% nrow(x) - + colsToDiscard <- averageCorr[colsToCheck] > averageCorr[rowsToCheck] rowsToDiscard <- !colsToDiscard - + if(verbose){ colsFlagged <- pmin(ifelse(colsToDiscard, colsToCheck, NA), ifelse(rowsToDiscard, rowsToCheck, NA), na.rm = TRUE) @@ -22,7 +22,7 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){ '\n \t Flagging column', colsFlagged, '\n' )) } - + deletecol <- c(colsToCheck[colsToDiscard], rowsToCheck[rowsToDiscard]) deletecol <- unique(deletecol) deletecol @@ -31,29 +31,29 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE) { varnum <- dim(x)[1] - + if (!isTRUE(all.equal(x, t(x)))) stop("correlation matrix is not symmetric") if (varnum == 1) stop("only one variable given") - + x <- abs(x) - + # re-ordered columns based on max absolute correlation originalOrder <- 1:varnum - + averageCorr <- function(x) mean(x, na.rm = TRUE) tmp <- x diag(tmp) <- NA - + maxAbsCorOrder <- order(apply(tmp, 2, averageCorr), decreasing = TRUE) x <- x[maxAbsCorOrder, maxAbsCorOrder] newOrder <- originalOrder[maxAbsCorOrder] rm(tmp) - + deletecol <- rep(FALSE, varnum) - + x2 <- x diag(x2) <- NA - + for (i in 1:(varnum - 1)) { if(!any(x2[!is.na(x2)] > cutoff)){ if (verbose) cat("All correlations <=", cutoff, "\n") @@ -62,13 +62,13 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE) if (deletecol[i]) next for 
(j in (i + 1):varnum) { if (!deletecol[i] & !deletecol[j]) { - + if (x[i, j] > cutoff) { mn1 <- mean(x2[i,], na.rm = TRUE) mn2 <- mean(x2[-j,], na.rm = TRUE) - if(verbose) cat("Compare row", newOrder[i], - " and column ", newOrder[j], - "with corr ", round(x[i,j], 3), "\n") + if(verbose) cat("Compare row", newOrder[i], + " and column ", newOrder[j], + "with corr ", round(x[i,j], 3), "\n") if (verbose) cat(" Means: ", round(mn1, 3), "vs", round(mn2, 3)) if (mn1 > mn2) { deletecol[i] <- TRUE @@ -92,28 +92,22 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE) #' Determine highly correlated variables -#' +#' #' This function searches through a correlation matrix and returns a vector of #' integers corresponding to columns to remove to reduce pair-wise #' correlations. -#' +#' #' The absolute values of pair-wise correlations are considered. If two #' variables have a high correlation, the function looks at the mean absolute #' correlation of each variable and removes the variable with the largest mean #' absolute correlation. -#' +#' #' Using \code{exact = TRUE} will cause the function to re-evaluate the average #' correlations at each step while \code{exact = FALSE} uses all the #' correlations regardless of whether they have been eliminated or not. The #' exact calculations will remove a smaller number of predictors but can be #' much slower when the problem dimensions are "big". -#' -#' There are several function in the \pkg{subselect} package -#' (\code{\link[subselect:eleaps]{leaps}}, -#' \code{\link[subselect:genetic]{genetic}}, -#' \code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish -#' the same goal but tend to retain more predictors. 
-#' +#' #' @param x A correlation matrix #' @param cutoff A numeric value for the pair-wise absolute correlation cutoff #' @param verbose A boolean for printing the details @@ -130,38 +124,38 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE) #' \code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}} #' @keywords manip #' @examples -#' -#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, +#' +#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, #' 0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36, -#' 0.85, 0.32, 0.91, 0.36, 1), +#' 0.85, 0.32, 0.91, 0.36, 1), #' .Dim = c(5L, 5L)) #' colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1)) #' R1 -#' +#' #' findCorrelation(R1, cutoff = .6, exact = FALSE) #' findCorrelation(R1, cutoff = .6, exact = TRUE) #' findCorrelation(R1, cutoff = .6, exact = TRUE, names = FALSE) -#' -#' +#' +#' #' R2 <- diag(rep(1, 5)) #' R2[2, 3] <- R2[3, 2] <- .7 #' R2[5, 3] <- R2[3, 5] <- -.7 #' R2[4, 1] <- R2[1, 4] <- -.67 -#' +#' #' corrDF <- expand.grid(row = 1:5, col = 1:5) #' corrDF$correlation <- as.vector(R2) #' levelplot(correlation ~ row + col, corrDF) -#' +#' #' findCorrelation(R2, cutoff = .65, verbose = TRUE) -#' +#' #' findCorrelation(R2, cutoff = .99, verbose = TRUE) -#' +#' #' @export findCorrelation findCorrelation <- function(x, cutoff = 0.90, verbose = FALSE, names = FALSE, exact = ncol(x) < 100) { if(names & is.null(colnames(x))) stop("'x' must have column names when `names = TRUE`") - out <- if(exact) - findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else + out <- if(exact) + findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else findCorrelation_fast(x = x, cutoff = cutoff, verbose = verbose) out if(names) out <- colnames(x)[out] diff --git a/pkg/caret/R/findLinearCombos.R b/pkg/caret/R/findLinearCombos.R index ad9b14516..fa3c9b000 100644 --- a/pkg/caret/R/findLinearCombos.R +++ 
b/pkg/caret/R/findLinearCombos.R @@ -58,18 +58,15 @@ internalEnumLC <- function(qrObj, ...) #' Determine linear combinations in a matrix -#' +#' #' Enumerate and resolve the linear combinations in a numeric matrix -#' +#' #' The QR decomposition is used to determine if the matrix is full rank and #' then identify the sets of columns that are involved in the dependencies. -#' +#' #' To "resolve" them, columns are iteratively removed and the matrix rank is #' rechecked. -#' -#' The \code{\link[subselect:trim.matrix]{trim.matrix}} function in the -#' \pkg{subselect} package can also be used to accomplish the same goal. -#' +#' #' @param x a numeric matrix #' @return a list with elements: \item{linearCombos }{If there are linear #' combinations, this will be a list with elements for each dependency that @@ -80,7 +77,7 @@ internalEnumLC <- function(qrObj, ...) #' @seealso \code{\link[subselect:trim.matrix]{trim.matrix}} #' @keywords manip #' @examples -#' +#' #' testData1 <- matrix(0, nrow=20, ncol=8) #' testData1[,1] <- 1 #' testData1[,2] <- round(rnorm(20), 1) @@ -90,9 +87,9 @@ internalEnumLC <- function(qrObj, ...) #' testData1[1:4,6] <- 1 #' testData1[5:10,7] <- 1 #' testData1[11:20,8] <- 1 -#' +#' #' findLinearCombos(testData1) -#' +#' #' testData2 <- matrix(0, nrow=6, ncol=6) #' testData2[,1] <- c(1, 1, 1, 1, 1, 1) #' testData2[,2] <- c(1, 1, 1, 0, 0, 0) @@ -100,9 +97,9 @@ internalEnumLC <- function(qrObj, ...) 
#' testData2[,4] <- c(1, 0, 0, 1, 0, 0) #' testData2[,5] <- c(0, 1, 0, 0, 1, 0) #' testData2[,6] <- c(0, 0, 1, 0, 0, 1) -#' +#' #' findLinearCombos(testData2) -#' +#' #' @export findLinearCombos findLinearCombos <- function(x) { @@ -116,7 +113,7 @@ findLinearCombos <- function(x) while(continue) { # keep removing linear dependencies until it resolves - tmp <- unlist(lapply(lcList, function(x) x[1])) + tmp <- unlist(lapply(lcList, function(x) x[1])) tmp <- unique(tmp[!is.na(tmp)]) badList <- unique(c(tmp, badList)) lcList <- enumLC(x[,-badList, drop = FALSE]) diff --git a/pkg/caret/inst/NEWS.Rd b/pkg/caret/inst/NEWS.Rd index df7c85bfc..9420a82a4 100644 --- a/pkg/caret/inst/NEWS.Rd +++ b/pkg/caret/inst/NEWS.Rd @@ -93,7 +93,7 @@ \itemize{ \item A new version was requested by CRAN since en dashes were used in the documentation. \item A bug was fixed where, for some recipes that involve class imbalance sampling, the resampling indicies were computed incorrectly \issue{1030}. - \item code{train} now removes duplicate models in the tuning grid. Duplicates could occur for models with discrete parameters. + \item \code{train} now removes duplicate models in the tuning grid. Duplicates could occur for models with discrete parameters. } } diff --git a/pkg/caret/man/calibration.Rd b/pkg/caret/man/calibration.Rd index 13fbb21d5..c73071fb1 100644 --- a/pkg/caret/man/calibration.Rd +++ b/pkg/caret/man/calibration.Rd @@ -43,8 +43,8 @@ used in \code{calibration.formula}).} \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the -formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data} -should be an object of class "\code{calibration}"."} +formula. 
This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data} +should be an object of class "\code{calibration}".} \item{class}{a character string for the class of interest} diff --git a/pkg/caret/man/findCorrelation.Rd b/pkg/caret/man/findCorrelation.Rd index fe3a0827d..c4ae26753 100644 --- a/pkg/caret/man/findCorrelation.Rd +++ b/pkg/caret/man/findCorrelation.Rd @@ -46,18 +46,12 @@ correlations at each step while \code{exact = FALSE} uses all the correlations regardless of whether they have been eliminated or not. The exact calculations will remove a smaller number of predictors but can be much slower when the problem dimensions are "big". - -There are several function in the \pkg{subselect} package -(\code{\link[subselect:eleaps]{leaps}}, -\code{\link[subselect:genetic]{genetic}}, -\code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish -the same goal but tend to retain more predictors. } \examples{ -R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, +R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32, 0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36, - 0.85, 0.32, 0.91, 0.36, 1), + 0.85, 0.32, 0.91, 0.36, 1), .Dim = c(5L, 5L)) colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1)) R1 diff --git a/pkg/caret/man/findLinearCombos.Rd b/pkg/caret/man/findLinearCombos.Rd index ffa01ed12..27c79c069 100644 --- a/pkg/caret/man/findLinearCombos.Rd +++ b/pkg/caret/man/findLinearCombos.Rd @@ -24,9 +24,6 @@ then identify the sets of columns that are involved in the dependencies. To "resolve" them, columns are iteratively removed and the matrix rank is rechecked. - -The \code{\link[subselect:trim.matrix]{trim.matrix}} function in the -\pkg{subselect} package can also be used to accomplish the same goal. } \examples{