Skip to content

Commit

Permalink
Merge branch 'master' into MichaelChirico-patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
topepo committed Nov 25, 2024
2 parents e44cea1 + 5d71315 commit d895410
Show file tree
Hide file tree
Showing 14 changed files with 96 additions and 91 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
^\.github$
^codecov\.yml$
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
21 changes: 12 additions & 9 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: R-CMD-check
name: R-CMD-check.yaml

permissions: read-all

jobs:
R-CMD-check:
Expand All @@ -24,20 +25,20 @@ jobs:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
# Use 3.6 to trigger usage of RTools35
- {os: windows-latest, r: '3.6'}
# use 4.1 to check with rtools40's older compiler
- {os: windows-latest, r: '4.1'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}

# use 4.0 or 4.1 to check with rtools40's older compiler
- {os: windows-latest, r: 'oldrel-4'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}

- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -46,6 +47,7 @@ jobs:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
working-directory: pkg/caret

- uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand All @@ -56,4 +58,5 @@ jobs:
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
working-directory: pkg/caret
12 changes: 9 additions & 3 deletions .github/workflows/pr-commands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ on:
issue_comment:
types: [created]

name: Commands
name: pr-commands.yaml

permissions: read-all

jobs:
document:
Expand All @@ -13,8 +15,10 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down Expand Up @@ -51,8 +55,10 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down
29 changes: 21 additions & 8 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: test-coverage
name: test-coverage.yaml

permissions: read-all

jobs:
test-coverage:
Expand All @@ -15,38 +16,50 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true
working-directory: pkg/caret

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::covr
extra-packages: any::covr, any::xml2
needs: coverage
working-directory: pkg/caret

- name: Test coverage
run: |
covr::codecov(
cov <- covr::package_coverage(
quiet = FALSE,
clean = FALSE,
path = "pkg/caret",
install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
)
covr::to_cobertura(cov)
shell: Rscript {0}

- uses: codecov/codecov-action@v4
with:
# Fail if error if not on PR, or if on PR and token is given
fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
file: ./cobertura.xml
plugin: noop
disable_search: true
token: ${{ secrets.CODECOV_TOKEN }}
working-directory: pkg/caret

- name: Show testthat output
if: always()
run: |
## --------------------------------------------------------------------
find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
shell: bash

- name: Upload test results
if: failure()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: coverage-test-failures
path: ${{ runner.temp }}/package
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[![R-CMD-check](https://github.com/topepo/caret/workflows/R-CMD-check/badge.svg)](https://github.com/topepo/caret/actions)
[![Coverage Status](https://coveralls.io/repos/topepo/caret/badge.svg?branch=master)](https://coveralls.io/r/topepo/caret?branch=master)
![R-CMD-check](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/topepo/caret/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/topepo/caret/graph/badge.svg)](https://app.codecov.io/gh/topepo/caret)
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/caret)](http://cran.r-project.org/web/packages/caret)
[![Downloads](http://cranlogs.r-pkg.org/badges/caret)](http://cran.rstudio.com/package=caret)

Expand Down
3 changes: 1 addition & 2 deletions pkg/caret/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,10 @@ Suggests:
rmarkdown,
rpart,
spls,
subselect,
superpc,
testthat (>= 0.9.1),
themis (>= 0.1.3)
VignetteBuilder:
knitr
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
4 changes: 2 additions & 2 deletions pkg/caret/R/calibration.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#' \code{envir} argument in \code{eval}, e.g., a list or an environment) containing values for any
#' variables in the formula, as well as \code{groups} and \code{subset} if applicable. If not found in
#' \code{data}, or if \code{data} is unspecified, the variables are looked for in the environment of the
#' formula. This argument is not used for \code{xyplot.calibration}. For {ggplot.calibration}, \code{data}
#' should be an object of class "\code{calibration}"."
#' formula. This argument is not used for \code{xyplot.calibration}. For \code{ggplot.calibration}, \code{data}
#' should be an object of class "\code{calibration}".
#'
#' @param class a character string for the class of interest
#'
Expand Down
70 changes: 32 additions & 38 deletions pkg/caret/R/findCorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
averageCorr <- as.numeric(as.factor(averageCorr))
x[lower.tri(x, diag = TRUE)] <- NA
combsAboveCutoff <- which(abs(x) > cutoff)

colsToCheck <- ceiling(combsAboveCutoff / nrow(x))
rowsToCheck <- combsAboveCutoff %% nrow(x)

colsToDiscard <- averageCorr[colsToCheck] > averageCorr[rowsToCheck]
rowsToDiscard <- !colsToDiscard

if(verbose){
colsFlagged <- pmin(ifelse(colsToDiscard, colsToCheck, NA),
ifelse(rowsToDiscard, rowsToCheck, NA), na.rm = TRUE)
Expand All @@ -22,7 +22,7 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
'\n \t Flagging column', colsFlagged, '\n'
))
}

deletecol <- c(colsToCheck[colsToDiscard], rowsToCheck[rowsToDiscard])
deletecol <- unique(deletecol)
deletecol
Expand All @@ -31,29 +31,29 @@ findCorrelation_fast <- function(x, cutoff = .90, verbose = FALSE){
findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
{
varnum <- dim(x)[1]

if (!isTRUE(all.equal(x, t(x)))) stop("correlation matrix is not symmetric")
if (varnum == 1) stop("only one variable given")

x <- abs(x)

# re-ordered columns based on max absolute correlation
originalOrder <- 1:varnum

averageCorr <- function(x) mean(x, na.rm = TRUE)
tmp <- x
diag(tmp) <- NA

maxAbsCorOrder <- order(apply(tmp, 2, averageCorr), decreasing = TRUE)
x <- x[maxAbsCorOrder, maxAbsCorOrder]
newOrder <- originalOrder[maxAbsCorOrder]
rm(tmp)

deletecol <- rep(FALSE, varnum)

x2 <- x
diag(x2) <- NA

for (i in 1:(varnum - 1)) {
if(!any(x2[!is.na(x2)] > cutoff)){
if (verbose) cat("All correlations <=", cutoff, "\n")
Expand All @@ -62,13 +62,13 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
if (deletecol[i]) next
for (j in (i + 1):varnum) {
if (!deletecol[i] & !deletecol[j]) {

if (x[i, j] > cutoff) {
mn1 <- mean(x2[i,], na.rm = TRUE)
mn2 <- mean(x2[-j,], na.rm = TRUE)
if(verbose) cat("Compare row", newOrder[i],
" and column ", newOrder[j],
"with corr ", round(x[i,j], 3), "\n")
if(verbose) cat("Compare row", newOrder[i],
" and column ", newOrder[j],
"with corr ", round(x[i,j], 3), "\n")
if (verbose) cat(" Means: ", round(mn1, 3), "vs", round(mn2, 3))
if (mn1 > mn2) {
deletecol[i] <- TRUE
Expand All @@ -92,28 +92,22 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)


#' Determine highly correlated variables
#'
#'
#' This function searches through a correlation matrix and returns a vector of
#' integers corresponding to columns to remove to reduce pair-wise
#' correlations.
#'
#'
#' The absolute values of pair-wise correlations are considered. If two
#' variables have a high correlation, the function looks at the mean absolute
#' correlation of each variable and removes the variable with the largest mean
#' absolute correlation.
#'
#'
#' Using \code{exact = TRUE} will cause the function to re-evaluate the average
#' correlations at each step while \code{exact = FALSE} uses all the
#' correlations regardless of whether they have been eliminated or not. The
#' exact calculations will remove a smaller number of predictors but can be
#' much slower when the problem dimensions are "big".
#'
#' There are several function in the \pkg{subselect} package
#' (\code{\link[subselect:eleaps]{leaps}},
#' \code{\link[subselect:genetic]{genetic}},
#' \code{\link[subselect:anneal]{anneal}}) that can also be used to accomplish
#' the same goal but tend to retain more predictors.
#'
#'
#' @param x A correlation matrix
#' @param cutoff A numeric value for the pair-wise absolute correlation cutoff
#' @param verbose A boolean for printing the details
Expand All @@ -130,38 +124,38 @@ findCorrelation_exact <- function(x, cutoff = 0.90, verbose = FALSE)
#' \code{\link[subselect:anneal]{anneal}}, \code{\link{findLinearCombos}}
#' @keywords manip
#' @examples
#'
#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
#'
#' R1 <- structure(c(1, 0.86, 0.56, 0.32, 0.85, 0.86, 1, 0.01, 0.74, 0.32,
#' 0.56, 0.01, 1, 0.65, 0.91, 0.32, 0.74, 0.65, 1, 0.36,
#' 0.85, 0.32, 0.91, 0.36, 1),
#' 0.85, 0.32, 0.91, 0.36, 1),
#' .Dim = c(5L, 5L))
#' colnames(R1) <- rownames(R1) <- paste0("x", 1:ncol(R1))
#' R1
#'
#'
#' findCorrelation(R1, cutoff = .6, exact = FALSE)
#' findCorrelation(R1, cutoff = .6, exact = TRUE)
#' findCorrelation(R1, cutoff = .6, exact = TRUE, names = FALSE)
#'
#'
#'
#'
#' R2 <- diag(rep(1, 5))
#' R2[2, 3] <- R2[3, 2] <- .7
#' R2[5, 3] <- R2[3, 5] <- -.7
#' R2[4, 1] <- R2[1, 4] <- -.67
#'
#'
#' corrDF <- expand.grid(row = 1:5, col = 1:5)
#' corrDF$correlation <- as.vector(R2)
#' levelplot(correlation ~ row + col, corrDF)
#'
#'
#' findCorrelation(R2, cutoff = .65, verbose = TRUE)
#'
#'
#' findCorrelation(R2, cutoff = .99, verbose = TRUE)
#'
#'
#' @export findCorrelation
findCorrelation <- function(x, cutoff = 0.90, verbose = FALSE, names = FALSE, exact = ncol(x) < 100) {
if(names & is.null(colnames(x)))
stop("'x' must have column names when `names = TRUE`")
out <- if(exact)
findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else
out <- if(exact)
findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else
findCorrelation_fast(x = x, cutoff = cutoff, verbose = verbose)
out
if(names) out <- colnames(x)[out]
Expand Down
Loading

0 comments on commit d895410

Please sign in to comment.