From 6269666c0bc2dbb48fd4193adec46b9fbf4fdccd Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Tue, 20 Aug 2024 21:49:44 +0200 Subject: [PATCH 1/2] Ready for submission --- NAMESPACE | 1 - NEWS.md | 10 +++--- R/potential_interactions.R | 5 +-- R/shapviz.R | 63 +++++++++++++++++++++----------------- cran-comments.md | 33 +++++++------------- man/shapviz.Rd | 13 +++----- packaging.R | 2 +- revdep/README.md | 44 +++++++++++++------------- vignettes/geographic.Rmd | 20 ++---------- 9 files changed, 85 insertions(+), 106 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 37d6546..db38594 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,7 +31,6 @@ S3method(shapviz,explain) S3method(shapviz,kernelshap) S3method(shapviz,lgb.Booster) S3method(shapviz,matrix) -S3method(shapviz,permshap) S3method(shapviz,predict_parts) S3method(shapviz,shapr) S3method(shapviz,treeshap) diff --git a/NEWS.md b/NEWS.md index 766c7e7..245fcf5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,16 +1,16 @@ # shapviz 0.9.4 -## API improvements +### API improvements -- Support both XGBoost 1.x.x as well as XGBoost 2.x.x, implemented in #144. +- Support both XGBoost 1.x.x as well as XGBoost 2.x.x, implemented in [#144](https://github.com/ModelOriented/shapviz/pull/144). -## Improvements +### Other improvements -- New argument `sort_features = TRUE` in `sv_importance()` and `sv_interaction()`. Set to `FALSE` to show the features as they appear in your SHAP matrix. In that case, the plots will show the *first* `max_display` features, not the *most important* features. Implements #136. +- New argument `sort_features = TRUE` in `sv_importance()` and `sv_interaction()`. Set to `FALSE` to show the features as they appear in your SHAP matrix. In that case, the plots will show the *first* `max_display` features, not the *most important* features. Implements [#137](https://github.com/ModelOriented/shapviz/pull/137). ### Bug fixes -- `shapviz.xgboost()` would fail if a single row is passed. This has been fixed in #142. Thanks @sebsilas for reporting. +- `shapviz.xgboost()` would fail if a single row is passed. This has been fixed in [#142](https://github.com/ModelOriented/shapviz/pull/142). Thanks @sebsilas for reporting. # shapviz 0.9.3 diff --git a/R/potential_interactions.R b/R/potential_interactions.R index 9b38806..0c6af38 100644 --- a/R/potential_interactions.R +++ b/R/potential_interactions.R @@ -42,8 +42,9 @@ #' @returns A named vector of decreasing interaction strengths. #' @export #' @seealso [sv_dependence()] -potential_interactions <- function(obj, v, nbins = NULL, color_num = TRUE, - scale = FALSE, adjusted = FALSE) { +potential_interactions <- function( + obj, v, nbins = NULL, color_num = TRUE, scale = FALSE, adjusted = FALSE + ) { stopifnot(is.shapviz(obj)) S <- get_shap_values(obj) S_inter <- get_shap_interactions(obj) diff --git a/R/shapviz.R b/R/shapviz.R index 8ef9235..2f501fb 100644 --- a/R/shapviz.R +++ b/R/shapviz.R @@ -12,8 +12,9 @@ #' - `shapr::explain()`, #' - `treeshap::treeshap()`, #' - `DALEX::predict_parts()`, -#' - `kernelshap::kernelshap()`, and -#' - `kernelshap::permshap()`, +#' - `kernelshap::kernelshap()`, +#' - `kernelshap::permshap()`, and +#' - `kernelshap::additive_shap()`, #' #' check the vignettes for examples. #' @@ -85,8 +86,9 @@ shapviz.default = function(object, ...) { #' @describeIn shapviz #' Creates a "shapviz" object from a matrix of SHAP values. #' @export -shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, - S_inter = NULL, ...) { +shapviz.matrix = function( + object, X, baseline = 0, collapse = NULL, S_inter = NULL, ... + ) { if (!is.null(collapse)) { object <- collapse_shap(object, collapse = collapse) if (!is.null(S_inter)) { @@ -183,8 +185,15 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' mx #' all.equal(mx[[3]], x) #' } -shapviz.xgb.Booster = function(object, X_pred, X = X_pred, which_class = NULL, - collapse = NULL, interactions = FALSE, ...) { +shapviz.xgb.Booster = function( + object, + X_pred, + X = X_pred, + which_class = NULL, + collapse = NULL, + interactions = FALSE, + ... + ) { stopifnot( "X must be a matrix or data.frame. It can't be an object of class xgb.DMatrix" = is.matrix(X) || is.data.frame(X), @@ -269,8 +278,9 @@ shapviz.xgb.Booster = function(object, X_pred, X = X_pred, which_class = NULL, #' @describeIn shapviz #' Creates a "shapviz" object from a LightGBM model. #' @export -shapviz.lgb.Booster = function(object, X_pred, X = X_pred, - which_class = NULL, collapse = NULL, ...) { +shapviz.lgb.Booster = function( + object, X_pred, X = X_pred, which_class = NULL, collapse = NULL, ... + ) { if (!requireNamespace("lightgbm", quietly = TRUE)) { stop("Package 'lightgbm' not installed") } @@ -352,8 +362,9 @@ shapviz.explain <- function(object, X = NULL, baseline = NULL, collapse = NULL, #' @describeIn shapviz #' Creates a "shapviz" object from `treeshap::treeshap()`. #' @export -shapviz.treeshap <- function(object, X = object[["observations"]], - baseline = 0, collapse = NULL, ...) { +shapviz.treeshap <- function( + object, X = object[["observations"]], baseline = 0, collapse = NULL, ... + ) { S_inter <- object[["interactions"]] if (!is.null(S_inter)) { S_inter <- aperm(S_inter, c(3L, 1:2)) @@ -410,10 +421,12 @@ shapviz.shapr <- function(object, X = object[["x_test"]], collapse = NULL, ...) } #' @describeIn shapviz -#' Creates a "shapviz" object from `kernelshap::kernelshap()`. +#' Creates a "shapviz" object from an object of class 'kernelshap'. This includes +#' results of `kernelshap()`, `permshap()`, and `additive_shap()`. #' @export -shapviz.kernelshap <- function(object, X = object[["X"]], - which_class = NULL, collapse = NULL, ...) { +shapviz.kernelshap <- function( + object, X = object[["X"]], which_class = NULL, collapse = NULL, ... + ) { S <- object[["S"]] b <- object[["baseline"]] @@ -440,36 +453,30 @@ shapviz.kernelshap <- function(object, X = object[["X"]], shapviz.matrix(object = S, X = X, baseline = b, collapse = collapse) } -#' @describeIn shapviz -#' Creates a "shapviz" object from `kernelshap::permshap()`. -#' @export -shapviz.permshap <- function(object, X = object[["X"]], - which_class = NULL, collapse = NULL, ...) { - # The output structure of permshap is identical to kernelshap - shapviz.kernelshap(object, X = X, which_class = which_class, collapse = collapse, ...) -} - #' @describeIn shapviz #' Creates a "shapviz" object from a (tree-based) H2O regression model. #' @export -shapviz.H2ORegressionModel = function(object, X_pred, X = as.data.frame(X_pred), - collapse = NULL, ...) { +shapviz.H2ORegressionModel = function( + object, X_pred, X = as.data.frame(X_pred), collapse = NULL, ... + ) { shapviz.H2OModel(object = object, X_pred = X_pred, X = X, collapse = collapse, ...) } #' @describeIn shapviz #' Creates a "shapviz" object from a (tree-based) H2O binary classification model. #' @export -shapviz.H2OBinomialModel = function(object, X_pred, X = as.data.frame(X_pred), - collapse = NULL, ...) { +shapviz.H2OBinomialModel = function( + object, X_pred, X = as.data.frame(X_pred), collapse = NULL, ... + ) { shapviz.H2OModel(object = object, X_pred = X_pred, X = X, collapse = collapse, ...) } #' @describeIn shapviz #' Creates a "shapviz" object from a (tree-based) H2O model (base class). #' @export -shapviz.H2OModel = function(object, X_pred, X = as.data.frame(X_pred), - collapse = NULL, ...) { +shapviz.H2OModel = function( + object, X_pred, X = as.data.frame(X_pred), collapse = NULL, ... + ) { if (!requireNamespace("h2o", quietly = TRUE)) { stop("Package 'h2o' not installed") } diff --git a/cran-comments.md b/cran-comments.md index 0c2f94f..72effc2 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,29 +1,15 @@ -# shapviz 0.9.3 +# shapviz 0.9.4 -Hi CRAN team +Dear CRAN team -This is a relatively small update, but offers much more flexibility in the interaction heuristic. +This update mainly ensures that upcoming XGBoost version 2.x.x will work. -## Checks look good -### check(manual = TRUE, cran = TRUE) +### Local checks -Ok +Ok, with note -### RHub (usual notes) - -* checking package dependencies ... NOTE -Packages which this enhances but not available for checking: - 'fastshap', 'h2o', 'lightgbm' -* checking HTML version of manual ... NOTE -Skipping checking math rendering: package 'V8' unavailable -* checking for non-standard things in the check directory ... NOTE -Found the following files/directories: - ''NULL'' -* checking for detritus in the temp directory ... NOTE -Found the following files/directories: - 'lastMiKTeXException' - +Packages which this enhances but not available for checking: 'fastshap', 'h2o' ### Winbuilder() @@ -31,5 +17,8 @@ Status: OK ## Reverse dependencies (2) -- OK: 2 -- BROKEN: 0 +- fastshap 0.1.1 ── E: 0 | W: 1 | N: 1 +- lowml 0.1.3 ── E: 0 | W: 0 | N: 0 + +OK: 2 +BROKEN: 0 diff --git a/man/shapviz.Rd b/man/shapviz.Rd index adc08cb..376cb50 100644 --- a/man/shapviz.Rd +++ b/man/shapviz.Rd @@ -11,7 +11,6 @@ \alias{shapviz.predict_parts} \alias{shapviz.shapr} \alias{shapviz.kernelshap} -\alias{shapviz.permshap} \alias{shapviz.H2ORegressionModel} \alias{shapviz.H2OBinomialModel} \alias{shapviz.H2OModel} @@ -51,8 +50,6 @@ shapviz(object, ...) \method{shapviz}{kernelshap}(object, X = object[["X"]], which_class = NULL, collapse = NULL, ...) -\method{shapviz}{permshap}(object, X = object[["X"]], which_class = NULL, collapse = NULL, ...) - \method{shapviz}{H2ORegressionModel}(object, X_pred, X = as.data.frame(X_pred), collapse = NULL, ...) \method{shapviz}{H2OBinomialModel}(object, X_pred, X = as.data.frame(X_pred), collapse = NULL, ...) @@ -122,8 +119,9 @@ Furthermore, \code{\link[=shapviz]{shapviz()}} can digest the results of \item \code{shapr::explain()}, \item \code{treeshap::treeshap()}, \item \code{DALEX::predict_parts()}, -\item \code{kernelshap::kernelshap()}, and -\item \code{kernelshap::permshap()}, +\item \code{kernelshap::kernelshap()}, +\item \code{kernelshap::permshap()}, and +\item \code{kernelshap::additive_shap()}, } check the vignettes for examples. @@ -159,9 +157,8 @@ return a "mshapviz" object, containing a "shapviz" object per output. \item \code{shapviz(shapr)}: Creates a "shapviz" object from \code{shapr::explain()}. -\item \code{shapviz(kernelshap)}: Creates a "shapviz" object from \code{kernelshap::kernelshap()}. - -\item \code{shapviz(permshap)}: Creates a "shapviz" object from \code{kernelshap::permshap()}. +\item \code{shapviz(kernelshap)}: Creates a "shapviz" object from an object of class 'kernelshap'. This includes +results of \code{kernelshap()}, \code{permshap()}, and \code{additive_shap()}. \item \code{shapviz(H2ORegressionModel)}: Creates a "shapviz" object from a (tree-based) H2O regression model. diff --git a/packaging.R b/packaging.R index bca7388..62ef96f 100644 --- a/packaging.R +++ b/packaging.R @@ -112,7 +112,7 @@ if (FALSE) { check_rhub(platforms = "debian-gcc-devel") # Takes long - revdepcheck::revdep_check(num_workers = 4) + revdepcheck::revdep_check(num_workers = 4, bioc = FALSE) # Wait until above checks are passed without relevant notes/warnings # then submit to CRAN diff --git a/revdep/README.md b/revdep/README.md index de06d8c..182977e 100644 --- a/revdep/README.md +++ b/revdep/README.md @@ -2,64 +2,64 @@ |field |value | |:--------|:--------------------------------------------------------| -|version |R version 4.3.0 (2023-04-21 ucrt) | -|os |Windows 11 x64 (build 22621) | +|version |R version 4.4.1 (2024-06-14 ucrt) | +|os |Windows 11 x64 (build 22631) | |system |x86_64, mingw32 | |ui |RStudio | |language |(EN) | |collate |German_Switzerland.utf8 | |ctype |German_Switzerland.utf8 | |tz |Europe/Zurich | -|date |2024-01-12 | -|rstudio |2023.06.1+524 Mountain Hydrangea (desktop) | +|date |2024-08-20 | +|rstudio |2024.04.2+764 Chocolate Cosmos (desktop) | |pandoc |3.1.6 @ C:\Users\Michael\AppData\Local\Pandoc\pandoc.exe | # Dependencies |package |old |new |Δ | |:------------|:-------|:-------|:--| -|shapviz |0.9.2 |0.9.3 |* | -|cli |3.6.2 |3.6.2 | | -|colorspace |2.1-0 |2.1-0 | | -|commonmark |1.9.0 |1.9.0 | | -|curl |5.2.0 |5.2.0 | | -|data.table |1.14.10 |1.14.10 | | +|shapviz |0.9.3 |0.9.4 |* | +|cli |3.6.3 |3.6.3 | | +|colorspace |2.1-1 |2.1-1 | | +|commonmark |1.9.1 |1.9.1 | | +|curl |5.2.1 |5.2.1 | | +|data.table |1.15.4 |1.15.4 | | |fansi |1.0.6 |1.0.6 | | -|farver |2.1.1 |2.1.1 | | -|ggfittext |0.10.1 |0.10.1 | | +|farver |2.1.2 |2.1.2 | | +|ggfittext |0.10.2 |0.10.2 | | |gggenes |0.5.1 |0.5.1 | | -|ggplot2 |3.4.4 |3.4.4 | | +|ggplot2 |3.5.1 |3.5.1 | | |ggrepel |0.9.5 |0.9.5 | | |glue |1.7.0 |1.7.0 | | |gridtext |0.1.5 |0.1.5 | | -|gtable |0.3.4 |0.3.4 | | +|gtable |0.3.5 |0.3.5 | | |isoband |0.2.7 |0.2.7 | | |jpeg |0.1-10 |0.1-10 | | |jsonlite |1.8.8 |1.8.8 | | |labeling |0.4.3 |0.4.3 | | |lifecycle |1.0.4 |1.0.4 | | |magrittr |2.0.3 |2.0.3 | | -|markdown |1.12 |1.12 | | -|munsell |0.5.0 |0.5.0 | | +|markdown |1.13 |1.13 | | +|munsell |0.5.1 |0.5.1 | | |patchwork |1.2.0 |1.2.0 | | |pillar |1.9.0 |1.9.0 | | |pkgconfig |2.0.3 |2.0.3 | | |png |0.1-8 |0.1-8 | | |R6 |2.5.1 |2.5.1 | | |RColorBrewer |1.1-3 |1.1-3 | | -|Rcpp |1.0.12 |1.0.12 | | -|rlang |1.1.3 |1.1.3 | | +|Rcpp |1.0.13 |1.0.13 | | +|rlang |1.1.4 |1.1.4 | | |scales |1.3.0 |1.3.0 | | |shades |1.4.0 |1.4.0 | | -|stringi |1.8.3 |1.8.3 | | +|stringi |1.8.4 |1.8.4 | | |stringr |1.5.1 |1.5.1 | | |tibble |3.2.1 |3.2.1 | | |utf8 |1.2.4 |1.2.4 | | |vctrs |0.6.5 |0.6.5 | | |viridisLite |0.4.2 |0.4.2 | | -|withr |2.5.2 |2.5.2 | | -|xfun |0.41 |0.41 | | -|xgboost |1.7.6.1 |1.7.6.1 | | +|withr |3.0.1 |3.0.1 | | +|xfun |0.47 |0.47 | | +|xgboost |1.7.8.1 |1.7.8.1 | | |xml2 |1.3.6 |1.3.6 | | # Revdeps diff --git a/vignettes/geographic.Rmd b/vignettes/geographic.Rmd index 778ad3d..124b197 100644 --- a/vignettes/geographic.Rmd +++ b/vignettes/geographic.Rmd @@ -78,17 +78,10 @@ params <- list( learning_rate = 0.2, objective = "reg:squarederror", max_depth = 5, nthread = 1 ) -fit <- xgb.train( - params = params, - data = dtrain, - watchlist = list(valid = dvalid), - early_stopping_rounds = 20, - nrounds = 1000, - callbacks = list(cb.print.evaluation(period = 100)) -) +fit <- xgb.train(params = params, data = dtrain, nrounds = 200) ``` -Let's first study selected SHAP dependence plots, evaluated on the validation dataset with around 2800 observations. Note that we could as well use the training data for this purpose, but it is a bit too large. +Let's first study selected SHAP dependence plots, evaluated on the validation dataset with around 2800 observations. Note that we could as well use (a subset of) the training data for this purpose. ```{r} sv <- shapviz(fit, X_pred = X_valid) @@ -139,14 +132,7 @@ ic <- c( # Modify parameters params$interaction_constraints <- ic -fit2 <- xgb.train( - params = params, - data = dtrain2, - watchlist = list(valid = dvalid2), - early_stopping_rounds = 20, - nrounds = 1000, - callbacks = list(cb.print.evaluation(period = 100)) -) +fit2 <- xgb.train(params = params, data = dtrain2, nrounds = 200) # SHAP analysis sv2 <- shapviz(fit2, X_pred = X_valid2) From f183e1c30df2a1f355383fd418a5c0d1e77595ff Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Thu, 22 Aug 2024 08:56:23 +0200 Subject: [PATCH 2/2] Done --- CRAN-SUBMISSION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index c078dc8..90162a6 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.9.3 -Date: 2024-01-12 11:21:47 UTC -SHA: d2f329e540177df778e787a27dec50d34cb5ed5c +Version: 0.9.4 +Date: 2024-08-20 20:00:54 UTC +SHA: 6269666c0bc2dbb48fd4193adec46b9fbf4fdccd