diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 66ec04c..ab6e4fc 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ -Version: 0.8.0 -Date: 2023-05-09 17:34:27 UTC -SHA: e2f9b361e5cb79eb0faa79faabceb98a4bd1726a +Version: 0.9.0 +Date: 2023-06-09 14:22:29 UTC +SHA: 3f6bd1f781851c169a080adb317142b133f58147 diff --git a/R/methods.R b/R/methods.R index f32f19a..8b778e6 100644 --- a/R/methods.R +++ b/R/methods.R @@ -271,11 +271,13 @@ c.shapviz <- function(...) { #' @param ... Arguments passed to `split()`. #' @returns A "mshapviz" object. #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' sv <- shapviz(fit, X_pred = dtrain, X = iris) #' mx <- split(sv, f = iris$Species) #' sv_dependence(mx, "Petal.Length") +#' } #' @export #' @seealso [shapviz()], [rbind.shapviz()] split.shapviz <- function(x, f, ...) { diff --git a/R/shapviz.R b/R/shapviz.R index 6ec0aab..e1f61a2 100644 --- a/R/shapviz.R +++ b/R/shapviz.R @@ -63,7 +63,7 @@ #' scale of the SHAP values. #' - `S_inter`: Numeric array of SHAP interaction values (or `NULL`). #' @seealso -#' [sv_importance()], [sv_dependence()], [sv_interaction()], +#' [sv_importance()], [sv_dependence()], [sv_dependence2D()], [sv_interaction()], #' [sv_waterfall()], [sv_force()], [collapse_shap()] #' @examples #' S <- matrix(c(1, -1, -1, 1), ncol = 2, dimnames = list(NULL, c("x", "y"))) @@ -110,11 +110,11 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' Creates a "shapviz" object from an XGBoost model. #' @export #' @examples -#' +#' \dontrun{ #' # XGBoost models -#' X_pred <- data.matrix(iris[, -1L]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50L, nthread = 1L) +#' X_pred <- data.matrix(iris[, -1]) +#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1]) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' #' # Will use numeric matrix "X_pred" as feature matrix #' x <- shapviz(fit, X_pred = X_pred) @@ -129,15 +129,13 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' x <- shapviz(fit, X_pred = dtrain, X = iris) #' #' # Multiclass setting -#' params <- list(objective = "multi:softprob", num_class = 3L) -#' X_pred <- data.matrix(iris[, -5L]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5L]) - 1L) -#' fit <- xgboost::xgb.train( -#' params = params, data = dtrain, nrounds = 50L, nthread = 1L -#' ) +#' params <- list(objective = "multi:softprob", num_class = 3, nthread = 1) +#' X_pred <- data.matrix(iris[, -5]) +#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1) +#' fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10) #' #' # Select specific class -#' x <- shapviz(fit, X_pred = X_pred, which_class = 3L) +#' x <- shapviz(fit, X_pred = X_pred, which_class = 3) #' x #' #' # Or combine all classes to "mshapviz" object @@ -145,9 +143,9 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' x #' #' # What if we would have one-hot-encoded values and want to explain the original column? -#' X_pred <- stats::model.matrix(~ . -1, iris[, -1L]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1L])) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50L) +#' X_pred <- stats::model.matrix(~ . -1, iris[, -1]) +#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1])) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz( #' fit, #' X_pred = X_pred, @@ -159,29 +157,30 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' # Similarly with LightGBM #' if (requireNamespace("lightgbm", quietly = TRUE)) { #' fit <- lightgbm::lgb.train( -#' params = list(objective = "regression", num_thread = 1L), -#' data = lightgbm::lgb.Dataset(X_pred, label = iris[, 1L]), -#' nrounds = 50L, -#' verbose = -2L +#' params = list(objective = "regression", num_thread = 1), +#' data = lightgbm::lgb.Dataset(X_pred, label = iris[, 1]), +#' nrounds = 10, +#' verbose = -2 #' ) #' #' x <- shapviz(fit, X_pred = X_pred) #' x #' #' # Multiclass -#' params <- list(objective = "multiclass", num_class = 3L, num_thread = 1L) -#' X_pred <- data.matrix(iris[, -5L]) -#' dtrain <- lightgbm::lgb.Dataset(X_pred, label = as.integer(iris[, 5L]) - 1L) -#' fit <- lightgbm::lgb.train(params = params, data = dtrain, nrounds = 50L) +#' params <- list(objective = "multiclass", num_class = 3, num_thread = 1) +#' X_pred <- data.matrix(iris[, -5]) +#' dtrain <- lightgbm::lgb.Dataset(X_pred, label = as.integer(iris[, 5]) - 1) +#' fit <- lightgbm::lgb.train(params = params, data = dtrain, nrounds = 10) #' #' # Select specific class -#' x <- shapviz(fit, X_pred = X_pred, which_class = 3L) +#' x <- shapviz(fit, X_pred = X_pred, which_class = 3) #' x #' #' # Or combine all classes to a "mshapviz" object #' mx <- shapviz(fit, X_pred = X_pred) #' mx -#' all.equal(mx[[3L]], x) +#' all.equal(mx[[3]], x) +#' } #' } shapviz.xgb.Booster = function(object, X_pred, X = X_pred, which_class = NULL, collapse = NULL, interactions = FALSE, ...) { diff --git a/R/sv_dependence.R b/R/sv_dependence.R index fb15f82..20e7c2d 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -34,8 +34,9 @@ #' @param ... Arguments passed to [ggplot2::geom_jitter()]. #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris) #' sv_dependence(x, "Petal.Length") #' sv_dependence(x, "Petal.Length", color_var = "Species") @@ -43,17 +44,13 @@ #' sv_dependence(x, c("Species", "Petal.Length")) #' sv_dependence(x, "Petal.Width", color_var = c("Species", "Petal.Length")) #' -#' # SHAP interaction values +#' # SHAP interaction values/main effects #' x2 <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) #' sv_dependence(x2, "Petal.Length", interactions = TRUE) -#' sv_dependence(x2, c("Petal.Length", "Species"), color_var = NULL, interactions = TRUE) -#' -#' # Show main effect of "Petal.Length" for setosa and virginica separately -#' mx <- c( -#' setosa = x2[x2$X$Species == "setosa"], -#' virginica = x2[x2$X$Species == "virginica"] +#' sv_dependence( +#' x2, c("Petal.Length", "Species"), color_var = NULL, interactions = TRUE #' ) -#' sv_dependence(mx, "Petal.Length", color_var = NULL, interactions = TRUE) +#' } #' @export #' @seealso [potential_interactions()] sv_dependence <- function(object, ...) { diff --git a/R/sv_dependence2D.R b/R/sv_dependence2D.R index d7d6167..6f1e6f6 100644 --- a/R/sv_dependence2D.R +++ b/R/sv_dependence2D.R @@ -28,15 +28,12 @@ #' @param ... Arguments passed to [ggplot2::geom_jitter()]. #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' sv <- shapviz(fit, X_pred = dtrain, X = iris) #' sv_dependence2D(sv, x = "Petal.Length", y = "Species") -#' sv_dependence2D(sv, x = "Petal.Length", y = "Sepal.Width") #' sv_dependence2D(sv, x = c("Petal.Length", "Species"), y = "Sepal.Width") -#' sv_dependence2D( -#' sv, x = c("Petal.Length", "Species"), y = c("Petal.Width", "Sepal.Width") -#' ) #' #' # SHAP interaction values #' sv2 <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) @@ -48,6 +45,7 @@ #' # mshapviz object #' mx <- split(sv, f = iris$Species) #' sv_dependence2D(mx, x = "Petal.Length", y = "Sepal.Width") +#' } #' @export #' @seealso [sv_dependence()] sv_dependence2D <- function(object, ...) { diff --git a/R/sv_force.R b/R/sv_force.R index 6d99b11..1d8a463 100644 --- a/R/sv_force.R +++ b/R/sv_force.R @@ -11,17 +11,16 @@ #' (via [ggrepel::geom_text_repel()]). #' @returns An object of class "ggplot" (or "patchwork") representing a force plot. #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) #' sv_force(x) #' sv_force(x, row_id = 65, max_display = 3, size = 9, fill_colors = 4:5) #' #' # Aggregate over all observations with Petal.Length == 1.4 #' sv_force(x, row_id = x$X$Petal.Length == 1.4) -#' -#' # Combine two force plots via {patchwork} -#' sv_force(c(Obs1 = x[1], Obs2 = x[2])) +#' } #' @export #' @seealso [sv_waterfall()] sv_force <- function(object, ...) { diff --git a/R/sv_importance.R b/R/sv_importance.R index 1e9f2fe..fd53c65 100644 --- a/R/sv_importance.R +++ b/R/sv_importance.R @@ -42,19 +42,16 @@ #' `kind = "no"` - a named numeric vector of sorted SHAP feature importances #' (or a list of such vectors in case of an object of class "mshapviz"). #' @examples +#' \dontrun{ #' X_train <- data.matrix(iris[, -1]) #' dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = X_train) #' sv_importance(x) -#' sv_importance(x, kind = "beeswarm", show_numbers = TRUE) #' sv_importance(x, kind = "no") +#' sv_importance(x, kind = "beeswarm", show_numbers = TRUE) +#' } #' -#' X <- data.frame(matrix(rnorm(1000), ncol = 20)) -#' S <- as.matrix(X) -#' x2 <- shapviz(S, X) -#' sv_importance(x2) -#' sv_importance(x2, max_display = 5) #' @seealso \code{\link{sv_interaction}} #' @export sv_importance <- function(object, ...) { diff --git a/R/sv_interaction.R b/R/sv_interaction.R index 8f2f489..36c30d4 100644 --- a/R/sv_interaction.R +++ b/R/sv_interaction.R @@ -18,12 +18,13 @@ #' numeric matrix of average absolute SHAP interactions sorted by the average #' absolute SHAP values (or a list of such matrices in case of "mshapviz" object). #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) -#' sv_interaction(x) -#' sv_interaction(x, max_display = 2, size = 3, alpha = 0.1) #' sv_interaction(x, kind = "no") +#' sv_interaction(x, max_display = 2, size = 3) +#' } #' @seealso [sv_importance()] #' @export sv_interaction <- function(object, ...) { diff --git a/R/sv_waterfall.R b/R/sv_waterfall.R index 0222053..21e8783 100644 --- a/R/sv_waterfall.R +++ b/R/sv_waterfall.R @@ -35,8 +35,9 @@ #' will altogether suppress adding text to the bars. #' @returns An object of class "ggplot" (or "patchwork") representing a waterfall plot. #' @examples +#' \dontrun{ #' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +#' fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) #' sv_waterfall(x) #' sv_waterfall(x, row_id = 123, max_display = 2, size = 9, fill_colors = 4:5) @@ -48,16 +49,7 @@ #' #' # Aggregate over all observations with Petal.Length == 1.4 #' sv_waterfall(x, row_id = x$X$Petal.Length == 1.4) -#' -#' # More features -#' X <- as.data.frame(matrix(1:100, nrow = 10)) -#' S <- as.matrix(X) -#' shp <- shapviz(S, X) -#' sv_waterfall(shp) -#' -#' # Combine two waterfall plots via {patchwork} -#' sv_waterfall(c(Obs1 = x[1], Obs2 = x[2])) + -#' patchwork::plot_layout(ncol = 1) +#' } #' @export #' @seealso [sv_force()] sv_waterfall <- function(object, ...) { diff --git a/cran-comments.md b/cran-comments.md index 90c84c6..c1cd7fe 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,18 @@ +# Re-resubmission + +Examples that take 0.09 seconds on my Windows laptop take 6 seconds on Linux. I don't know how to fix this, so I am resorting to dontruns around all examples involving XGBoost. + +# Re-resubmission + +Examples still taking too long on Linux. No idea how to fix this except using dontruns... + +# Resubmission + +Examples taking too long on Linux. + +- I have now reduced the number of examples. +- And the number of boosting rounds. + # shapviz 0.9.0 Hello CRAN team @@ -8,29 +23,21 @@ Hello CRAN team ## Checks look good -### check(manual = TRUE, cran = TRUE) - -> checking data for ASCII and uncompressed saves ... OK - WARNING - 'qpdf' is needed for checks on size reduction of PDFs +### check(manual = TRUE, cran = TRUE) > checking HTML version of manual ... NOTE Skipping checking HTML validation: no command 'tidy' found -### check_rhub(): Some usual notes... +### RHub * checking package dependencies ... NOTE Packages which this enhances but not available for checking: 'fastshap', 'h2o', 'lightgbm' * checking HTML version of manual ... NOTE +Skipping checking HTML validation: no command 'tidy' found Skipping checking math rendering: package 'V8' unavailable -* checking for non-standard things in the check directory ... NOTE -Found the following files/directories: - ''NULL'' -* checking for detritus in the temp directory ... NOTE -Found the following files/directories: - 'lastMiKTeXException' - -### check_win_devel() + + +### Winbuilder() Status: OK diff --git a/man/shapviz.Rd b/man/shapviz.Rd index da5c9b6..3ef9642 100644 --- a/man/shapviz.Rd +++ b/man/shapviz.Rd @@ -168,11 +168,11 @@ return a "mshapviz" object, containing a "shapviz" object per output. S <- matrix(c(1, -1, -1, 1), ncol = 2, dimnames = list(NULL, c("x", "y"))) X <- data.frame(x = c("a", "b"), y = c(100, 10)) shapviz(S, X, baseline = 4) - +\dontrun{ # XGBoost models -X_pred <- data.matrix(iris[, -1L]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50L, nthread = 1L) +X_pred <- data.matrix(iris[, -1]) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1]) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) # Will use numeric matrix "X_pred" as feature matrix x <- shapviz(fit, X_pred = X_pred) @@ -187,15 +187,13 @@ sv_dependence(x, "Species") x <- shapviz(fit, X_pred = dtrain, X = iris) # Multiclass setting -params <- list(objective = "multi:softprob", num_class = 3L) -X_pred <- data.matrix(iris[, -5L]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5L]) - 1L) -fit <- xgboost::xgb.train( - params = params, data = dtrain, nrounds = 50L, nthread = 1L -) +params <- list(objective = "multi:softprob", num_class = 3, nthread = 1) +X_pred <- data.matrix(iris[, -5]) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1) +fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10) # Select specific class -x <- shapviz(fit, X_pred = X_pred, which_class = 3L) +x <- shapviz(fit, X_pred = X_pred, which_class = 3) x # Or combine all classes to "mshapviz" object @@ -203,9 +201,9 @@ x <- shapviz(fit, X_pred = X_pred) x # What if we would have one-hot-encoded values and want to explain the original column? -X_pred <- stats::model.matrix(~ . -1, iris[, -1L]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1L])) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50L) +X_pred <- stats::model.matrix(~ . -1, iris[, -1]) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1])) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz( fit, X_pred = X_pred, @@ -217,32 +215,33 @@ summary(x) # Similarly with LightGBM if (requireNamespace("lightgbm", quietly = TRUE)) { fit <- lightgbm::lgb.train( - params = list(objective = "regression", num_thread = 1L), - data = lightgbm::lgb.Dataset(X_pred, label = iris[, 1L]), - nrounds = 50L, - verbose = -2L + params = list(objective = "regression", num_thread = 1), + data = lightgbm::lgb.Dataset(X_pred, label = iris[, 1]), + nrounds = 10, + verbose = -2 ) x <- shapviz(fit, X_pred = X_pred) x # Multiclass - params <- list(objective = "multiclass", num_class = 3L, num_thread = 1L) - X_pred <- data.matrix(iris[, -5L]) - dtrain <- lightgbm::lgb.Dataset(X_pred, label = as.integer(iris[, 5L]) - 1L) - fit <- lightgbm::lgb.train(params = params, data = dtrain, nrounds = 50L) + params <- list(objective = "multiclass", num_class = 3, num_thread = 1) + X_pred <- data.matrix(iris[, -5]) + dtrain <- lightgbm::lgb.Dataset(X_pred, label = as.integer(iris[, 5]) - 1) + fit <- lightgbm::lgb.train(params = params, data = dtrain, nrounds = 10) # Select specific class - x <- shapviz(fit, X_pred = X_pred, which_class = 3L) + x <- shapviz(fit, X_pred = X_pred, which_class = 3) x # Or combine all classes to a "mshapviz" object mx <- shapviz(fit, X_pred = X_pred) mx - all.equal(mx[[3L]], x) + all.equal(mx[[3]], x) +} } } \seealso{ -\code{\link[=sv_importance]{sv_importance()}}, \code{\link[=sv_dependence]{sv_dependence()}}, \code{\link[=sv_interaction]{sv_interaction()}}, +\code{\link[=sv_importance]{sv_importance()}}, \code{\link[=sv_dependence]{sv_dependence()}}, \code{\link[=sv_dependence2D]{sv_dependence2D()}}, \code{\link[=sv_interaction]{sv_interaction()}}, \code{\link[=sv_waterfall]{sv_waterfall()}}, \code{\link[=sv_force]{sv_force()}}, \code{\link[=collapse_shap]{collapse_shap()}} } diff --git a/man/split.shapviz.Rd b/man/split.shapviz.Rd index eb0e527..35227f2 100644 --- a/man/split.shapviz.Rd +++ b/man/split.shapviz.Rd @@ -20,12 +20,14 @@ A "mshapviz" object. Splits "shapviz" object along a vector \code{f} into an object of class "mshapviz". } \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) sv <- shapviz(fit, X_pred = dtrain, X = iris) mx <- split(sv, f = iris$Species) sv_dependence(mx, "Petal.Length") } +} \seealso{ \code{\link[=shapviz]{shapviz()}}, \code{\link[=rbind.shapviz]{rbind.shapviz()}} } diff --git a/man/sv_dependence.Rd b/man/sv_dependence.Rd index 283c1b1..b486032 100644 --- a/man/sv_dependence.Rd +++ b/man/sv_dependence.Rd @@ -86,8 +86,9 @@ to focus on pure interaction effects (multiplied by two) or on pure main effects }} \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris) sv_dependence(x, "Petal.Length") sv_dependence(x, "Petal.Length", color_var = "Species") @@ -95,17 +96,13 @@ sv_dependence(x, "Petal.Length", color_var = NULL) sv_dependence(x, c("Species", "Petal.Length")) sv_dependence(x, "Petal.Width", color_var = c("Species", "Petal.Length")) -# SHAP interaction values +# SHAP interaction values/main effects x2 <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) sv_dependence(x2, "Petal.Length", interactions = TRUE) -sv_dependence(x2, c("Petal.Length", "Species"), color_var = NULL, interactions = TRUE) - -# Show main effect of "Petal.Length" for setosa and virginica separately -mx <- c( - setosa = x2[x2$X$Species == "setosa"], - virginica = x2[x2$X$Species == "virginica"] +sv_dependence( + x2, c("Petal.Length", "Species"), color_var = NULL, interactions = TRUE ) -sv_dependence(mx, "Petal.Length", color_var = NULL, interactions = TRUE) +} } \seealso{ \code{\link[=potential_interactions]{potential_interactions()}} diff --git a/man/sv_dependence2D.Rd b/man/sv_dependence2D.Rd index e8170ce..7a685ee 100644 --- a/man/sv_dependence2D.Rd +++ b/man/sv_dependence2D.Rd @@ -93,15 +93,12 @@ to focus on pure interaction effects (multiplied by two). }} \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) sv <- shapviz(fit, X_pred = dtrain, X = iris) sv_dependence2D(sv, x = "Petal.Length", y = "Species") -sv_dependence2D(sv, x = "Petal.Length", y = "Sepal.Width") sv_dependence2D(sv, x = c("Petal.Length", "Species"), y = "Sepal.Width") -sv_dependence2D( - sv, x = c("Petal.Length", "Species"), y = c("Petal.Width", "Sepal.Width") -) # SHAP interaction values sv2 <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) @@ -114,6 +111,7 @@ sv_dependence2D( mx <- split(sv, f = iris$Species) sv_dependence2D(mx, x = "Petal.Length", y = "Sepal.Width") } +} \seealso{ \code{\link[=sv_dependence]{sv_dependence()}} } diff --git a/man/sv_force.Rd b/man/sv_force.Rd index bc1079c..3621732 100644 --- a/man/sv_force.Rd +++ b/man/sv_force.Rd @@ -96,17 +96,16 @@ baseline SHAP value. }} \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) sv_force(x) sv_force(x, row_id = 65, max_display = 3, size = 9, fill_colors = 4:5) # Aggregate over all observations with Petal.Length == 1.4 sv_force(x, row_id = x$X$Petal.Length == 1.4) - -# Combine two force plots via {patchwork} -sv_force(c(Obs1 = x[1], Obs2 = x[2])) +} } \seealso{ \code{\link[=sv_waterfall]{sv_waterfall()}} diff --git a/man/sv_importance.Rd b/man/sv_importance.Rd index cbb026d..596ec13 100644 --- a/man/sv_importance.Rd +++ b/man/sv_importance.Rd @@ -111,19 +111,16 @@ are sorted in decreasing order of importance. }} \examples{ +\dontrun{ X_train <- data.matrix(iris[, -1]) dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = X_train) sv_importance(x) -sv_importance(x, kind = "beeswarm", show_numbers = TRUE) sv_importance(x, kind = "no") +sv_importance(x, kind = "beeswarm", show_numbers = TRUE) +} -X <- data.frame(matrix(rnorm(1000), ncol = 20)) -S <- as.matrix(X) -x2 <- shapviz(S, X) -sv_importance(x2) -sv_importance(x2, max_display = 5) } \seealso{ \code{\link{sv_interaction}} diff --git a/man/sv_interaction.Rd b/man/sv_interaction.Rd index 37660aa..a8390fa 100644 --- a/man/sv_interaction.Rd +++ b/man/sv_interaction.Rd @@ -87,12 +87,13 @@ The features are sorted in decreasing order of usual SHAP importance. }} \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) -sv_interaction(x) -sv_interaction(x, max_display = 2, size = 3, alpha = 0.1) sv_interaction(x, kind = "no") +sv_interaction(x, max_display = 2, size = 3) +} } \seealso{ \code{\link[=sv_importance]{sv_importance()}} diff --git a/man/sv_waterfall.Rd b/man/sv_waterfall.Rd index 48b87f1..35ac9ec 100644 --- a/man/sv_waterfall.Rd +++ b/man/sv_waterfall.Rd @@ -102,8 +102,9 @@ baseline SHAP value. }} \examples{ +\dontrun{ dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) -fit <- xgboost::xgb.train(data = dtrain, nrounds = 50, nthread = 1) +fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) sv_waterfall(x) sv_waterfall(x, row_id = 123, max_display = 2, size = 9, fill_colors = 4:5) @@ -115,16 +116,7 @@ sv_waterfall( # Aggregate over all observations with Petal.Length == 1.4 sv_waterfall(x, row_id = x$X$Petal.Length == 1.4) - -# More features -X <- as.data.frame(matrix(1:100, nrow = 10)) -S <- as.matrix(X) -shp <- shapviz(S, X) -sv_waterfall(shp) - -# Combine two waterfall plots via {patchwork} -sv_waterfall(c(Obs1 = x[1], Obs2 = x[2])) + - patchwork::plot_layout(ncol = 1) +} } \seealso{ \code{\link[=sv_force]{sv_force()}} diff --git a/packaging.R b/packaging.R index 1a5681f..4f3ddec 100644 --- a/packaging.R +++ b/packaging.R @@ -101,7 +101,7 @@ library(devtools) document() test() -check(manual = TRUE, cran = TRUE, vignettes = TRUE) +check(manual = TRUE, cran = TRUE, vignettes = FALSE) build() # build(binary = TRUE) install(upgrade = FALSE) @@ -109,7 +109,7 @@ install(upgrade = FALSE) # Run only if package is public(!) and should go to CRAN if (FALSE) { check_win_devel() - check_rhub() + check_rhub(platforms = "debian-gcc-devel") # Wait until above checks are passed without relevant notes/warnings # then submit to CRAN diff --git a/vignettes/geographic.Rmd b/vignettes/geographic.Rmd index 9c91c4d..6814b7c 100644 --- a/vignettes/geographic.Rmd +++ b/vignettes/geographic.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ## Setting -In a model with geographic components, we want to express a functional $T$ (usually the expectation or a quantile) of a response $Y$ as a function $f$ of a set of geographic features (latitude/longitude and/or postal code and/or other features varying with location): +In a model with geographic components, we want to express a functional $T$ (usually the expectation or a quantile) of a response $Y$ as a function $f$ of a set of geographic features (latitude/longitude and/or postal code and/or other features varying with location), and other features: $$ T(Y \mid X^\textrm{geo}, X^\textrm{other}) \approx f(X^\textrm{geo}, X^\textrm{other}) @@ -34,7 +34,7 @@ That's where the additivity of SHAP values comes into play: The sum of SHAP valu ## A first example -For illustration, we will use a beautiful house price dataset containing information on about 14'000 houses sold in 2016 in Miami-Date County. Some of the columns are as follows: +For illustration, we will use a beautiful house price dataset containing information on about 14'000 houses sold in 2016 in Miami-Dade County. Some of the columns are as follows: - **SALE_PRC**: Sale price in USD: Its logarithm will be our model response. - *LATITUDE*, *LONGITUDE*: Coordinates