diff --git a/NEWS.md b/NEWS.md index 7387ebb..8996565 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,15 +2,16 @@ ## User-visible changes -- `mshapviz()` objects can now be rowbinded via `rbind()` or `+`. Implemented by @jmaspons in [#110](https://github.com/ModelOriented/shapviz/pull/110). +- `mshapviz()` objects can now be rowbinded via `rbind()` or `+`. Implemented by [@jmaspons](https://github.com/jmaspons) in [#110](https://github.com/ModelOriented/shapviz/pull/110). - `mshapviz()` is more strict when combining multiple "shapviz" objects. These now need to have identical column names, see [#114](https://github.com/ModelOriented/shapviz/pull/114). ## Small changes - `print.shapviz()` now shows top two rows of SHAP matrix. - Re-activate all unit tests. +- Setting `nthread = 1` in all calls to `xgb.DMatrix()` as suggested by [@jmaspons](https://github.com/jmaspons) in [issue #109](https://github.com/ModelOriented/shapviz/issues/109). - Added "How to contribute" to README. -- `permshap()` connector is now part of {kerneshap}. +- `permshap()` connector is now part of {kernelshap} [#122](https://github.com/ModelOriented/shapviz/pull/122). 
## Bug fixes diff --git a/R/shapviz.R b/R/shapviz.R index e6a1779..39a9726 100644 --- a/R/shapviz.R +++ b/R/shapviz.R @@ -114,7 +114,7 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' \dontrun{ #' # XGBoost models #' X_pred <- data.matrix(iris[, -1]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1], nthread = 1) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' #' # Will use numeric matrix "X_pred" as feature matrix @@ -132,8 +132,10 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' # Multiclass setting #' params <- list(objective = "multi:softprob", num_class = 3) #' X_pred <- data.matrix(iris[, -5]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1) -#' fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10) +#' dtrain <- xgboost::xgb.DMatrix( +#' X_pred, label = as.integer(iris[, 5]) - 1, nthread = 1 +#' ) +#' fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10, nthread = 1) #' #' # Select specific class #' x <- shapviz(fit, X_pred = X_pred, which_class = 3) @@ -145,7 +147,7 @@ shapviz.matrix = function(object, X, baseline = 0, collapse = NULL, #' #' # What if we would have one-hot-encoded values and want to explain the original column? #' X_pred <- stats::model.matrix(~ . -1, iris[, -1]) -#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1])) +#' dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1]), nthread = 1) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz( #' fit, diff --git a/R/sv_dependence.R b/R/sv_dependence.R index 20e7c2d..8205b50 100644 --- a/R/sv_dependence.R +++ b/R/sv_dependence.R @@ -35,7 +35,9 @@ #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. 
#' @examples #' \dontrun{ -#' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix( +#' data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +#' ) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris) #' sv_dependence(x, "Petal.Length") diff --git a/R/sv_dependence2D.R b/R/sv_dependence2D.R index 6f4455a..2f3e41e 100644 --- a/R/sv_dependence2D.R +++ b/R/sv_dependence2D.R @@ -30,7 +30,9 @@ #' @returns An object of class "ggplot" (or "patchwork") representing a dependence plot. #' @examples #' \dontrun{ -#' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix( +#' data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +#' ) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' sv <- shapviz(fit, X_pred = dtrain, X = iris) #' sv_dependence2D(sv, x = "Petal.Length", y = "Species") diff --git a/R/sv_force.R b/R/sv_force.R index 1d8a463..3e3a305 100644 --- a/R/sv_force.R +++ b/R/sv_force.R @@ -12,7 +12,9 @@ #' @returns An object of class "ggplot" (or "patchwork") representing a force plot. 
#' @examples #' \dontrun{ -#' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix( +#' data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +#' ) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) #' sv_force(x) diff --git a/R/sv_importance.R b/R/sv_importance.R index 8537698..921a683 100644 --- a/R/sv_importance.R +++ b/R/sv_importance.R @@ -48,7 +48,7 @@ #' @examples #' \dontrun{ #' X_train <- data.matrix(iris[, -1]) -#' dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1], nthread = 1) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = X_train) #' sv_importance(x) diff --git a/R/sv_interaction.R b/R/sv_interaction.R index 36c30d4..be0707f 100644 --- a/R/sv_interaction.R +++ b/R/sv_interaction.R @@ -19,7 +19,9 @@ #' absolute SHAP values (or a list of such matrices in case of "mshapviz" object). #' @examples #' \dontrun{ -#' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix( +#' data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +#' ) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) #' sv_interaction(x, kind = "no") diff --git a/R/sv_waterfall.R b/R/sv_waterfall.R index 21e8783..c5b217e 100644 --- a/R/sv_waterfall.R +++ b/R/sv_waterfall.R @@ -36,7 +36,9 @@ #' @returns An object of class "ggplot" (or "patchwork") representing a waterfall plot. 
#' @examples #' \dontrun{ -#' dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +#' dtrain <- xgboost::xgb.DMatrix( +#' data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +#' ) #' fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) #' x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) #' sv_waterfall(x) diff --git a/man/shapviz.Rd b/man/shapviz.Rd index 4c305ec..a408183 100644 --- a/man/shapviz.Rd +++ b/man/shapviz.Rd @@ -177,7 +177,7 @@ shapviz(S, X, baseline = 4) \dontrun{ # XGBoost models X_pred <- data.matrix(iris[, -1]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1], nthread = 1) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) # Will use numeric matrix "X_pred" as feature matrix @@ -195,8 +195,10 @@ x <- shapviz(fit, X_pred = dtrain, X = iris) # Multiclass setting params <- list(objective = "multi:softprob", num_class = 3) X_pred <- data.matrix(iris[, -5]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1) -fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10) +dtrain <- xgboost::xgb.DMatrix( + X_pred, label = as.integer(iris[, 5]) - 1, nthread = 1 +) +fit <- xgboost::xgb.train(params = params, data = dtrain, nrounds = 10, nthread = 1) # Select specific class x <- shapviz(fit, X_pred = X_pred, which_class = 3) @@ -208,7 +210,7 @@ x # What if we would have one-hot-encoded values and want to explain the original column? X_pred <- stats::model.matrix(~ . 
-1, iris[, -1]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1])) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 1]), nthread = 1) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz( fit, diff --git a/man/sv_dependence.Rd b/man/sv_dependence.Rd index b486032..ec08909 100644 --- a/man/sv_dependence.Rd +++ b/man/sv_dependence.Rd @@ -87,7 +87,9 @@ to focus on pure interaction effects (multiplied by two) or on pure main effects }} \examples{ \dontrun{ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris) sv_dependence(x, "Petal.Length") diff --git a/man/sv_dependence2D.Rd b/man/sv_dependence2D.Rd index fb4d762..5248e4b 100644 --- a/man/sv_dependence2D.Rd +++ b/man/sv_dependence2D.Rd @@ -95,7 +95,9 @@ has no effect. }} \examples{ \dontrun{ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) sv <- shapviz(fit, X_pred = dtrain, X = iris) sv_dependence2D(sv, x = "Petal.Length", y = "Species") diff --git a/man/sv_force.Rd b/man/sv_force.Rd index 3621732..2eeddc9 100644 --- a/man/sv_force.Rd +++ b/man/sv_force.Rd @@ -97,7 +97,9 @@ baseline SHAP value. 
}} \examples{ \dontrun{ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +) fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) sv_force(x) diff --git a/man/sv_importance.Rd b/man/sv_importance.Rd index 2b62500..59f6f8d 100644 --- a/man/sv_importance.Rd +++ b/man/sv_importance.Rd @@ -119,7 +119,7 @@ are sorted in decreasing order of importance. \examples{ \dontrun{ X_train <- data.matrix(iris[, -1]) -dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix(X_train, label = iris[, 1], nthread = 1) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = X_train) sv_importance(x) diff --git a/man/sv_interaction.Rd b/man/sv_interaction.Rd index a8390fa..c24cba0 100644 --- a/man/sv_interaction.Rd +++ b/man/sv_interaction.Rd @@ -88,7 +88,9 @@ The features are sorted in decreasing order of usual SHAP importance. }} \examples{ \dontrun{ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +) fit <- xgboost::xgb.train(data = dtrain, nrounds = 10, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris, interactions = TRUE) sv_interaction(x, kind = "no") diff --git a/man/sv_waterfall.Rd b/man/sv_waterfall.Rd index 35ac9ec..8c3f689 100644 --- a/man/sv_waterfall.Rd +++ b/man/sv_waterfall.Rd @@ -103,7 +103,9 @@ baseline SHAP value. 
}} \examples{ \dontrun{ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1]), label = iris[, 1]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1]), label = iris[, 1], nthread = 1 +) fit <- xgboost::xgb.train(data = dtrain, nrounds = 20, nthread = 1) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1]) sv_waterfall(x) diff --git a/tests/testthat/test-collapse_shap.R b/tests/testthat/test-collapse_shap.R index 0d9e257..9fe70ec 100644 --- a/tests/testthat/test-collapse_shap.R +++ b/tests/testthat/test-collapse_shap.R @@ -87,7 +87,7 @@ test_that("collapse_shap works for SHAP interactions and two collapses (result i # # Real data example form <- Sepal.Length ~ Sepal.Width + Species - 1 iris_dummy <- model.matrix(form, data = iris) -dtrain <- xgboost::xgb.DMatrix(iris_dummy, label = iris[, 1L]) +dtrain <- xgboost::xgb.DMatrix(iris_dummy, label = iris[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) coll <- list(Species = paste0("Species", levels(iris$Species))) diff --git a/tests/testthat/test-interface.R b/tests/testthat/test-interface.R index 39adac2..3435ee0 100644 --- a/tests/testthat/test-interface.R +++ b/tests/testthat/test-interface.R @@ -259,7 +259,7 @@ test_that("combining incompatible shapviz objects fails", { # Multiclass with XGBoost X_pred <- data.matrix(iris[, -5L]) -dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5L]) - 1L) +dtrain <- xgboost::xgb.DMatrix(X_pred, label = as.integer(iris[, 5L]) - 1L, nthread = 1) fit <- xgboost::xgb.train( params = list(nthread = 1L), data = dtrain, diff --git a/tests/testthat/test-plots-mshapviz.R b/tests/testthat/test-plots-mshapviz.R index 8ec607b..f4d8833 100644 --- a/tests/testthat/test-plots-mshapviz.R +++ b/tests/testthat/test-plots-mshapviz.R @@ -1,4 +1,6 @@ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1L]), label = iris[, 1L]) +dtrain <- xgboost::xgb.DMatrix( + data.matrix(iris[, -1L]), label = iris[, 1L], nthread = 1 +) fit 
<- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1L]) x <- c(m1 = x, m2 = x) @@ -83,7 +85,7 @@ test_that("Interaction plots provide patchwork object", { # Non-standard name ir <- iris ir["strange name"] <- ir$Sepal.Width * ir$Petal.Length -dtrain <- xgboost::xgb.DMatrix(data.matrix(ir[, -1L]), label = ir[, 1L]) +dtrain <- xgboost::xgb.DMatrix(data.matrix(ir[, -1L]), label = ir[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = dtrain, X = ir[, -1L]) x <- c(m1 = x, m2 = x) @@ -108,7 +110,7 @@ test_that("plots work for non-syntactic column names", { test_that("sv_importance() and sv_interaction() and kind = 'no' gives matrix", { X_pred <- data.matrix(iris[, -1L]) - dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L]) + dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = X_pred, interactions = TRUE) x <- c(m1 = x, m2 = x) @@ -122,7 +124,7 @@ test_that("sv_importance() and sv_interaction() and kind = 'no' gives matrix", { test_that("sv_dependence() does not work with multiple v", { X_pred <- data.matrix(iris[, -1L]) - dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L]) + dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- c(m1 = shapviz(fit, X_pred = X_pred), m2 = shapviz(fit, X_pred = X_pred)) expect_error(sv_dependence(x, v = c("Species", "Sepal.Width"))) diff --git a/tests/testthat/test-plots-shapviz.R b/tests/testthat/test-plots-shapviz.R index a9e855f..2aea6bf 100644 --- a/tests/testthat/test-plots-shapviz.R +++ b/tests/testthat/test-plots-shapviz.R @@ -1,4 +1,6 @@ -dtrain <- xgboost::xgb.DMatrix(data.matrix(iris[, -1L]), label = iris[, 1L]) +dtrain <- 
xgboost::xgb.DMatrix( + data.matrix(iris[, -1L]), label = iris[, 1L], nthread = 1 +) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = dtrain, X = iris[, -1L]) @@ -124,7 +126,7 @@ test_that("Interaction plots provide ggplot object", { # Non-standard name ir <- iris ir["strange name"] <- ir$Sepal.Width * ir$Petal.Length -dtrain <- xgboost::xgb.DMatrix(data.matrix(ir[, -1L]), label = ir[, 1L]) +dtrain <- xgboost::xgb.DMatrix(data.matrix(ir[, -1L]), label = ir[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = dtrain, X = ir[, -1L]) @@ -160,7 +162,7 @@ test_that("there are no default sv_*() methods", { test_that("sv_importance() and sv_interaction() and kind = 'no' gives numeric output", { X_pred <- data.matrix(iris[, -1L]) - dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L]) + dtrain <- xgboost::xgb.DMatrix(X_pred, label = iris[, 1L], nthread = 1) fit <- xgboost::xgb.train(params = list(nthread = 1L), data = dtrain, nrounds = 1L) x <- shapviz(fit, X_pred = X_pred, interactions = TRUE) diff --git a/vignettes/basic_use.Rmd b/vignettes/basic_use.Rmd index a96278e..9f99858 100644 --- a/vignettes/basic_use.Rmd +++ b/vignettes/basic_use.Rmd @@ -88,7 +88,7 @@ diamonds[, ord] <- lapply(diamonds[, ord], factor, ordered = FALSE) # Fit XGBoost model x <- c("carat", "clarity", "cut", "color") -dtrain <- xgb.DMatrix(data.matrix(diamonds[x]), label = diamonds$price) +dtrain <- xgb.DMatrix(data.matrix(diamonds[x]), label = diamonds$price, nthread = 1) fit <- xgb.train( params = list(learning_rate = 0.1, nthread = 1), data = dtrain, nrounds = 65 ) diff --git a/vignettes/geographic.Rmd b/vignettes/geographic.Rmd index ff2e341..29a4e33 100644 --- a/vignettes/geographic.Rmd +++ b/vignettes/geographic.Rmd @@ -71,8 +71,8 @@ y_train <- log(miami$SALE_PRC[ix]) y_valid <- log(miami$SALE_PRC[-ix]) # Fit XGBoost model with early stopping -dtrain 
<- xgb.DMatrix(X_train, label = y_train) -dvalid <- xgb.DMatrix(X_valid, label = y_valid) +dtrain <- xgb.DMatrix(X_train, label = y_train, nthread = 1) +dvalid <- xgb.DMatrix(X_valid, label = y_valid, nthread = 1) params <- list( learning_rate = 0.2, objective = "reg:squarederror", max_depth = 5, nthread = 1 @@ -127,8 +127,8 @@ x2 <- c(x, more_geo) X_train2 <- data.matrix(miami[ix, x2]) X_valid2 <- data.matrix(miami[-ix, x2]) -dtrain2 <- xgb.DMatrix(X_train2, label = y_train) -dvalid2 <- xgb.DMatrix(X_valid2, label = y_valid) +dtrain2 <- xgb.DMatrix(X_train2, label = y_train, nthread = 1) +dvalid2 <- xgb.DMatrix(X_valid2, label = y_valid, nthread = 1) # Build interaction constraint vector ic <- c( diff --git a/vignettes/multiple_output.Rmd b/vignettes/multiple_output.Rmd index c212091..b664a9c 100644 --- a/vignettes/multiple_output.Rmd +++ b/vignettes/multiple_output.Rmd @@ -47,7 +47,7 @@ library(xgboost) params <- list(objective = "multi:softprob", num_class = 3, nthread = 1) X_pred <- data.matrix(iris[, -5]) -dtrain <- xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1) +dtrain <- xgb.DMatrix(X_pred, label = as.integer(iris[, 5]) - 1, nthread = 1) fit <- xgb.train(params = params, data = dtrain, nrounds = 50) # Create "mshapviz" object (logit scale) @@ -125,7 +125,7 @@ library(patchwork) library(xgboost) X_pred <- data.matrix(iris[, -1]) -dtrain <- xgb.DMatrix(X_pred, label = iris[, 1]) +dtrain <- xgb.DMatrix(X_pred, label = iris[, 1], nthread = 1) fit_xgb <- xgb.train(params = list(nthread = 1), data = dtrain, nrounds = 50) # Create "mshapviz" object