Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for deploying recipes #179

Merged
merged 7 commits into from
Mar 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ S3method(handler_predict,kproto)
S3method(handler_predict,lm)
S3method(handler_predict,model_stack)
S3method(handler_predict,ranger)
S3method(handler_predict,recipe)
S3method(handler_predict,train)
S3method(handler_predict,workflow)
S3method(handler_predict,xgb.Booster)
Expand All @@ -25,6 +26,7 @@ S3method(handler_startup,gam)
S3method(handler_startup,keras.engine.training.Model)
S3method(handler_startup,model_stack)
S3method(handler_startup,ranger)
S3method(handler_startup,recipe)
S3method(handler_startup,train)
S3method(handler_startup,workflow)
S3method(handler_startup,xgb.Booster)
Expand All @@ -44,6 +46,7 @@ S3method(vetiver_create_description,kproto)
S3method(vetiver_create_description,lm)
S3method(vetiver_create_description,model_stack)
S3method(vetiver_create_description,ranger)
S3method(vetiver_create_description,recipe)
S3method(vetiver_create_description,train)
S3method(vetiver_create_description,workflow)
S3method(vetiver_create_description,xgb.Booster)
Expand All @@ -54,6 +57,7 @@ S3method(vetiver_create_meta,keras.engine.training.Model)
S3method(vetiver_create_meta,kproto)
S3method(vetiver_create_meta,model_stack)
S3method(vetiver_create_meta,ranger)
S3method(vetiver_create_meta,recipe)
S3method(vetiver_create_meta,train)
S3method(vetiver_create_meta,workflow)
S3method(vetiver_create_meta,xgb.Booster)
Expand All @@ -66,6 +70,7 @@ S3method(vetiver_prepare_model,kproto)
S3method(vetiver_prepare_model,lm)
S3method(vetiver_prepare_model,model_stack)
S3method(vetiver_prepare_model,ranger)
S3method(vetiver_prepare_model,recipe)
S3method(vetiver_prepare_model,train)
S3method(vetiver_prepare_model,workflow)
S3method(vetiver_prepare_model,xgb.Booster)
Expand All @@ -78,6 +83,7 @@ S3method(vetiver_ptype,kproto)
S3method(vetiver_ptype,lm)
S3method(vetiver_ptype,model_stack)
S3method(vetiver_ptype,ranger)
S3method(vetiver_ptype,recipe)
S3method(vetiver_ptype,train)
S3method(vetiver_ptype,workflow)
S3method(vetiver_ptype,xgb.Booster)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# vetiver (development version)

* Added support for keras (#164).
* Added support for keras (#164) and recipes (#179).

* Moved where `required_pkgs` metadata is stored remotely, from the binary blob to plain text YAML (#176).

Expand Down
52 changes: 52 additions & 0 deletions R/recipe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' @rdname vetiver_create_description
#' @export
vetiver_create_description.recipe <- function(model) {
    # The description reports how many entries `model$steps` holds.
    # `num_steps` must keep this exact name: the template below looks it up
    # by name, and `{?s}` is cli's marker for choosing "step" vs "steps".
    num_steps <- length(model$steps)
    description <- cli::pluralize(
        "A feature engineering recipe with {num_steps} step{?s}"
    )
    description
}

#' @rdname vetiver_create_meta
#' @export
vetiver_create_meta.recipe <- function(model, metadata) {
    # Start from the packages the recipe itself reports, always add
    # "recipes", then de-duplicate and sort so the stored vector is stable.
    pkgs <- c(required_pkgs(model), "recipes")
    pkgs <- sort(unique(pkgs))
    vetiver_meta(metadata, required_pkgs = pkgs)
}

#' @rdname vetiver_create_ptype
#' @export
vetiver_ptype.recipe <- function(model, ...) {
    rlang::check_dots_used()
    # The input prototype is built from `prototype_data` passed through
    # `...`, not from `model`.
    extra_args <- list(...)
    # NOTE(review): `check_ptype_data()` is defined elsewhere; presumably it
    # validates that `prototype_data` was supplied -- confirm.
    check_ptype_data(extra_args)
    # Reduce the supplied data to its prototype and return it as a tibble.
    tibble::as_tibble(vctrs::vec_ptype(extra_args$prototype_data))
}

#' @rdname vetiver_create_description
#' @export
vetiver_prepare_model.recipe <- function(model) {
    # Guard clause: only a fully trained recipe may be prepared.
    is_trained <- recipes::fully_trained(model)
    if (!is_trained) {
        rlang::abort("Your `model` object is not a trained recipe.")
    }
    # Butcher the recipe first, then bundle the butchered object; the
    # bundled object is what gets returned.
    bundle::bundle(butcher::butcher(model))
}

#' @rdname handler_startup
#' @export
handler_startup.recipe <- function(vetiver_model) {
    # Hand the package names recorded in the model's metadata to
    # `attach_pkgs()`.
    pkgs <- vetiver_model$metadata$required_pkgs
    attach_pkgs(pkgs)
}

#' @rdname handler_startup
#' @export
handler_predict.recipe <- function(vetiver_model, ...) {
    # Returns a request handler. For a recipe, "predicting" means baking the
    # request data; `...` is forwarded to `recipes::bake()`.
    function(req) {
        body <- req$body
        # Convert the request body using the stored prototype before baking.
        typed <- vetiver_type_convert(body, vetiver_model$prototype)
        recipes::bake(vetiver_model$model, new_data = typed, ...)
    }
}
8 changes: 7 additions & 1 deletion man/handler_startup.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions man/vetiver_create_description.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/vetiver_create_meta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/vetiver_create_ptype.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions tests/testthat/_snaps/recipe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# can print recipe

Code
v
Output

-- car-splines - <bundled_recipe> model for deployment
A feature engineering recipe with 1 step using 2 features

# create plumber.R for recipe

Code
cat(readr::read_lines(tmp), sep = "\n")
Output
# Generated by the vetiver package; edit with care

library(pins)
library(plumber)
library(rapidoc)
library(vetiver)

# Packages needed to generate model predictions
if (FALSE) {
library(recipes)
}
b <- board_folder(path = "<redacted>")
v <- vetiver_pin_read(b, "car-splines")

#* @plumber
function(pr) {
pr %>% vetiver_api(v)
}

70 changes: 70 additions & 0 deletions tests/testthat/test-recipe.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# The whole file is skipped unless both recipes and plumber are installed.
skip_if_not_installed("recipes")
skip_if_not_installed("plumber")

library(plumber)
library(recipes)

# Shared fixture: a prepped recipe with a single spline step on `wt`.
trained_rec <-
    recipe(mpg ~ disp + wt, mtcars) %>%
    step_ns(wt) %>%
    prep(retain = FALSE)

# The vetiver model is given explicit prototype data for the two predictors.
v <- vetiver_model(
    trained_rec,
    "car-splines",
    prototype_data = mtcars[c("disp", "wt")]
)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Notice that we are requiring the user to pass in some prototype_data (check out the vetiver_ptype.recipe method). This is what we have to do for ranger because the info on the training data isn't in there anywhere. If I was understanding Max correctly, this is what he was recommending.

I want to note, though, that the original column names and types are stored in a list, at trained_rec$var_info. Would there be a way to reconstruct the needed info (i.e. a ptype)?

Copy link
Collaborator

@EmilHvitfeldt EmilHvitfeldt Feb 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As it stands right now, there isn't a foolproof way of going from trained_rec$var_info to ptypes, since there is no guarantee that a 1-1 mapping can be found. This is most clearly seen in the fact that the type will be listed as other for any classes we don't currently specify.

I do however wish that this information was in recipes, as it is useful, even if we don't force the input checking. I will note and see if we can add such information in a future version.

Which brings up another thing: the variable checking in recipes is done on an optional, per-step basis, and can at times be quite loose. Many steps don't care if the input is double or integer. step_dummy(), as a gross outlier, doesn't do any type checking.


# Snapshot test of how the vetiver model `v` prints. The snapshot records the
# code expression as well as its output, so the call is kept exactly as is.
test_that("can print recipe", {
expect_snapshot(v)
})

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would normally have a test here like this:

test_that("can predict recipe", {
    preds <- predict(v, mtcars)
    expect_equal(<<blah blah blah>>)
})

But I don't think that's possible for recipes. The predict method for a vetiver model does bundle::unbundle() and then calls predict on what is inside. I guess we could add a bake method for a vetiver model if needed? This is separate from the API where we can say exactly what to do at the endpoint.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For more clarity, this is also separate from calling predict() on a remote vetiver endpoint, which would also work. What we don't have a way to do right now is read the recipe back into memory from remote storage (a pin) and then call bake() on it, without the user manually getting out the recipe object themselves and unbundling it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@isabelizimm do you mind summing up here what the situation is for unsupervised models from scikit-learn as deployed by vetiver? These models typically have a predict method so this is not a problem in Python, right?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at just the clustering algorithms from scikit-learn, most of them have a predict method. You can use these in a Pipeline (similar to workflow), same as other models. Vetiver Python doesn't look for supervised/unsupervised models, only if it is coming from scikit-learn, so it will return the outputs of the predict method as expected.

If one of the unsupervised learning models that does NOT have a predict method is used as the last element in a Pipeline, there will be an error along the lines of model has no predict method.

FWIW, clustering algorithms with predict: k-means, bisecting k-means, affinity propagation, mean shift, BIRCH, Gaussian mixture. Clustering algorithms that do NOT have predict: spectral clustering, agglomerative clustering, DBSCAN, OPTICS.

test_that("can pin a recipe", {
    # Write the model to a temporary board and read the raw pin back.
    board <- board_temp()
    vetiver_pin_write(board, v)
    pinned <- pin_read(board, "car-splines")

    # The pin should hold the butchered-then-bundled recipe plus a zero-row
    # tibble of the two predictor columns.
    expected_pin <- list(
        model = bundle::bundle(butcher::butcher(trained_rec)),
        prototype = vctrs::vec_slice(
            tibble::as_tibble(mtcars[c("disp", "wt")]),
            0
        )
    )
    expect_equal(pinned, expected_pin)

    # Only "recipes" should be recorded as a required package.
    stored_pkgs <- pin_meta(board, "car-splines")$user$required_pkgs
    expect_equal(stored_pkgs, c("recipes"))
})

test_that("default endpoint for recipe", {
    router <- vetiver_api(pr(), v)
    # The first route is dropped; the rest should be ping and predict.
    routes <- router$routes[-1]
    expect_equal(names(routes), c("ping", "predict"))

    route_verbs <- map_chr(routes, "verbs")
    expect_equal(route_verbs, c(ping = "GET", predict = "POST"))
})

test_that("default OpenAPI spec", {
    # Replace the metadata with one that carries only a URL; the test below
    # then expects a `/pin-url` path in the spec.
    v$metadata <- list(url = "potatoes")
    router <- vetiver_api(pr(), v)
    spec <- router$getApiSpec()

    expect_equal(
        spec$info$description,
        "A feature engineering recipe with 1 step"
    )

    predict_post <- spec$paths$`/predict`$post
    expect_equal(
        names(predict_post),
        c("summary", "requestBody", "responses")
    )
    expect_equal(
        as.character(predict_post$summary),
        "Return predictions from model using 2 features"
    )

    pin_url_get <- spec$paths$`/pin-url`$get
    expect_equal(
        as.character(pin_url_get$summary),
        "Get URL of pinned vetiver model"
    )
})

# Snapshot test of the plumber file generated for the pinned recipe.
# NOTE(review): `tmp_dir` and `redact_vetiver` are not defined in this file;
# presumably they come from a testthat helper -- confirm.
test_that("create plumber.R for recipe", {
skip_on_cran()
b <- board_folder(path = tmp_dir)
vetiver_pin_write(b, v)
# Write the generated plumber file to a temporary path and snapshot its text.
tmp <- tempfile()
vetiver_write_plumber(b, "car-splines", file = tmp)
# The snapshot stores this code expression verbatim, so it must not be
# reworded; `transform` is applied to the output before it is compared.
expect_snapshot(
cat(readr::read_lines(tmp), sep = "\n"),
transform = redact_vetiver
)
})