From 118aeef5ff6034b4a0a163b6a9a4e9747af0dafa Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 11:56:33 -0700 Subject: [PATCH 01/10] add ptype element to recipe object --- R/recipe.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/recipe.R b/R/recipe.R index e61f3b0b7..67ba1a6b4 100644 --- a/R/recipe.R +++ b/R/recipe.R @@ -184,7 +184,8 @@ recipe.data.frame <- template = x, levels = NULL, retained = NA, - requirements = requirements + requirements = requirements, + ptype = vctrs::vec_ptype(x) ) class(out) <- "recipe" out From 37a0e5aed23b1efdc5a992317dce6244518c2945 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 11:56:56 -0700 Subject: [PATCH 02/10] update test to deal with ptype element --- tests/testthat/test-selections.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-selections.R b/tests/testthat/test-selections.R index d131c5b13..baef83d5b 100644 --- a/tests/testthat/test-selections.R +++ b/tests/testthat/test-selections.R @@ -310,6 +310,9 @@ test_that("old recipes from 1.0.1 work with new get_types", { ) expect_false(identical(old_rec_sac, rec_sac)) + # Avoid issue with new ptype field in 1.1.0 + rec_sac$ptype <- NULL + expect_identical( prep(old_rec_sac), prep(rec_sac) From 858fe913e7f349b0606bb0e9485eafca6c698932 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:19:17 -0700 Subject: [PATCH 03/10] add recipes_ptype() function --- NAMESPACE | 1 + R/ptype.R | 107 +++++++++++++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + man/recipes_ptype.Rd | 88 +++++++++++++++++++++++++++++++++++ 4 files changed, 197 insertions(+) create mode 100644 R/ptype.R create mode 100644 man/recipes_ptype.Rd diff --git a/NAMESPACE b/NAMESPACE index 7700f05bd..831413a39 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -602,6 +602,7 @@ export(recipes_extension_check) export(recipes_names_outcomes) export(recipes_names_predictors) export(recipes_pkg_check) +export(recipes_ptype) 
export(recipes_remove_cols) export(remove_original_cols) export(remove_role) diff --git a/R/ptype.R b/R/ptype.R new file mode 100644 index 000000000..bd11b790e --- /dev/null +++ b/R/ptype.R @@ -0,0 +1,107 @@ +#' Prototype of recipe object +#' +#' This helper function returns the prototype of the input data set expected by +#' the recipe object. +#' +#' @param x A `recipe` object. +#' @param stage A single character. Must be one of `"prep"` or `"bake"`. See +#' details for more. Defaults to `"prep"`. +#' +#' @details +#' The returned ptype is a tibble of the data set that the recipe object is +#' expecting. The specifics of which columns depend on the `stage`. +#' +#' At `prep()` time, when `stage = "prep"`, the ptype is the data passed to +#' `recipe()`. The following code chunk represents a possible recipe scenario. +#' Calling `recipes_ptype(rec_spec, stage = "prep")` and +#' `recipes_ptype(rec_prep, stage = "prep")` both returns a ptype tibble +#' correspodning to `data_ptype`. This information is used internally in +#' `prep()` to verify that `data_training` has the right columns with the right +#' types. +#' +#' ```r +#' rec_spec <- recipe(outcome ~ ., data = data_ptype) %>% +#' step_normalize(all_numeric_predictors()) %>% +#' step_dummy(all_nominal_predictors()) +#' +#' rec_prep <- prep(rec_spec, training = data_training) +#' ``` +#' +#' At `bake()` time, when `stage = "bake"`, the ptype representents the data +#' that are required for `bake()` to run. +#' +#' ```r +#' data_bake <- bake(rec_prep, new_data = data_testing) +#' ``` +#' +#' What this means in practice is that unless otherwise specified, everything +#' but outcomes and case weights are required. These requirements can be changed +#' with `update_role_requirements()` and `recipes_ptype()` respects those +#' changes. +#' +#' @return A zero row tibble. 
+#' @keywords internal +#' +#' @seealso [developer_functions] +#' +#' @examples +#' training <- tibble( +#' y = 1:10, +#' id = 1:10, +#' x1 = letters[1:10], +#' x2 = factor(letters[1:10]), +#' cw = hardhat::importance_weights(1:10) +#' ) +#' training +#' +#' rec_spec <- recipe(y ~ ., data = training) +#' +#' # outcomes and case_weights are not requred at bake time +#' recipes_ptype(rec_spec, stage = "prep") +#' recipes_ptype(rec_spec, stage = "bake") +#' +#' rec_spec <- recipe(y ~ ., data = training) %>% +#' update_role(x1, new_role = "id") +#' +#' # outcomes and case_weights are not requred at bake time +#' # "id" column is assumed to be needed +#' recipes_ptype(rec_spec, stage = "prep") +#' recipes_ptype(rec_spec, stage = "bake") +#' +#' rec_spec <- recipe(y ~ ., data = training) %>% +#' update_role(x1, new_role = "id") %>% +#' update_role_requirements("id", bake = FALSE) +#' +#' # update_role_requirements() is used to specify that "id" isn't needed +#' recipes_ptype(rec_spec, stage = "prep") +#' recipes_ptype(rec_spec, stage = "bake") +#' +#' @export +recipes_ptype <- function(x, ..., stage = "prep") { + check_dots_empty0(...) + + if (is.null(x$ptype)) { + cli::cli_abort( + c( + x = "Doesn't work on recipes created prior to version 1.1.0.", + i = "Please recreate recipe." 
+ ) + ) + } + + ptype <- x$ptype + + stage <- rlang::arg_match(stage, values = c("prep", "bake")) + + if (stage == "bake") { + required_roles <- compute_bake_role_requirements(x) + + var_info <- x$var_info + + required_var <- var_info$variable[required_roles[var_info$role]] + + ptype <- ptype[names(ptype) %in% required_var] + } + + ptype +} \ No newline at end of file diff --git a/_pkgdown.yml b/_pkgdown.yml index 3f8d9a43b..44742f863 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -165,6 +165,7 @@ reference: - prepper - recipes_eval_select - recipes_extension_check + - recipes_ptype - recipes-role-indicator - update.step - title: Tidy Methods diff --git a/man/recipes_ptype.Rd b/man/recipes_ptype.Rd new file mode 100644 index 000000000..3cb36bfe3 --- /dev/null +++ b/man/recipes_ptype.Rd @@ -0,0 +1,88 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ptype.R +\name{recipes_ptype} +\alias{recipes_ptype} +\title{Prototype of recipe object} +\usage{ +recipes_ptype(x, ..., stage = "prep") +} +\arguments{ +\item{x}{A \code{recipe} object.} + +\item{stage}{A single character. Must be one of \code{"prep"} or \code{"bake"}. See +details for more. Defaults to \code{"prep"}.} +} +\value{ +A zero row tibble. +} +\description{ +This helper function returns the prototype of the input data set expected by +the recipe object. +} +\details{ +The returned ptype is a tibble of the data set that the recipe object is +expecting. The specifics of which columns depend on the \code{stage}. + +At \code{prep()} time, when \code{stage = "prep"}, the ptype is the data passed to +\code{recipe()}. The following code chunk represents a possible recipe scenario. +Calling \code{recipes_ptype(rec_spec, stage = "prep")} and +\code{recipes_ptype(rec_prep, stage = "prep")} both returns a ptype tibble +correspodning to \code{data_ptype}. This information is used internally in +\code{prep()} to verify that \code{data_training} has the right columns with the right +types. 
+ +\if{html}{\out{<div class="sourceCode r">
}}\preformatted{rec_spec <- recipe(outcome ~ ., data = data_ptype) \%>\% + step_normalize(all_numeric_predictors()) \%>\% + step_dummy(all_nominal_predictors()) + +rec_prep <- prep(rec_spec, training = data_training) +}\if{html}{\out{</div>
}} + +At \code{bake()} time, when \code{stage = "bake"}, the ptype representents the data +that are required for \code{bake()} to run. + +\if{html}{\out{<div class="sourceCode r">
}}\preformatted{data_bake <- bake(rec_prep, new_data = data_testing) +}\if{html}{\out{</div>
}} + +What this means in practice is that unless otherwise specified, everything +but outcomes and case weights are required. These requirements can be changed +with \code{update_role_requirements()} and \code{recipes_ptype()} respects those +changes. +} +\examples{ +training <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) +) +training + +rec_spec <- recipe(y ~ ., data = training) + +# outcomes and case_weights are not requred at bake time +recipes_ptype(rec_spec, stage = "prep") +recipes_ptype(rec_spec, stage = "bake") + +rec_spec <- recipe(y ~ ., data = training) \%>\% + update_role(x1, new_role = "id") + +# outcomes and case_weights are not requred at bake time +# "id" column is assumed to be needed +recipes_ptype(rec_spec, stage = "prep") +recipes_ptype(rec_spec, stage = "bake") + +rec_spec <- recipe(y ~ ., data = training) \%>\% + update_role(x1, new_role = "id") \%>\% + update_role_requirements("id", bake = FALSE) + +# update_role_requirements() is used to specify that "id" isn't needed +recipes_ptype(rec_spec, stage = "prep") +recipes_ptype(rec_spec, stage = "bake") + +} +\seealso{ +\link{developer_functions} +} +\keyword{internal} From c852428374a99a8a77ce128570b3929206c76833 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:28:26 -0700 Subject: [PATCH 04/10] document recipes_ptype() in developer_functions --- R/developer.R | 5 +++++ man/developer_functions.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/developer.R b/R/developer.R index a9f1b841b..0a0664935 100644 --- a/R/developer.R +++ b/R/developer.R @@ -139,6 +139,11 @@ #' #' # Interacting with recipe objects #' +#' [recipes_ptype()] returns the ptype, expected variables and types, that an +#' recipe object expects at `prep()` and `bake()` time. Controlled using the +#' `stage` argument. 
Can be used by functions that interact with recipes to +#' verify data is correct before passing it to `prep()` and `bake()`. +#' #' [detect_step()] returns a logical indicator to determine if a given step or #' check is included in a recipe. #' diff --git a/man/developer_functions.Rd b/man/developer_functions.Rd index 024200535..5d29975d7 100644 --- a/man/developer_functions.Rd +++ b/man/developer_functions.Rd @@ -128,6 +128,11 @@ new columns. } \section{Interacting with recipe objects}{ +\code{\link[=recipes_ptype]{recipes_ptype()}} returns the ptype, expected variables and types, that an +recipe object expects at \code{prep()} and \code{bake()} time. Controlled using the +\code{stage} argument. Can be used by functions that interact with recipes to +verify data is correct before passing it to \code{prep()} and \code{bake()}. + \code{\link[=detect_step]{detect_step()}} returns a logical indicator to determine if a given step or check is included in a recipe. From 0261c3bc1ad3b2547db9c9c63b0cc3d68bbabe40 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:29:59 -0700 Subject: [PATCH 05/10] add news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 6e2ee62b6..e101f6bb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,8 @@ * New `extract_fit_time()` method has been added that returns the time it took to train the recipe. (#1071) +* Developer helper function `recipes_ptype()` has been added, returning expected input data for `prep()` and `bake()` for a given recipe object. (#1329) + * The `prefix` argument of `step_dummy_multi_choice()` is not properly documented. (#1298) * `step_dummy()` now gives an informative error on attempt to generate too many columns to fit in memory. 
(#828) From b20aa13304f55725ce7bd3ea500b9334d6efb500 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:52:46 -0700 Subject: [PATCH 06/10] document order of columns in ptype --- R/ptype.R | 3 +++ man/recipes_ptype.Rd | 3 +++ 2 files changed, 6 insertions(+) diff --git a/R/ptype.R b/R/ptype.R index bd11b790e..124d2a311 100644 --- a/R/ptype.R +++ b/R/ptype.R @@ -39,6 +39,9 @@ #' with `update_role_requirements()` and `recipes_ptype()` respects those #' changes. #' +#' Note that the order of the columns aren't guaranteed to align with +#' `data_ptype` as the data internally is ordered according to roles. +#' #' @return A zero row tibble. #' @keywords internal #' diff --git a/man/recipes_ptype.Rd b/man/recipes_ptype.Rd index 3cb36bfe3..80402311f 100644 --- a/man/recipes_ptype.Rd +++ b/man/recipes_ptype.Rd @@ -48,6 +48,9 @@ What this means in practice is that unless otherwise specified, everything but outcomes and case weights are required. These requirements can be changed with \code{update_role_requirements()} and \code{recipes_ptype()} respects those changes. + +Note that the order of the columns aren't guaranteed to align with +\code{data_ptype} as the data internally is ordered according to roles. } \examples{ training <- tibble( From d557064facd357e9e8ee7d2974d0898b5a927a67 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:58:05 -0700 Subject: [PATCH 07/10] make recipes_ptype() work with NA roles --- R/ptype.R | 5 ++++- man/recipes_ptype.Rd | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/ptype.R b/R/ptype.R index 124d2a311..745257d59 100644 --- a/R/ptype.R +++ b/R/ptype.R @@ -4,6 +4,7 @@ #' the recipe object. #' #' @param x A `recipe` object. +#' @param ... currently not used. #' @param stage A single character. Must be one of `"prep"` or `"bake"`. See #' details for more. Defaults to `"prep"`. 
#' @@ -100,8 +101,10 @@ recipes_ptype <- function(x, ..., stage = "prep") { required_roles <- compute_bake_role_requirements(x) var_info <- x$var_info + roles <- var_info$role + roles <- chr_explicit_na(roles) - required_var <- var_info$variable[required_roles[var_info$role]] + required_var <- var_info$variable[required_roles[roles]] ptype <- ptype[names(ptype) %in% required_var] } diff --git a/man/recipes_ptype.Rd b/man/recipes_ptype.Rd index 80402311f..781ceeb55 100644 --- a/man/recipes_ptype.Rd +++ b/man/recipes_ptype.Rd @@ -9,6 +9,8 @@ recipes_ptype(x, ..., stage = "prep") \arguments{ \item{x}{A \code{recipe} object.} +\item{...}{currently not used.} + \item{stage}{A single character. Must be one of \code{"prep"} or \code{"bake"}. See details for more. Defaults to \code{"prep"}.} } From 81948a54c008173d166cdc9328de17432d88fd21 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 31 May 2024 17:58:21 -0700 Subject: [PATCH 08/10] test recipes_ptype() --- tests/testthat/_snaps/ptype.md | 9 +++ tests/testthat/test-ptype.R | 131 +++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 tests/testthat/_snaps/ptype.md create mode 100644 tests/testthat/test-ptype.R diff --git a/tests/testthat/_snaps/ptype.md b/tests/testthat/_snaps/ptype.md new file mode 100644 index 000000000..a1e63cfb8 --- /dev/null +++ b/tests/testthat/_snaps/ptype.md @@ -0,0 +1,9 @@ +# recipes_ptype errors on old recipes + + Code + recipes_ptype(rec) + Condition + Error in `recipes_ptype()`: + x Doesn't work on recipes created prior to version 1.1.0. + i Please recreate recipe. 
+ diff --git a/tests/testthat/test-ptype.R b/tests/testthat/test-ptype.R new file mode 100644 index 000000000..b5b06c446 --- /dev/null +++ b/tests/testthat/test-ptype.R @@ -0,0 +1,131 @@ +test_that("recipes_ptype() works", { + data_orig <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) + ) + + rec_spec <- recipe(y ~ ., data = data_orig) + + exp_ptype <- vctrs::vec_ptype(data_orig) + + expect_identical( + recipes_ptype(rec_spec, stage = "prep")[names(exp_ptype)], + exp_ptype + ) + expect_identical( + recipes_ptype(rec_spec, stage = "bake"), + exp_ptype[c("id", "x1", "x2")] + ) +}) + +test_that("recipes_ptype() isn't affected by prepping recipe", { + data_orig <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) + ) + + rec_spec <- recipe(y ~ ., data = data_orig) %>% + step_dummy(all_nominal_predictors()) %>% + prep() + + exp_ptype <- vctrs::vec_ptype(data_orig) + + expect_identical( + recipes_ptype(rec_spec, stage = "prep")[names(exp_ptype)], + exp_ptype + ) + expect_identical( + recipes_ptype(rec_spec, stage = "bake"), + exp_ptype[c("id", "x1", "x2")] + ) +}) + +test_that("recipes_ptype() works with update_role()", { + data_orig <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) + ) + + rec_spec <- recipe(y ~ ., data = data_orig) %>% + update_role(id, new_role = "id") + + exp_ptype <- vctrs::vec_ptype(data_orig) + + expect_identical( + recipes_ptype(rec_spec, stage = "prep")[names(exp_ptype)], + exp_ptype + ) + expect_identical( + recipes_ptype(rec_spec, stage = "bake"), + exp_ptype[c("id", "x1", "x2")] + ) +}) + +test_that("recipes_ptype() works with update_role_requirements()", { + data_orig <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) + ) + + rec_spec 
<- recipe(y ~ ., data = data_orig) %>% + update_role(id, new_role = "id") %>% + update_role_requirements("id", bake = FALSE) + + exp_ptype <- vctrs::vec_ptype(data_orig) + + expect_identical( + recipes_ptype(rec_spec, stage = "prep")[names(exp_ptype)], + exp_ptype + ) + expect_identical( + recipes_ptype(rec_spec, stage = "bake"), + exp_ptype[c("x1", "x2")] + ) +}) + +test_that("recipes_ptype() works with NA roles", { + data_orig <- tibble( + y = 1:10, + id = 1:10, + x1 = letters[1:10], + x2 = factor(letters[1:10]), + cw = hardhat::importance_weights(1:10) + ) + + rec_spec <- recipe(data_orig) + + exp_ptype <- vctrs::vec_ptype(data_orig) + + expect_identical( + recipes_ptype(rec_spec, stage = "prep")[names(exp_ptype)], + exp_ptype + ) + expect_identical( + recipes_ptype(rec_spec, stage = "bake"), + exp_ptype[c("y", "id", "x1", "x2")] + ) +}) + +test_that("recipes_ptype errors on old recipes", { + rec <- recipe(mpg ~ ., data = mtcars) + + # simulate pre-1.1.0 recipe + rec$ptype <- NULL + + expect_snapshot( + error = TRUE, + recipes_ptype(rec) + ) +}) From ea3e34bbfb7f2ca4df2aff4029a8eca142812714 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 4 Jun 2024 11:52:08 -0700 Subject: [PATCH 09/10] Apply suggestions from code review Co-authored-by: Simon P. Couch --- R/developer.R | 2 +- R/ptype.R | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/developer.R b/R/developer.R index 0a0664935..06578c4d8 100644 --- a/R/developer.R +++ b/R/developer.R @@ -139,7 +139,7 @@ #' #' # Interacting with recipe objects #' -#' [recipes_ptype()] returns the ptype, expected variables and types, that an +#' [recipes_ptype()] returns the ptype, expected variables and types, that a #' recipe object expects at `prep()` and `bake()` time. Controlled using the #' `stage` argument. Can be used by functions that interact with recipes to #' verify data is correct before passing it to `prep()` and `bake()`. 
diff --git a/R/ptype.R b/R/ptype.R index 745257d59..85e808b99 100644 --- a/R/ptype.R +++ b/R/ptype.R @@ -14,9 +14,9 @@ #' #' At `prep()` time, when `stage = "prep"`, the ptype is the data passed to #' `recipe()`. The following code chunk represents a possible recipe scenario. -#' Calling `recipes_ptype(rec_spec, stage = "prep")` and -#' `recipes_ptype(rec_prep, stage = "prep")` both returns a ptype tibble -#' correspodning to `data_ptype`. This information is used internally in +#' `recipes_ptype(rec_spec, stage = "prep")` and +#' `recipes_ptype(rec_prep, stage = "prep")` both return a ptype tibble +#' corresponding to `data_ptype`. This information is used internally in #' `prep()` to verify that `data_training` has the right columns with the right #' types. #' @@ -28,7 +28,7 @@ #' rec_prep <- prep(rec_spec, training = data_training) #' ``` #' -#' At `bake()` time, when `stage = "bake"`, the ptype representents the data +#' At `bake()` time, when `stage = "bake"`, the ptype represents the data #' that are required for `bake()` to run. #' #' ```r @@ -37,7 +37,7 @@ #' #' What this means in practice is that unless otherwise specified, everything #' but outcomes and case weights are required. These requirements can be changed -#' with `update_role_requirements()` and `recipes_ptype()` respects those +#' with `update_role_requirements()`, and `recipes_ptype()` respects those #' changes. 
#' #' Note that the order of the columns aren't guaranteed to align with @@ -60,14 +60,14 @@ #' #' rec_spec <- recipe(y ~ ., data = training) #' -#' # outcomes and case_weights are not requred at bake time +#' # outcomes and case_weights are not required at bake time #' recipes_ptype(rec_spec, stage = "prep") #' recipes_ptype(rec_spec, stage = "bake") #' #' rec_spec <- recipe(y ~ ., data = training) %>% #' update_role(x1, new_role = "id") #' -#' # outcomes and case_weights are not requred at bake time +#' # outcomes and case_weights are not required at bake time #' # "id" column is assumed to be needed #' recipes_ptype(rec_spec, stage = "prep") #' recipes_ptype(rec_spec, stage = "bake") @@ -110,4 +110,4 @@ recipes_ptype <- function(x, ..., stage = "prep") { } ptype -} \ No newline at end of file +} From 6b5811e2ee7b13f5fa11dbcc5a01a7903a1c3d59 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 4 Jun 2024 11:58:58 -0700 Subject: [PATCH 10/10] devtools::document() --- man/developer_functions.Rd | 2 +- man/recipes_ptype.Rd | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/man/developer_functions.Rd b/man/developer_functions.Rd index 5d29975d7..a62163ad5 100644 --- a/man/developer_functions.Rd +++ b/man/developer_functions.Rd @@ -128,7 +128,7 @@ new columns. } \section{Interacting with recipe objects}{ -\code{\link[=recipes_ptype]{recipes_ptype()}} returns the ptype, expected variables and types, that an +\code{\link[=recipes_ptype]{recipes_ptype()}} returns the ptype, expected variables and types, that a recipe object expects at \code{prep()} and \code{bake()} time. Controlled using the \code{stage} argument. Can be used by functions that interact with recipes to verify data is correct before passing it to \code{prep()} and \code{bake()}. diff --git a/man/recipes_ptype.Rd b/man/recipes_ptype.Rd index 781ceeb55..7f518ae49 100644 --- a/man/recipes_ptype.Rd +++ b/man/recipes_ptype.Rd @@ -27,9 +27,9 @@ expecting. 
The specifics of which columns depend on the \code{stage}. At \code{prep()} time, when \code{stage = "prep"}, the ptype is the data passed to \code{recipe()}. The following code chunk represents a possible recipe scenario. -Calling \code{recipes_ptype(rec_spec, stage = "prep")} and -\code{recipes_ptype(rec_prep, stage = "prep")} both returns a ptype tibble -correspodning to \code{data_ptype}. This information is used internally in +\code{recipes_ptype(rec_spec, stage = "prep")} and +\code{recipes_ptype(rec_prep, stage = "prep")} both return a ptype tibble +corresponding to \code{data_ptype}. This information is used internally in \code{prep()} to verify that \code{data_training} has the right columns with the right types. @@ -40,7 +40,7 @@ types. rec_prep <- prep(rec_spec, training = data_training) }\if{html}{\out{</div>}} -At \code{bake()} time, when \code{stage = "bake"}, the ptype representents the data +At \code{bake()} time, when \code{stage = "bake"}, the ptype represents the data that are required for \code{bake()} to run. \if{html}{\out{<div class="sourceCode r">
}}\preformatted{data_bake <- bake(rec_prep, new_data = data_testing) @@ -48,7 +48,7 @@ that are required for \code{bake()} to run. What this means in practice is that unless otherwise specified, everything but outcomes and case weights are required. These requirements can be changed -with \code{update_role_requirements()} and \code{recipes_ptype()} respects those +with \code{update_role_requirements()}, and \code{recipes_ptype()} respects those changes. Note that the order of the columns aren't guaranteed to align with @@ -66,14 +66,14 @@ training rec_spec <- recipe(y ~ ., data = training) -# outcomes and case_weights are not requred at bake time +# outcomes and case_weights are not required at bake time recipes_ptype(rec_spec, stage = "prep") recipes_ptype(rec_spec, stage = "bake") rec_spec <- recipe(y ~ ., data = training) \%>\% update_role(x1, new_role = "id") -# outcomes and case_weights are not requred at bake time +# outcomes and case_weights are not required at bake time # "id" column is assumed to be needed recipes_ptype(rec_spec, stage = "prep") recipes_ptype(rec_spec, stage = "bake")