From 559b732e84a00579ffdaa353091a1e2673ed1f5c Mon Sep 17 00:00:00 2001 From: hadley Date: Sun, 22 May 2016 15:03:34 +0200 Subject: [PATCH] Add sep argument to spread. Fixes #68 --- NEWS.md | 4 ++++ R/spread.R | 29 ++++++++++++++++++++++------- man/spread.Rd | 7 ++++++- man/spread_.Rd | 7 ++++++- tests/testthat/test-spread.R | 11 +++++++++++ 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2e26df2b4..066c71fc2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # tidyr 0.4.1.9000 +* `spread()` gains a `sep` argument. If not-null, this will name columns + as "keyvalue". Additionally, if sep is `NULL` missing values will be + converted to `` (#68). + * `unnest()` now works with non-syntactic names (#190). * `unnest()` gains `.id` argument that works the same way as `bind_rows()`. diff --git a/R/spread.R b/R/spread.R index 5b75d36e9..260f187cb 100644 --- a/R/spread.R +++ b/R/spread.R @@ -30,11 +30,13 @@ #' value = c(5.1, "setosa", 1, 7.0, "versicolor", 2)) #' df %>% spread(var, value) %>% str #' df %>% spread(var, value, convert = TRUE) %>% str -spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE) { +spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE, + sep = NULL) { key_col <- col_name(substitute(key)) value_col <- col_name(substitute(value)) - spread_(data, key_col, value_col, fill = fill, convert = convert, drop = drop) + spread_(data, key_col, value_col, fill = fill, convert = convert, drop = drop, + sep = sep) } #' Standard-evaluation version of \code{spread}. @@ -55,10 +57,13 @@ spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE) { #' conversion. #' @param drop If \code{FALSE}, will keep factor levels that don't appear in the #' data, filling in missing combinations with \code{fill}. +#' @param sep If \code{NULL}, the column names will be taken from the values of +#' \code{key} variable. If non-\code{NULL}, the column names will be given +#' by "". #' @keywords internal #' @export spread_ <- function(data, key_col, value_col, fill = NA, convert = FALSE, - drop = TRUE) { + drop = TRUE, sep = NULL) { if (!(key_col %in% names(data))) { stop("Key column '", key_col, "' does not exist in input.", call. = FALSE) } @@ -72,7 +77,7 @@ spread_ <- function(data, key_col, value_col, fill = NA, convert = FALSE, #' @export #' @importFrom tibble as_data_frame spread_.data.frame <- function(data, key_col, value_col, fill = NA, - convert = FALSE, drop = TRUE) { + convert = FALSE, drop = TRUE, sep = NULL) { col <- data[key_col] col_id <- id(col, drop = drop) @@ -120,7 +125,7 @@ spread_.data.frame <- function(data, key_col, value_col, fill = NA, ordered <- as.character(ordered) } dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n")) - colnames(ordered) <- as.character(col_labels[[1]]) + colnames(ordered) <- col_names(col_labels, sep = sep) ordered <- as_data_frame_matrix(ordered) @@ -131,19 +136,29 @@ spread_.data.frame <- function(data, key_col, value_col, fill = NA, append_df(row_labels, ordered) } +col_names <- function(x, sep = NULL) { + names <- as.character(x[[1]]) + + if (is.null(sep)) { + ifelse(is.na(names), "", names) + } else { + paste(names(x)[[1]], names, sep = sep) + } +} + as_data_frame_matrix <- function(x) { utils::getS3method("as_data_frame", "matrix", envir = asNamespace("tibble"))(x) } #' @export spread_.tbl_df <- function(data, key_col, value_col, fill = NA, - convert = FALSE, drop = TRUE) { + convert = FALSE, drop = TRUE, sep = NULL) { as_data_frame(NextMethod()) } #' @export spread_.grouped_df <- function(data, key_col, value_col, fill = NA, - convert = FALSE, drop = TRUE) { + convert = FALSE, drop = TRUE, sep = NULL) { regroup(NextMethod(), data, c(key_col, value_col)) } diff --git a/man/spread.Rd b/man/spread.Rd index 91c6eacaa..9c146aa04 100644 --- a/man/spread.Rd +++ b/man/spread.Rd @@ -4,7 +4,8 @@ \alias{spread} \title{Spread a key-value pair across multiple columns.} \usage{ -spread(data, key, value, fill = NA, convert = FALSE, drop = TRUE) +spread(data, key, value, fill = NA, convert = FALSE, drop = TRUE, + sep = NULL) } \arguments{ \item{data}{A data frame.} @@ -29,6 +30,10 @@ conversion.} \item{drop}{If \code{FALSE}, will keep factor levels that don't appear in the data, filling in missing combinations with \code{fill}.} + +\item{sep}{If \code{NULL}, the column names will be taken from the values of +\code{key} variable. If non-\code{NULL}, the column names will be given +by "".} } \description{ Spread a key-value pair across multiple columns. diff --git a/man/spread_.Rd b/man/spread_.Rd index b48510918..92422af6b 100644 --- a/man/spread_.Rd +++ b/man/spread_.Rd @@ -4,7 +4,8 @@ \alias{spread_} \title{Standard-evaluation version of \code{spread}.} \usage{ -spread_(data, key_col, value_col, fill = NA, convert = FALSE, drop = TRUE) +spread_(data, key_col, value_col, fill = NA, convert = FALSE, drop = TRUE, + sep = NULL) } \arguments{ \item{data}{A data frame.} @@ -25,6 +26,10 @@ conversion.} \item{drop}{If \code{FALSE}, will keep factor levels that don't appear in the data, filling in missing combinations with \code{fill}.} + +\item{sep}{If \code{NULL}, the column names will be taken from the values of +\code{key} variable. If non-\code{NULL}, the column names will be given +by "".} } \description{ This is a S3 generic. diff --git a/tests/testthat/test-spread.R b/tests/testthat/test-spread.R index e5b5d0c25..84b4ed291 100644 --- a/tests/testthat/test-spread.R +++ b/tests/testthat/test-spread.R @@ -164,3 +164,14 @@ test_that("grouping vars are kept where possible", { out <- df %>% group_by(key) %>% spread(key, value) expect_equal(out, data_frame(a = 1L, b = 2L)) }) + + +test_that("col names never contains NA", { + df <- data_frame(x = c(1, NA), y = 1:2) + df %>% + spread(x, y) %>% + expect_named(c("1", "")) + df %>% + spread(x, y, sep = "_") %>% + expect_named(c("x_1", "x_NA")) +})