From 559b732e84a00579ffdaa353091a1e2673ed1f5c Mon Sep 17 00:00:00 2001
From: hadley <h.wickham@gmail.com>
Date: Sun, 22 May 2016 15:03:34 +0200
Subject: [PATCH] Add sep argument to spread.

Fixes #68
---
 NEWS.md                      |  4 ++++
 R/spread.R                   | 29 ++++++++++++++++++++++-------
 man/spread.Rd                |  7 ++++++-
 man/spread_.Rd               |  7 ++++++-
 tests/testthat/test-spread.R | 11 +++++++++++
 5 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 2e26df2b4..066c71fc2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # tidyr 0.4.1.9000
 
+* `spread()` gains a `sep` argument. If not-null, this will name columns
+  as "key<sep>value". Additionally, if sep is `NULL` missing values will be
+  converted to `<NA>` (#68).
+
 * `unnest()` now works with non-syntactic names (#190).
 
 * `unnest()` gains `.id` argument that works the same way as `bind_rows()`.
diff --git a/R/spread.R b/R/spread.R
index 5b75d36e9..260f187cb 100644
--- a/R/spread.R
+++ b/R/spread.R
@@ -30,11 +30,13 @@
 #'                  value = c(5.1, "setosa", 1, 7.0, "versicolor", 2))
 #' df %>% spread(var, value) %>% str
 #' df %>% spread(var, value, convert = TRUE) %>% str
-spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE) {
+spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE,
+                   sep = NULL) {
   key_col <- col_name(substitute(key))
   value_col <- col_name(substitute(value))
 
-  spread_(data, key_col, value_col, fill = fill, convert = convert, drop = drop)
+  spread_(data, key_col, value_col, fill = fill, convert = convert, drop = drop,
+    sep = sep)
 }
 
 #' Standard-evaluation version of \code{spread}.
@@ -55,10 +57,13 @@ spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE) {
 #'   conversion.
 #' @param drop If \code{FALSE}, will keep factor levels that don't appear in the
 #'   data, filling in missing combinations with \code{fill}.
+#' @param sep If \code{NULL}, the column names will be taken from the values of
+#'   \code{key} variable. If non-\code{NULL}, the column names will be given
+#'   by "<key_name><sep><key_value>".
 #' @keywords internal
 #' @export
 spread_ <- function(data, key_col, value_col, fill = NA, convert = FALSE,
-                    drop = TRUE) {
+                    drop = TRUE, sep = NULL) {
   if (!(key_col %in% names(data))) {
     stop("Key column '", key_col, "' does not exist in input.", call. = FALSE)
   }
@@ -72,7 +77,7 @@ spread_ <- function(data, key_col, value_col, fill = NA, convert = FALSE,
 #' @export
 #' @importFrom tibble as_data_frame
 spread_.data.frame <- function(data, key_col, value_col, fill = NA,
-                               convert = FALSE, drop = TRUE) {
+                               convert = FALSE, drop = TRUE, sep = NULL) {
 
   col <- data[key_col]
   col_id <- id(col, drop = drop)
@@ -120,7 +125,7 @@ spread_.data.frame <- function(data, key_col, value_col, fill = NA,
     ordered <- as.character(ordered)
   }
   dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n"))
-  colnames(ordered) <- as.character(col_labels[[1]])
+  colnames(ordered) <- col_names(col_labels, sep = sep)
 
   ordered <- as_data_frame_matrix(ordered)
 
@@ -131,19 +136,29 @@ spread_.data.frame <- function(data, key_col, value_col, fill = NA,
   append_df(row_labels, ordered)
 }
 
+col_names <- function(x, sep = NULL) {
+  names <- as.character(x[[1]])
+
+  if (is.null(sep)) {
+    ifelse(is.na(names), "<NA>", names)
+  } else {
+    paste(names(x)[[1]], names, sep = sep)
+  }
+}
+
 as_data_frame_matrix <- function(x) {
  utils::getS3method("as_data_frame", "matrix", envir = asNamespace("tibble"))(x)
 }
 
 #' @export
 spread_.tbl_df <- function(data, key_col, value_col, fill = NA,
-                           convert = FALSE, drop = TRUE) {
+                           convert = FALSE, drop = TRUE, sep = NULL) {
   as_data_frame(NextMethod())
 }
 
 #' @export
 spread_.grouped_df <- function(data, key_col, value_col, fill = NA,
-                               convert = FALSE, drop = TRUE) {
+                               convert = FALSE, drop = TRUE, sep = NULL) {
   regroup(NextMethod(), data, c(key_col, value_col))
 }
 
diff --git a/man/spread.Rd b/man/spread.Rd
index 91c6eacaa..9c146aa04 100644
--- a/man/spread.Rd
+++ b/man/spread.Rd
@@ -4,7 +4,8 @@
 \alias{spread}
 \title{Spread a key-value pair across multiple columns.}
 \usage{
-spread(data, key, value, fill = NA, convert = FALSE, drop = TRUE)
+spread(data, key, value, fill = NA, convert = FALSE, drop = TRUE,
+  sep = NULL)
 }
 \arguments{
 \item{data}{A data frame.}
@@ -29,6 +30,10 @@ conversion.}
 
 \item{drop}{If \code{FALSE}, will keep factor levels that don't appear in the
 data, filling in missing combinations with \code{fill}.}
+
+\item{sep}{If \code{NULL}, the column names will be taken from the values of
+\code{key} variable. If non-\code{NULL}, the column names will be given
+by "<key_name><sep><key_value>".}
 }
 \description{
 Spread a key-value pair across multiple columns.
diff --git a/man/spread_.Rd b/man/spread_.Rd
index b48510918..92422af6b 100644
--- a/man/spread_.Rd
+++ b/man/spread_.Rd
@@ -4,7 +4,8 @@
 \alias{spread_}
 \title{Standard-evaluation version of \code{spread}.}
 \usage{
-spread_(data, key_col, value_col, fill = NA, convert = FALSE, drop = TRUE)
+spread_(data, key_col, value_col, fill = NA, convert = FALSE, drop = TRUE,
+  sep = NULL)
 }
 \arguments{
 \item{data}{A data frame.}
@@ -25,6 +26,10 @@ conversion.}
 
 \item{drop}{If \code{FALSE}, will keep factor levels that don't appear in the
 data, filling in missing combinations with \code{fill}.}
+
+\item{sep}{If \code{NULL}, the column names will be taken from the values of
+\code{key} variable. If non-\code{NULL}, the column names will be given
+by "<key_name><sep><key_value>".}
 }
 \description{
 This is a S3 generic.
diff --git a/tests/testthat/test-spread.R b/tests/testthat/test-spread.R
index e5b5d0c25..84b4ed291 100644
--- a/tests/testthat/test-spread.R
+++ b/tests/testthat/test-spread.R
@@ -164,3 +164,14 @@ test_that("grouping vars are kept where possible", {
   out <- df %>% group_by(key) %>% spread(key, value)
   expect_equal(out, data_frame(a = 1L, b = 2L))
 })
+
+
+test_that("col names never contains NA", {
+  df <- data_frame(x = c(1, NA), y = 1:2)
+  df %>%
+    spread(x, y) %>%
+    expect_named(c("1", "<NA>"))
+  df %>%
+    spread(x, y, sep = "_") %>%
+    expect_named(c("x_1", "x_NA"))
+})