From 0224120c001f125320ef69b3cb9cc0bc193f6615 Mon Sep 17 00:00:00 2001 From: Randall Pruim Date: Tue, 16 Sep 2014 01:30:55 -0400 Subject: [PATCH 1/8] new histogram stuff based on center/width/boundary --- .gitignore | 1 + NAMESPACE | 3 +- R/compute_bin.R | 312 +++++++++++++++------ R/layer_bars.R | 2 +- R/layer_bins.R | 29 +- R/scales.R | 2 +- man/bin_vector.Rd | 40 +++ man/compute_bin.Rd | 26 +- man/layer_bars.Rd | 2 +- man/layer_histograms.Rd | 31 +- man/scale_ordinal.Rd | 2 +- tests/specs/layer.r | 4 +- tests/specs/layer/freqpoly-grouped.json | 6 +- tests/specs/layer/histogram.json | 6 +- tests/specs/line/layer-line-nominal-x.json | 65 ++--- tests/specs/line/layer-line.json | 65 ++--- tests/specs/scales/datetime_hist.json | 6 +- tests/testthat/test-compute-bin.r | 51 ++-- vignettes/cookbook.Rmd | 34 ++- vignettes/ggvis-basics.Rmd | 6 +- vignettes/overview.Rmd | 2 +- 21 files changed, 458 insertions(+), 237 deletions(-) create mode 100644 man/bin_vector.Rd diff --git a/.gitignore b/.gitignore index 807ea251..c834446e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user .Rhistory .RData +*~ diff --git a/NAMESPACE b/NAMESPACE index 5b7c8024..3714b378 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,7 +71,6 @@ S3method(extract_reactives,default) S3method(extract_reactives,ggvis_props) S3method(extract_reactives,prop_reactive) S3method(filter,ggvis) -S3method(filter,reactive) S3method(format,ggvis_axis) S3method(format,ggvis_legend) S3method(format,ggvis_props) @@ -180,6 +179,7 @@ export(arrange.reactive) export(auto_group) export(axis_props) export(band) +export(bin_vector) export(bind_shiny) export(bind_shiny_ui) export(compute_align) @@ -205,6 +205,7 @@ export(explain) export(export_png) export(export_svg) export(filter) +export(filter.reactive) export(fullseq) export(get_data) export(ggvis) diff --git a/R/compute_bin.R b/R/compute_bin.R index ca8af759..03db9438 100644 --- a/R/compute_bin.R +++ b/R/compute_bin.R @@ -4,16 +4,25 @@ #' grouped data frames and ggvis visualisations. #' @param x_var,w_var Names of x and weight variables. The x variable must be #' continuous. -#' @param binwidth The width of the bins. The default is \code{NULL}, which +#' @param width The width of the bins. The default is \code{NULL}, which #' yields 30 bins that cover the range of the data. You should always override #' this value, exploring multiple widths to find the best to illustrate the #' stories in your data. -#' @param origin The initial position of the left-most bin. If \code{NULL}, the -#' the default, will use the smallest value in the dataset. +#' @param center The center of one of the bins. Note that if center is above or below +#' the range of the data, things will be shifted by an appropriate number of \code{width}s. +#' To center on integers, +#' for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is +#' outside the range of the data. At most one of \code{center} and \code{boundary} may be +#' specified. +#' @param boundary A boundary between two bins. As with \code{center}, things are shifted +#' when \code{boundary} is outside the range of the data. For example, to center on +#' integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside +#' the range of the data. At most one of \code{center} and \code{boundary} may be +#' specified. #' @param right Should bins be right-open, left-closed, or #' right-closed, left-open. #' @param pad If \code{TRUE}, adds empty bins at either end of x. This -#' ensures frequency polygons touch 0, and adds padidng between the data +#' ensures frequency polygons touch 0, and adds padding between the data #' and axis for histograms. #' @seealso \code{\link{compute_count}} For counting cases at specific locations #' of a continuous variable. This is useful when the variable is continuous @@ -27,20 +36,22 @@ #' \item{width_}{width of bin} #' @examples #' mtcars %>% compute_bin(~mpg) -#' mtcars %>% compute_bin(~mpg, binwidth = 10) -#' mtcars %>% group_by(cyl) %>% compute_bin(~mpg, binwidth = 10) +#' mtcars %>% compute_bin(~mpg, width = 10) +#' mtcars %>% group_by(cyl) %>% compute_bin(~mpg, width = 10) #' #' # It doesn't matter whether you transform inside or outside of a vis #' mtcars %>% compute_bin(~mpg) %>% ggvis(~x_, ~count_) %>% layer_paths() #' mtcars %>% ggvis(~ x_, ~ count_) %>% compute_bin(~mpg) %>% layer_paths() -compute_bin <- function(x, x_var, w_var = NULL, binwidth = NULL, - origin = NULL, right = TRUE, pad = TRUE) { +compute_bin <- function(x, x_var, w_var = NULL, width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad = TRUE) { UseMethod("compute_bin") } #' @export -compute_bin.data.frame <- function(x, x_var, w_var = NULL, binwidth = NULL, - origin = NULL, right = TRUE, pad = TRUE) { +compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad = TRUE) { assert_that(is.formula(x_var)) x_val <- eval_vector(x, x_var) @@ -57,35 +68,38 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, binwidth = NULL, w_val <- eval_vector(x, w_var) } - params <- bin_params(range2(x_val), binwidth = binwidth, origin = origin, - right = right) + params <- bin_params(range2(x_val), width = width, center = center, + boundary = boundary, right = right) - bin_vector(x_val, weight = w_val, binwidth = params$binwidth, - origin = params$origin, right = params$right, pad = pad) + # note: origin is a boundary, so this works. + bin_vector(x_val, weight = w_val, width = params$binwidth, boundary = params$origin, + right = params$right, pad = pad) } #' @export -compute_bin.grouped_df <- function(x, x_var, w_var = NULL, binwidth = NULL, - origin = NULL, right = TRUE, pad = TRUE) { +compute_bin.grouped_df <- function(x, x_var, w_var = NULL, width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad = TRUE) { x_val <- eval_vector(x, x_var) - params <- bin_params(range2(x_val), binwidth = binwidth, origin = origin, - right = right) + params <- bin_params(range2(x_val), width = width, center = center, + boundary = boundary, right = right) dplyr::do(x, compute_bin(., x_var, w_var = w_var, - binwidth = params$binwidth, - origin = params$origin, + width = params$binwidth, + boundary = params$origin, # origin is a boundary, so this works right = params$right, pad = pad)) } #' @export -compute_bin.ggvis <- function(x, x_var, w_var = NULL, binwidth = NULL, - origin = NULL, right = TRUE, pad = TRUE) { - args <- list(x_var = x_var, w_var = w_var, binwidth = binwidth, - origin = origin, right = right, pad = pad) +compute_bin.ggvis <- function(x, x_var, w_var = NULL, width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad = TRUE) { + args <- list(x_var = x_var, w_var = w_var, width = width, + center = center, boundary = boundary, right = right, pad = pad) register_computation(x, args, "bin", function(data, args) { output <- do_call(compute_bin, quote(data), .args = args) @@ -95,78 +109,204 @@ compute_bin.ggvis <- function(x, x_var, w_var = NULL, binwidth = NULL, # Compute parameters ----------------------------------------------------------- -bin_params <- function(x_range, binwidth = NULL, origin = NULL, right = TRUE) { +bin_params <- function(x_range, width = NULL, center = NULL, boundary = NULL, + right = TRUE) { UseMethod("bin_params") } +# compute origin from x_range and width +tilelayer_origin <- function(x_range, width) { + stopifnot(is.numeric(x_range) && length(x_range) == 2) + stopifnot(is.numeric(width) && length(width) == 1) + num_central_bins <- trunc(diff(x_range) / width) - 1 + side_width <- (diff(x_range) - num_central_bins * width) / 2 # width of partial tiles on either side + x_range[1] + side_width - width + # adjust_breaks should be called to handle any round-off fuzziness issues +} + +compute_origin <- function(x_range, width, boundary) { + shift <- floor( (x_range[1] - boundary) / width ) + boundary + shift * width +} + #' @export -bin_params.numeric <- function(x_range, binwidth = NULL, origin = NULL, +bin_params.numeric <- function(x_range, width = NULL, + center = NULL, + boundary = NULL, right = TRUE) { + stopifnot(is.numeric(x_range) && length(x_range) == 2) + if (!is.null(boundary) && !is.null(center)) { + stop( "Only one of 'boundary' and 'center' may be specified." ) + } - if (is.null(binwidth)) { - binwidth <- diff(x_range) / 30 - notify_guess(binwidth, "range / 30") + if (is.null(width)) { + width <- diff(x_range) / 30 + notify_guess(width, "range / 30") } - if (is.null(origin)) { - origin <- round_any(x_range[1], binwidth, floor) + # if neither edge nor center given, compute both using tile layer's algorithm + # this puts min and max of data in outer half of their bins. + + if (is.null(boundary) && is.null(center)) { + boundary <- tilelayer_origin(x_range, width) + # center <- boundary + width / 2 } - list(binwidth = binwidth, origin = origin, right = right) + # if center given but not boundary, compute boundary from center + if (is.null(boundary)) boundary <- center - width / 2 + + origin <- compute_origin( x_range, width, boundary ) + + list(binwidth = width, origin = origin, right = right) } #' @export -bin_params.POSIXct <- function(x_range, binwidth = NULL, origin = NULL, +bin_params.POSIXct <- function(x_range, width = NULL, + center = NULL, boundary = NULL, right = TRUE) { - if (is.null(binwidth)) { - binwidth <- as.numeric(diff(x_range) / 30, units = "secs") - notify_guess(binwidth, "range / 30") + if (!is.null(boundary) && !is.null(center)) { + stop( "Only one of 'boundary' and 'center' may be specified." ) + } + + x_range <- as.numeric(x_range) + if (inherits(width, "Period")) { + width <- as.numeric(as.difftime(width, units = "secs")) + } + if (!is.null(width)) width <- as.numeric(width) + if (!is.null(center)) width <- as.numeric(center) + if (!is.null(boundary)) width <- as.numeric(boundary) + + if (is.null(width)) { + width <- diff(x_range) / 30 + notify_guess(width, "range / 30 (in seconds)") + width <- as.numeric(width, units = "secs") } - list(binwidth = binwidth, origin = origin, right = right) + if (is.null(boundary) && is.null(center)) { + boundary <- tilelayer_origin(x_range, width) + center <- boundary + width / 2 + } + + if (!is.numeric(width)) width <- as.numeric(width, units = 'secs') + if (!is.null(center)) center <- as.numeric(center) + if (!is.null(boundary)) boundary<- as.numeric(boundary) + + # if we have center but not boundary, compute boundary + if (is.null(boundary)) boundary <- center - width / 2 + + origin <- compute_origin( x_range, width, boundary ) + + list(binwidth = width, origin = origin, right = right, + origin.POSIX = structure(origin, class = c("POSIXct", "POSIXt")) + ) } +### check on this -- get center in instead of origin #' @export -bin_params.Date <- function(x_range, binwidth = NULL, origin = NULL, - right = TRUE) { +bin_params.Date <- function(x_range, width = NULL, center = NULL, + boundary = NULL, right = TRUE) { - if (is.null(binwidth)) { - binwidth <- as.numeric(diff(x_range) / 30) - notify_guess(binwidth, "range / 30") + # convert things to numeric as we go along + + x_range <- as.numeric(x_range) + + if (is.null(width)) { + width <- diff(x_range) / 30 + notify_guess(width, "range / 30") } + width <- as.numeric(width) - list(binwidth = binwidth, origin = origin, right = right) + if (is.null(boundary) && is.null(center)) { + boundary <- tilelayer_origin(x_range, width) + center <- boundary + width / 2 + } + + # if we have center but not boundary, compute boundary + if (is.null(boundary)) { + center <- as.numeric(center) + boundary <- center - width / 2 + } + + origin <- compute_origin( x_range, width, boundary ) + + # do we need to convert this back to date format? + + list(binwidth = width, origin = origin, right = right) } + #' @export -bin_params.integer <- function(x_range, binwidth = NULL, origin = NULL, +bin_params.integer <- function(x_range, width = NULL, + center = NULL, boundary = NULL, right = TRUE) { - if (is.null(binwidth)) { - binwidth <- 1 - origin <- x_range[1] - 1/2 - notify_guess(binwidth) + if (!is.null(boundary) && !is.null(center)) { + stop( "Only one of 'boundary' and 'center' may be specified." ) + } + + if (is.null(width)) { + width <- max(pretty(round(diff(x_range) / 30))) + if (width <= 1) width <- 1 + num_bins <- ceiling( diff(x_range) / width ) + notify_guess(width, paste0("approximately range/", num_bins) ) } - list(binwidth = binwidth, origin = origin, right = right) + if (is.null(boundary) && is.null(center)) { + boundary <- tilelayer_origin(x_range, width) + center <- boundary + width / 2 + } + + # if we have center but not boundary, compute boundary + if (is.null(boundary)) boundary <- center - width / 2 + + origin <- compute_origin( x_range, width, boundary ) + + list(binwidth = width, origin = origin, right = right) } # Bin individual vector -------------------------------------------------------- +#' Bin vectors +#' +#' A generic and several implementations for binning vectors. +#' +#' @export +#' @param x a vector to bin +#' @param weight if specified, an integer vector of the same length as \code{x} +#' representing the number of occurances of each value in \code{x} +#' @param width the width of a bin +#' @param center the center of a bin +#' @param boundary the boundary of a bin. \code{center} and \code{boundary} should +#' not both be specified. +#' @param right a logical indicating whether the right boundary of a bin is +#' included with the bin. +#' @param pad a logical indicatign whether the bins should be padded to include +#' an empty bin on each side. +#' @param ... additional arguments passed through to instances of the generic bin_vector <- function(x, weight = NULL, ...) { UseMethod("bin_vector") } +#' @rdname bin_vector #' @export -bin_vector.numeric <- function(x, weight = NULL, ..., binwidth = 1, - origin = NULL, right = TRUE, pad = TRUE) { + +bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad = TRUE) { + stopifnot(is.null(width) || (is.numeric(width) && length(width) == 1)) + stopifnot(is.null(center) || (is.numeric(center) && length(center) == 1)) + stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) + stopifnot(is.flag(right)) + if (!is.null(center) && !is.null(boundary)) { + stop("Only one of 'center' and 'boundary' may be specified.") + } + if (length(na.omit(x)) == 0) { return(bin_out()) } - stopifnot(is.numeric(binwidth) && length(binwidth) == 1) - stopifnot(is.null(origin) || (is.numeric(origin) && length(origin) == 1)) + stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) stopifnot(is.flag(right)) if (is.null(weight)) { @@ -175,41 +315,44 @@ bin_vector.numeric <- function(x, weight = NULL, ..., binwidth = 1, weight[is.na(weight)] <- 0 } - if (is.null(origin)) { - origin <- round_any(min(x), binwidth, floor) - } + params <- bin_params(range(x), width, center, boundary, right) - breaks <- seq(origin, max(x) + binwidth, binwidth) - fuzzybreaks <- adjust_breaks(breaks, open = if (right) "right" else "left") + breaks <- seq(params$origin, max(x) + params$binwidth, params$binwidth) + fuzzybreaks <- adjust_breaks(breaks, open = if (params$right) "right" else "left") - bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = right) + bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = params$right) left <- breaks[-length(breaks)] right <- breaks[-1] - x <- (left + right)/2 - width <- diff(breaks) + x <- (left + right) / 2 + bin_widths <- diff(breaks) - count <- as.numeric(tapply(weight, bins, sum, na.rm = TRUE)) - count[is.na(count)] <- 0 + count <- as.integer(tapply(weight, bins, sum, na.rm = TRUE)) + count[is.na(count)] <- 0L if (pad) { - count <- c(0, count, 0) - width <- c(binwidth, width, binwidth) - x <- c(x[1] - binwidth, x, x[length(x)] + binwidth) + count <- c(0L, count, 0L) + bin_widths <- c(params$binwidth, bin_widths, params$binwidth) + x <- c(x[1] - params$binwidth, x, x[length(x)] + params$binwidth) } - bin_out(count, x, width) + bin_out(count, x, bin_widths) } +#' @rdname bin_vector #' @export -bin_vector.POSIXct <- function(x, weight = NULL, ..., binwidth = 1, - origin = NULL, right = TRUE, pad = TRUE) { - if (!is.null(origin)) - origin <- as.numeric(origin) +bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, + center = NULL, boundary = NULL, + right = TRUE, pad=TRUE) { # Convert times to raw numbers (seconds since UNIX epoch), and call bin.numeric - results <- bin_vector(as.numeric(x), weight = weight, binwidth = binwidth, - origin = origin, right = right, pad = pad) + if (inherits(width, "Period")) width <- as.numeric(as.difftime(width, units = "secs")) + if (!is.null(width)) width <- as.numeric(width) + center <- if (!is.null(center)) center <- as.numeric(center) + boundary <- if (!is.null(boundary)) boundary <- as.numeric(boundary) + + results <- bin_vector(as.numeric(x), weight = weight, width = width, + center = center, boundary = boundary, right = right, pad=pad) # Convert some columns from numeric back to POSIXct objects tz <- attr(x, "tzone", TRUE) @@ -222,15 +365,22 @@ bin_vector.POSIXct <- function(x, weight = NULL, ..., binwidth = 1, } #' @export -bin_vector.Date <- function(x, weight = NULL, ..., binwidth = 1, - origin = NULL, right = TRUE, pad = TRUE) { - - if (!is.null(origin)) - origin <- as.numeric(origin) +bin_vector.Date <- function(x, weight = NULL, ..., width = NULL, center=NULL, + boundary = NULL, right = TRUE, pad = TRUE) { # Convert times to raw numbers, and call bin_vector.numeric - results <- bin_vector(as.numeric(x), weight = weight, binwidth = binwidth, - origin = origin, right = right, pad = pad) + + if (!is.null(width)) + width <- as.numeric(width) + if (!is.null(center)) + center<- as.numeric(center) + if (!is.null(boundary)) + boundary <- as.numeric(boundary) + + results <- bin_vector(as.numeric(x), weight = weight, + width = width, + center=center, boundary = boundary, + right = right, pad = pad) # Convert some columns from numeric back to Date objects time_cols <- c("x_", "xmin_", "xmax_") @@ -247,7 +397,7 @@ bin_vector.default <- function(x, weight = NULL, ...) { stop("Don't know how to bin vector of type ", class(x)) } -bin_out <- function(count = numeric(0), x = numeric(0), width = numeric(0), +bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), xmin = x - width / 2, xmax = x + width / 2) { data.frame( count_ = count, diff --git a/R/layer_bars.R b/R/layer_bars.R index fc6bc228..028628e6 100644 --- a/R/layer_bars.R +++ b/R/layer_bars.R @@ -53,7 +53,7 @@ #' # unique values that you want to preserve. If you have many unique #' # values and you want to bin, use layer_histogram #' cocaine %>% ggvis(~price) %>% layer_bars() -#' cocaine %>% ggvis(~price) %>% layer_histograms(binwidth = 100) +#' cocaine %>% ggvis(~price) %>% layer_histograms(width = 100) #' #' # If you have unique x values, you can use layer_bars() as an alternative #' # to layer_points() diff --git a/R/layer_bins.R b/R/layer_bins.R index 8ce02dbd..d4c07e4f 100644 --- a/R/layer_bins.R +++ b/R/layer_bins.R @@ -10,8 +10,8 @@ #' @examples #' # Create histograms and frequency polygons with layers #' mtcars %>% ggvis(~mpg) %>% layer_histograms() -#' mtcars %>% ggvis(~mpg) %>% layer_histograms(binwidth = 2) -#' mtcars %>% ggvis(~mpg) %>% layer_freqpolys(binwidth = 2) +#' mtcars %>% ggvis(~mpg) %>% layer_histograms(width = 2) +#' mtcars %>% ggvis(~mpg) %>% layer_freqpolys(width = 2) #' #' # These are equivalent to combining compute_bin with the corresponding #' # mark @@ -19,12 +19,12 @@ #' #' # With grouping #' mtcars %>% ggvis(~mpg, fill = ~factor(cyl)) %>% group_by(cyl) %>% -#' layer_histograms(binwidth = 2) +#' layer_histograms(width = 2) #' mtcars %>% ggvis(~mpg, stroke = ~factor(cyl)) %>% group_by(cyl) %>% -#' layer_freqpolys(binwidth = 2) -layer_histograms <- function(vis, ..., binwidth = NULL, origin = NULL, - right = TRUE, stack = TRUE) { - +#' layer_freqpolys(width = 2) +layer_histograms <- function(vis, ..., width = NULL, center = NULL, + boundary = NULL, right = TRUE, stack = TRUE) +{ new_props <- merge_props(cur_props(vis), props(...)) check_unsupported_props(new_props, c("x", "y", "x2", "y2"), @@ -38,8 +38,8 @@ layer_histograms <- function(vis, ..., binwidth = NULL, origin = NULL, label = "count") layer_f(vis, function(x) { - x <- compute_bin(x, x_var, binwidth = binwidth, origin = origin, - right = right, pad = FALSE) + x <- compute_bin(x, x_var, width = width, center = center, + boundary = boundary, right = right) if (stack) { x <- compute_stack(x, stack_var = ~count_, group_var = ~x_) @@ -63,7 +63,7 @@ layer_histograms <- function(vis, ..., binwidth = NULL, origin = NULL, #' @rdname layer_histograms #' @export -layer_freqpolys <- function(vis, ..., binwidth = NULL, origin = NULL, +layer_freqpolys <- function(vis, ..., width = NULL, center = NULL, boundary = NULL, right = TRUE) { new_props <- merge_props(cur_props(vis), props(...)) @@ -76,12 +76,13 @@ layer_freqpolys <- function(vis, ..., binwidth = NULL, origin = NULL, vis <- set_scale_label(vis, "x", prop_label(new_props$x.update)) vis <- set_scale_label(vis, "y", "count") - params <- bin_params(range(x_val, na.rm = TRUE), binwidth = value(binwidth), - origin = value(origin), right = value(right)) + params <- bin_params(range(x_val, na.rm = TRUE), width = value(width), + center = value(center), boundary = value(boundary), + right = value(right)) layer_f(vis, function(x) { - x <- compute_bin(x, x_var, binwidth = params$binwidth, - origin = params$origin, right = params$right) + x <- compute_bin(x, x_var, width = params$binwidth, + boundary = params$origin, right = params$right) path_props <- merge_props(new_props, props(x = ~x_, y = ~count_)) x <- emit_paths(x, path_props) diff --git a/R/scales.R b/R/scales.R index 2a90f8ae..ffa587d8 100644 --- a/R/scales.R +++ b/R/scales.R @@ -277,7 +277,7 @@ scale_datetime <- function(vis, property, domain = NULL, range = NULL, #' #' p <- ToothGrowth %>% group_by(supp) %>% #' ggvis(~len, fill = ~supp) %>% -#' layer_histograms(binwidth = 4, stack = TRUE) +#' layer_histograms(width = 4, stack = TRUE) #' #' # Control range of fill scale #' p %>% scale_nominal("fill", range = c("pink", "lightblue")) diff --git a/man/bin_vector.Rd b/man/bin_vector.Rd new file mode 100644 index 00000000..5e7eb0a8 --- /dev/null +++ b/man/bin_vector.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{bin_vector} +\alias{bin_vector} +\alias{bin_vector.POSIXct} +\alias{bin_vector.numeric} +\title{Bin vectors} +\usage{ +bin_vector(x, weight = NULL, ...) + +\method{bin_vector}{numeric}(x, weight = NULL, ..., width = NULL, + center = NULL, boundary = NULL, right = TRUE, pad = TRUE) + +\method{bin_vector}{POSIXct}(x, weight = NULL, ..., width = NULL, + center = NULL, boundary = NULL, right = TRUE, pad = TRUE) +} +\arguments{ +\item{x}{a vector to bin} + +\item{weight}{if specified, an integer vector of the same length as \code{x} +representing the number of occurances of each value in \code{x}} + +\item{...}{additional arguments passed through to instances of the generic} + +\item{width}{the width of a bin} + +\item{center}{the center of a bin} + +\item{boundary}{the boundary of a bin. \code{center} and \code{boundary} should +not both be specified.} + +\item{right}{a logical indicating whether the right boundary of a bin is +included with the bin.} + +\item{pad}{a logical indicatign whether the bins should be padded to include +an empty bin on each side.} +} +\description{ +A generic and several implementations for binning vectors. +} + diff --git a/man/compute_bin.Rd b/man/compute_bin.Rd index 9e831fde..9d9930be 100644 --- a/man/compute_bin.Rd +++ b/man/compute_bin.Rd @@ -3,8 +3,8 @@ \alias{compute_bin} \title{Bin data along a continuous variable} \usage{ -compute_bin(x, x_var, w_var = NULL, binwidth = NULL, origin = NULL, - right = TRUE, pad = TRUE) +compute_bin(x, x_var, w_var = NULL, width = NULL, center = NULL, + boundary = NULL, right = TRUE, pad = TRUE) } \arguments{ \item{x}{Dataset-like object to bin. Built-in methods for data frames, @@ -13,19 +13,29 @@ grouped data frames and ggvis visualisations.} \item{x_var,w_var}{Names of x and weight variables. The x variable must be continuous.} -\item{binwidth}{The width of the bins. The default is \code{NULL}, which +\item{width}{The width of the bins. The default is \code{NULL}, which yields 30 bins that cover the range of the data. You should always override this value, exploring multiple widths to find the best to illustrate the stories in your data.} -\item{origin}{The initial position of the left-most bin. If \code{NULL}, the -the default, will use the smallest value in the dataset.} +\item{center}{The center of one of the bins. Note that if center is above or below +the range of the data, things will be shifted by an appropriate number of \code{width}s. +To center on integers, +for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is +outside the range of the data. At most one of \code{center} and \code{boundary} may be +specified.} + +\item{boundary}{A boundary between two bins. As with \code{center}, things are shifted +when \code{boundary} is outside the range of the data. For example, to center on +integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside +the range of the data. At most one of \code{center} and \code{boundary} may be +specified.} \item{right}{Should bins be right-open, left-closed, or right-closed, left-open.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This -ensures frequency polygons touch 0, and adds padidng between the data +ensures frequency polygons touch 0, and adds padding between the data and axis for histograms.} } \value{ @@ -41,8 +51,8 @@ Bin data along a continuous variable } \examples{ mtcars \%>\% compute_bin(~mpg) -mtcars \%>\% compute_bin(~mpg, binwidth = 10) -mtcars \%>\% group_by(cyl) \%>\% compute_bin(~mpg, binwidth = 10) +mtcars \%>\% compute_bin(~mpg, width = 10) +mtcars \%>\% group_by(cyl) \%>\% compute_bin(~mpg, width = 10) # It doesn't matter whether you transform inside or outside of a vis mtcars \%>\% compute_bin(~mpg) \%>\% ggvis(~x_, ~count_) \%>\% layer_paths() diff --git a/man/layer_bars.Rd b/man/layer_bars.Rd index d315dc4a..6fac923b 100644 --- a/man/layer_bars.Rd +++ b/man/layer_bars.Rd @@ -59,7 +59,7 @@ cocaine \%>\% ggvis(~month, ~weight) \%>\% layer_bars() # unique values that you want to preserve. If you have many unique # values and you want to bin, use layer_histogram cocaine \%>\% ggvis(~price) \%>\% layer_bars() -cocaine \%>\% ggvis(~price) \%>\% layer_histograms(binwidth = 100) +cocaine \%>\% ggvis(~price) \%>\% layer_histograms(width = 100) # If you have unique x values, you can use layer_bars() as an alternative # to layer_points() diff --git a/man/layer_histograms.Rd b/man/layer_histograms.Rd index 274fa4d1..ad6a94cd 100644 --- a/man/layer_histograms.Rd +++ b/man/layer_histograms.Rd @@ -4,23 +4,34 @@ \alias{layer_histograms} \title{Display binned data} \usage{ -layer_histograms(vis, ..., binwidth = NULL, origin = NULL, right = TRUE, - stack = TRUE) +layer_histograms(vis, ..., width = NULL, center = NULL, boundary = NULL, + right = TRUE, stack = TRUE) -layer_freqpolys(vis, ..., binwidth = NULL, origin = NULL, right = TRUE) +layer_freqpolys(vis, ..., width = NULL, center = NULL, boundary = NULL, + right = TRUE) } \arguments{ \item{vis}{Visualisation to modify} \item{...}{Visual properties used to override defaults.} -\item{binwidth}{The width of the bins. The default is \code{NULL}, which +\item{width}{The width of the bins. The default is \code{NULL}, which yields 30 bins that cover the range of the data. You should always override this value, exploring multiple widths to find the best to illustrate the stories in your data.} -\item{origin}{The initial position of the left-most bin. If \code{NULL}, the -the default, will use the smallest value in the dataset.} +\item{center}{The center of one of the bins. Note that if center is above or below +the range of the data, things will be shifted by an appropriate number of \code{width}s. +To center on integers, +for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is +outside the range of the data. At most one of \code{center} and \code{boundary} may be +specified.} + +\item{boundary}{A boundary between two bins. As with \code{center}, things are shifted +when \code{boundary} is outside the range of the data. For example, to center on +integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside +the range of the data. At most one of \code{center} and \code{boundary} may be +specified.} \item{right}{Should bins be right-open, left-closed, or right-closed, left-open.} @@ -33,8 +44,8 @@ Display binned data \examples{ # Create histograms and frequency polygons with layers mtcars \%>\% ggvis(~mpg) \%>\% layer_histograms() -mtcars \%>\% ggvis(~mpg) \%>\% layer_histograms(binwidth = 2) -mtcars \%>\% ggvis(~mpg) \%>\% layer_freqpolys(binwidth = 2) +mtcars \%>\% ggvis(~mpg) \%>\% layer_histograms(width = 2) +mtcars \%>\% ggvis(~mpg) \%>\% layer_freqpolys(width = 2) # These are equivalent to combining compute_bin with the corresponding # mark @@ -42,9 +53,9 @@ mtcars \%>\% compute_bin(~mpg) \%>\% ggvis(~x_, ~count_) \%>\% layer_paths() # With grouping mtcars \%>\% ggvis(~mpg, fill = ~factor(cyl)) \%>\% group_by(cyl) \%>\% - layer_histograms(binwidth = 2) + layer_histograms(width = 2) mtcars \%>\% ggvis(~mpg, stroke = ~factor(cyl)) \%>\% group_by(cyl) \%>\% - layer_freqpolys(binwidth = 2) + layer_freqpolys(width = 2) } \seealso{ \code{\link{layer_bars}} For bar graphs of counts at each unique diff --git a/man/scale_ordinal.Rd b/man/scale_ordinal.Rd index e606ad71..9db8cb90 100644 --- a/man/scale_ordinal.Rd +++ b/man/scale_ordinal.Rd @@ -90,7 +90,7 @@ p \%>\% scale_nominal("x", reverse = TRUE) p <- ToothGrowth \%>\% group_by(supp) \%>\% ggvis(~len, fill = ~supp) \%>\% - layer_histograms(binwidth = 4, stack = TRUE) + layer_histograms(width = 4, stack = TRUE) # Control range of fill scale p \%>\% scale_nominal("fill", range = c("pink", "lightblue")) diff --git a/tests/specs/layer.r b/tests/specs/layer.r index 3e2c35f5..dedc84d0 100644 --- a/tests/specs/layer.r +++ b/tests/specs/layer.r @@ -2,13 +2,13 @@ library(ggvis) mtcars %>% ggvis(x = ~wt) %>% - layer_histograms(binwidth = 1) %>% + layer_histograms(width = 1) %>% save_spec("layer/histogram.json") mtcars %>% ggvis(x = ~wt, stroke = ~cyl) %>% group_by(cyl) %>% - layer_freqpolys(binwidth = 1) %>% + layer_freqpolys(width = 1) %>% save_spec("layer/freqpoly-grouped.json") mtcars %>% diff --git a/tests/specs/layer/freqpoly-grouped.json b/tests/specs/layer/freqpoly-grouped.json index f9adb553..1223893e 100644 --- a/tests/specs/layer/freqpoly-grouped.json +++ b/tests/specs/layer/freqpoly-grouped.json @@ -10,7 +10,7 @@ "count_" : "number" } }, - "values" : "\"cyl\",\"x_\",\"count_\"\n4,0.5,0\n4,1.5,4\n4,2.5,5\n4,3.5,2\n4,4.5,0\n6,0.5,0\n6,1.5,0\n6,2.5,3\n6,3.5,4\n6,4.5,0\n8,0.5,0\n8,1.5,0\n8,2.5,0\n8,3.5,10\n8,4.5,1\n8,5.5,3\n8,6.5,0" + "values" : "\"cyl\",\"x_\",\"count_\"\n4,0.9685,0\n4,1.9685,8\n4,2.9685,3\n4,3.9685,0\n6,1.9685,0\n6,2.9685,7\n6,3.9685,0\n8,1.9685,0\n8,2.9685,3\n8,3.9685,8\n8,4.9685,3\n8,5.9685,0" }, { "name" : "mtcars0/regroup1/bin2", @@ -42,7 +42,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0.2\n6.8" + "values" : "\"domain\"\n0.7185\n6.2185" }, { "name" : "scale/y", @@ -52,7 +52,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n-0.5\n10.5" + "values" : "\"domain\"\n-0.4\n8.4" } ], "scales" : [ diff --git a/tests/specs/layer/histogram.json b/tests/specs/layer/histogram.json index a8ed5b8a..f929810a 100644 --- a/tests/specs/layer/histogram.json +++ b/tests/specs/layer/histogram.json @@ -11,7 +11,7 @@ "stack_lwr_" : "number" } }, - "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1,2,4,0\n2,3,8,0\n3,4,16,0\n4,5,1,0\n5,6,3,0" + "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n0.4685,1.4685,0,0\n1.4685,2.4685,8,0\n2.4685,3.4685,13,0\n3.4685,4.4685,8,0\n4.4685,5.4685,3,0\n5.4685,6.4685,0,0" }, { "name" : "scale/x", @@ -21,7 +21,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0.75\n6.25" + "values" : "\"domain\"\n0.1685\n6.7685" }, { "name" : "scale/y", @@ -31,7 +31,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0\n16.8" + "values" : "\"domain\"\n0\n13.65" } ], "scales" : [ diff --git a/tests/specs/line/layer-line-nominal-x.json b/tests/specs/line/layer-line-nominal-x.json index 5339fb12..a83f2134 100644 --- a/tests/specs/line/layer-line-nominal-x.json +++ b/tests/specs/line/layer-line-nominal-x.json @@ -1,7 +1,7 @@ { "data" : [ { - "name" : "df0/regroup1/arrange2_flat", + "name" : "df0/regroup1/arrange2", "format" : { "type" : "csv", "parse" : { @@ -10,18 +10,6 @@ }, "values" : "\"w\",\"y\",\"z\"\n\"a\",0.175504838116467,\"1\"\n\"b\",0.92520075570792,\"1\"\n\"c\",0.605496409814805,\"1\"\n\"d\",0.879450253676623,\"1\"\n\"a\",0.36204455466941,\"2\"\n\"b\",0.239515871042386,\"2\"\n\"c\",0.00647905003279448,\"2\"\n\"d\",0.154914033133537,\"2\"\n\"a\",0.574154397239909,\"3\"\n\"b\",0.732962509151548,\"3\"\n\"c\",0.489878148771822,\"3\"\n\"d\",0.870066102826968,\"3\"" }, - { - "name" : "df0/regroup1/arrange2", - "source" : "df0/regroup1/arrange2_flat", - "transform" : [ - { - "type" : "treefacet", - "keys" : [ - "data.z" - ] - } - ] - }, { "name" : "scale/stroke", "format" : { @@ -87,36 +75,31 @@ ], "marks" : [ { - "type" : "group", - "from" : { - "data" : "df0/regroup1/arrange2" - }, - "marks" : [ - { - "type" : "line", - "properties" : { - "update" : { - "x" : { - "scale" : "x", - "field" : "data.w" - }, - "y" : { - "scale" : "y", - "field" : "data.y" - }, - "stroke" : { - "scale" : "stroke", - "field" : "data.z" - } - }, - "ggvis" : { - "data" : { - "value" : "df0/regroup1/arrange2" - } - } + "type" : "line", + "properties" : { + "update" : { + "x" : { + "scale" : "x", + "field" : "data.w" + }, + "y" : { + "scale" : "y", + "field" : "data.y" + }, + "stroke" : { + "scale" : "stroke", + "field" : "data.z" + } + }, + "ggvis" : { + "data" : { + "value" : "df0/regroup1/arrange2" } } - ] + }, + "from" : { + "data" : "df0/regroup1/arrange2" + } } ], "width" : 600, diff --git a/tests/specs/line/layer-line.json b/tests/specs/line/layer-line.json index 549fe2a1..ba167246 100644 --- a/tests/specs/line/layer-line.json +++ b/tests/specs/line/layer-line.json @@ -1,7 +1,7 @@ { "data" : [ { - "name" : "df0/regroup1/arrange2_flat", + "name" : "df0/regroup1/arrange2", "format" : { "type" : "csv", "parse" : { @@ -11,18 +11,6 @@ }, "values" : "\"x\",\"y\",\"z\"\n0.0220783790573478,0.605496409814805,\"1\"\n0.39672307879664,0.879450253676623,\"1\"\n0.483322170330212,0.92520075570792,\"1\"\n0.82535388879478,0.175504838116467,\"1\"\n0.0126281732227653,0.154914033133537,\"2\"\n0.592421213164926,0.239515871042386,\"2\"\n0.706804817542434,0.00647905003279448,\"2\"\n0.838725288398564,0.36204455466941,\"2\"\n0.0603655390441418,0.732962509151548,\"3\"\n0.150009069126099,0.574154397239909,\"3\"\n0.192939087515697,0.489878148771822,\"3\"\n0.575029835337773,0.870066102826968,\"3\"" }, - { - "name" : "df0/regroup1/arrange2", - "source" : "df0/regroup1/arrange2_flat", - "transform" : [ - { - "type" : "treefacet", - "keys" : [ - "data.z" - ] - } - ] - }, { "name" : "scale/stroke", "format" : { @@ -89,36 +77,31 @@ ], "marks" : [ { - "type" : "group", - "from" : { - "data" : "df0/regroup1/arrange2" - }, - "marks" : [ - { - "type" : "line", - "properties" : { - "update" : { - "x" : { - "scale" : "x", - "field" : "data.x" - }, - "y" : { - "scale" : "y", - "field" : "data.y" - }, - "stroke" : { - "scale" : "stroke", - "field" : "data.z" - } - }, - "ggvis" : { - "data" : { - "value" : "df0/regroup1/arrange2" - } - } + "type" : "line", + "properties" : { + "update" : { + "x" : { + "scale" : "x", + "field" : "data.x" + }, + "y" : { + "scale" : "y", + "field" : "data.y" + }, + "stroke" : { + "scale" : "stroke", + "field" : "data.z" + } + }, + "ggvis" : { + "data" : { + "value" : "df0/regroup1/arrange2" } } - ] + }, + "from" : { + "data" : "df0/regroup1/arrange2" + } } ], "width" : 600, diff --git a/tests/specs/scales/datetime_hist.json b/tests/specs/scales/datetime_hist.json index c04ff016..7900e07d 100644 --- a/tests/specs/scales/datetime_hist.json +++ b/tests/specs/scales/datetime_hist.json @@ -11,7 +11,7 @@ "stack_lwr_" : "number" } }, - "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1371388245103,1371477077092,1,0\n1371477077092,1371565909080,0,0\n1371565909080,1371654741069,0,0\n1371654741069,1371743573058,1,0\n1371743573058,1371832405047,0,0\n1371832405047,1371921237036,0,0\n1371921237036,1372010069025,0,0\n1372010069025,1372098901014,1,0\n1372098901014,1372187733003,5,0\n1372187733003,1372276564992,1,0\n1372276564992,1372365396981,5,0\n1372365396981,1372454228970,3,0\n1372454228970,1372543060959,3,0\n1372543060959,1372631892948,2,0\n1372631892948,1372720724937,4,0\n1372720724937,1372809556925,2,0\n1372809556925,1372898388914,2,0\n1372898388914,1372987220903,3,0\n1372987220903,1373076052892,2,0\n1373076052892,1373164884881,2,0\n1373164884881,1373253716870,0,0\n1373253716870,1373342548859,0,0\n1373342548859,1373431380848,0,0\n1373431380848,1373520212837,0,0\n1373520212837,1373609044826,0,0\n1373609044826,1373697876815,1,0\n1373697876815,1373786708804,0,0\n1373786708804,1373875540793,1,0\n1373875540793,1373964372782,0,0\n1373964372782,1374053204771,0,0\n1374053204771,1374142036759,1,0" + "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1371262258146,1371351090134,0,0\n1371351090134,1371439922123,1,0\n1371439922123,1371528754112,0,0\n1371528754112,1371617586101,0,0\n1371617586101,1371706418090,0,0\n1371706418090,1371795250079,1,0\n1371795250079,1371884082068,0,0\n1371884082068,1371972914057,0,0\n1371972914057,1372061746046,0,0\n1372061746046,1372150578035,4,0\n1372150578035,1372239410024,2,0\n1372239410024,1372328242013,4,0\n1372328242013,1372417074002,4,0\n1372417074002,1372505905991,3,0\n1372505905991,1372594737979,3,0\n1372594737979,1372683569968,2,0\n1372683569968,1372772401957,3,0\n1372772401957,1372861233946,2,0\n1372861233946,1372950065935,3,0\n1372950065935,1373038897924,2,0\n1373038897924,1373127729913,3,0\n1373127729913,1373216561902,0,0\n1373216561902,1373305393891,0,0\n1373305393891,1373394225880,0,0\n1373394225880,1373483057869,0,0\n1373483057869,1373571889858,0,0\n1373571889858,1373660721847,1,0\n1373660721847,1373749553836,0,0\n1373749553836,1373838385824,1,0\n1373838385824,1373927217813,0,0\n1373927217813,1374016049802,0,0\n1374016049802,1374104881791,1,0\n1374104881791,1374193713780,0,0" }, { "name" : "scale/x", @@ -21,7 +21,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n1371250555520\n1374279726342" + "values" : "\"domain\"\n1371115685364\n1374340286562" }, { "name" : "scale/y", @@ -31,7 +31,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0\n5.25" + "values" : "\"domain\"\n0\n4.2" } ], "scales" : [ diff --git a/tests/testthat/test-compute-bin.r b/tests/testthat/test-compute-bin.r index 0edf7e8b..59fc904e 100644 --- a/tests/testthat/test-compute-bin.r +++ b/tests/testthat/test-compute-bin.r @@ -1,37 +1,48 @@ context("compute_bin") test_that("bin_vector preserves dates and times", { - dates <- as.Date("2013-07-01") + 1:100 - res <- bin_vector(dates, binwidth = 30) + dates <- as.Date("2013-06-01") + 0:100 + UTCtimes <- as.POSIXct('2001-06-01 21:00', tz = 'UTC') + seq(0, 1000, by = 10) + NYtimes <- as.POSIXct('2001-06-01 21:00', tz = 'America/New_York') + 0:10 * 100 + + res <- bin_vector(dates, width = 30) expect_true(inherits(res$x_, "Date")) expect_true(inherits(res$xmin_, "Date")) expect_true(inherits(res$xmax_, "Date")) - expect_identical(sum(res$count_), 100) + expect_identical(sum(res$count_), length(dates)) - times <- as.POSIXct('2001-06-11 21:00', tz = 'America/New_York') + 1:10 * 100 - res <- bin_vector(times, binwidth = 120) + res <- bin_vector(NYtimes, width = 120) expect_true(inherits(res$x_, "POSIXct")) expect_true(inherits(res$xmin_, "POSIXct")) expect_true(inherits(res$xmax_, "POSIXct")) - expect_identical(sum(res$count_), 10) - expect_identical(attr(times, "tzone"), attr(res$x_, "tzone")) + expect_identical(sum(res$count_), length(NYtimes)) + expect_identical(attr(NYtimes, "tzone"), attr(res$x_, "tzone")) - times <- as.POSIXct('2001-06-11 21:00', tz = 'UTC') + seq(1, 1000, by = 10) - res <- bin_vector(times, binwidth = 120) - expect_identical(sum(res$count_), 100) - expect_identical(attr(times, "tzone"), attr(res$x_, "tzone")) + res <- bin_vector(UTCtimes, width = 120) + expect_identical(sum(res$count_), length(UTCtimes)) + expect_identical(attr(UTCtimes, "tzone"), attr(res$x_, "tzone")) - # Can set origin - dates <- as.Date("2013-07-01") + 1:100 - res <- bin_vector(dates, binwidth = 30, origin = as.Date("2013-06-01"), - pad = FALSE) - expect_identical(sum(res$count_), 100) + # Can set boundary + res <- bin_vector(dates, width = 30, boundary = as.Date("2013-06-01"), pad = FALSE) + expect_identical(sum(res$count_), length(dates)) expect_identical(res$xmin_[1], as.Date("2013-06-01")) - res <- bin_vector(times, binwidth = 120, - origin = as.POSIXct('2001-06-11 21:00', tz = 'UTC'), + res <- bin_vector(UTCtimes, width = 120, + boundary = as.POSIXct('2001-06-01 21:07', tz = 'UTC'), + pad = FALSE) + expect_identical(sum(res$count_), length(UTCtimes)) + expect_identical(res$xmin_[5], as.POSIXct('2001-06-01 21:07', tz = 'UTC')) + + # Can set center + res <- bin_vector(dates, width = 30, center=as.Date("2013-07-01"), + pad = FALSE) + expect_identical(sum(res$count_), length(dates)) + expect_identical(res$x_[2], as.Date("2013-07-01")) + + res <- bin_vector(UTCtimes, width = 120, + center = as.POSIXct('2001-06-01 21:15', tz = 'UTC'), pad = FALSE) - expect_identical(sum(res$count_), 100) - expect_identical(res$xmin_[1], as.POSIXct('2001-06-11 21:00', tz = 'UTC')) + expect_identical(sum(res$count_), length(UTCtimes)) + expect_identical(res$x_[8], as.POSIXct('2001-06-01 21:15', tz = 'UTC')) }) diff --git a/vignettes/cookbook.Rmd b/vignettes/cookbook.Rmd index 9fcb069b..afb7d115 100644 --- a/vignettes/cookbook.Rmd +++ b/vignettes/cookbook.Rmd @@ -148,18 +148,46 @@ Basic histogram: faithful %>% ggvis(~eruptions) %>% layer_histograms() ``` -Modify the fill color and binwidth: + +The bin selection can be controled by specifying `width` and at most one of `center` or +`boundary` of one of the bins. `boundary` and `center` may be outside the range of +the data. + +```{r, message = FALSE} +faithful %>% ggvis(~eruptions) %>% layer_histograms(width=0.5, boundary=0) +faithful %>% ggvis(~eruptions) %>% layer_histograms(width=0.5, center=0) +``` + +Modify the fill color and bin width, and add titles for the axes, since the automatic titles aren't very informative: + ```{r, message = FALSE} faithful %>% ggvis(~eruptions, fill := "#fff8dc") %>% - layer_histograms(binwidth = 0.25) + layer_histograms(width = 0.25) ``` +By default, when the number of integer values is small, bins will +be centered at integers and have a width of 1: +```{r, message = FALSE} +cocaine %>% ggvis(~month, fill := "#fff8dc") %>% + layer_histograms() %>% + add_axis("x", title = "month") %>% + add_axis("y", title = "count") +``` + +This can be forced with +```{r, message = FALSE} +cocaine %>% ggvis(~month, fill := "#fff8dc") %>% + layer_histograms(width = 1, center = 0) %>% + add_axis("x", title = "month") %>% + add_axis("y", title = "count") +``` ## Box plots ```{r} -# For now, if you use a categ mtc <- mtcars %>% mutate(cyl = factor(cyl)) mtc %>% ggvis(~cyl, ~mpg) %>% layer_boxplots() ``` + + diff --git a/vignettes/ggvis-basics.Rmd b/vignettes/ggvis-basics.Rmd index 48fd00bb..8383d4fd 100644 --- a/vignettes/ggvis-basics.Rmd +++ b/vignettes/ggvis-basics.Rmd @@ -115,12 +115,14 @@ mtcars %>% layer_points() ``` -You can also connect interactive components to other plot parameters like the binwidth of a histogram: +You can also connect interactive components to other plot parameters like the width +and centers of histogram bins: ```{r} mtcars %>% ggvis(~wt) %>% - layer_histograms(binwidth = input_slider(0, 2, step = 0.1)) + layer_histograms(width = input_slider(0, 2, step = 0.10, label = "width"), + center = input_slider(0, 2, step = 0.05, label = "center")) ``` Behind the scenes, interactive plots are built with [shiny](http://www.rstudio.com/shiny/), and you can currently only have one running at a time in a given R session. To finish with a plot, press the stop button in Rstudio, or close the browser window and then press Escape or Ctrl + C in R. diff --git a/vignettes/overview.Rmd b/vignettes/overview.Rmd index 14f968ea..1f5efaae 100644 --- a/vignettes/overview.Rmd +++ b/vignettes/overview.Rmd @@ -37,7 +37,7 @@ Histogram: ```{r, echo = FALSE, fig.width = 4} # Histogram faithful %>% ggvis(~eruptions, fill := "#ffffdd", fill.hover := "#eebbbb") %>% - layer_histograms(binwidth = 0.2) %>% + layer_histograms(width = 0.2) %>% add_axis("x", title = "eruptions") %>% add_axis("y", title = "count") ``` From 5681183995b1c2997b5fa638b9b99ccb77b47b71 Mon Sep 17 00:00:00 2001 From: Randall Pruim Date: Tue, 16 Sep 2014 02:27:03 -0400 Subject: [PATCH 2/8] right = TRUE -> closed = "right" --- R/compute_bin.R | 88 ++++++++++++++++++++++++----------------- R/layer_bins.R | 13 +++--- man/bin_vector.Rd | 10 +++-- man/compute_bin.Rd | 6 +-- man/layer_histograms.Rd | 8 ++-- 5 files changed, 72 insertions(+), 53 deletions(-) diff --git a/R/compute_bin.R b/R/compute_bin.R index 03db9438..6d9695fb 100644 --- a/R/compute_bin.R +++ b/R/compute_bin.R @@ -19,8 +19,8 @@ #' integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside #' the range of the data. At most one of \code{center} and \code{boundary} may be #' specified. -#' @param right Should bins be right-open, left-closed, or -#' right-closed, left-open. +#' @param closed One of \code{"right"} or \code{"left"} indicating whether +#' right or left edges of bins are included in the bin. #' @param pad If \code{TRUE}, adds empty bins at either end of x. This #' ensures frequency polygons touch 0, and adds padding between the data #' and axis for histograms. @@ -44,14 +44,15 @@ #' mtcars %>% ggvis(~ x_, ~ count_) %>% compute_bin(~mpg) %>% layer_paths() compute_bin <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad = TRUE) { + closed = c("right", "left"), pad = TRUE) { UseMethod("compute_bin") } #' @export compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad = TRUE) { + closed = c("right", "left"), pad = TRUE) { + closed <- match.arg(closed) assert_that(is.formula(x_var)) x_val <- eval_vector(x, x_var) @@ -69,37 +70,39 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, } params <- bin_params(range2(x_val), width = width, center = center, - boundary = boundary, right = right) + boundary = boundary, closed = closed) # note: origin is a boundary, so this works. bin_vector(x_val, weight = w_val, width = params$binwidth, boundary = params$origin, - right = params$right, pad = pad) + closed = params$closed, pad = pad) } #' @export compute_bin.grouped_df <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad = TRUE) { + closed = c("right", "left"), pad = TRUE) { + closed <- match.arg(closed) x_val <- eval_vector(x, x_var) params <- bin_params(range2(x_val), width = width, center = center, - boundary = boundary, right = right) + boundary = boundary, closed = closed) dplyr::do(x, compute_bin(., x_var, w_var = w_var, width = params$binwidth, boundary = params$origin, # origin is a boundary, so this works - right = params$right, + closed = params$closed, pad = pad)) } #' @export compute_bin.ggvis <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad = TRUE) { + closed = c("right", "left"), pad = TRUE) { + closed <- match.arg(closed) args <- list(x_var = x_var, w_var = w_var, width = width, - center = center, boundary = boundary, right = right, pad = pad) + center = center, boundary = boundary, closed = closed, pad = pad) register_computation(x, args, "bin", function(data, args) { output <- do_call(compute_bin, quote(data), .args = args) @@ -110,7 +113,7 @@ compute_bin.ggvis <- function(x, x_var, w_var = NULL, width = NULL, # Compute parameters ----------------------------------------------------------- bin_params <- function(x_range, width = NULL, center = NULL, boundary = NULL, - right = TRUE) { + closed=c("right", "left")) { UseMethod("bin_params") } @@ -133,7 +136,8 @@ compute_origin <- function(x_range, width, boundary) { bin_params.numeric <- function(x_range, width = NULL, center = NULL, boundary = NULL, - right = TRUE) { + closed = c("right", "left")) { + closed <- match.arg(closed) stopifnot(is.numeric(x_range) && length(x_range) == 2) if (!is.null(boundary) && !is.null(center)) { stop( "Only one of 'boundary' and 'center' may be specified." ) @@ -157,14 +161,14 @@ bin_params.numeric <- function(x_range, width = NULL, origin <- compute_origin( x_range, width, boundary ) - list(binwidth = width, origin = origin, right = right) + list(binwidth = width, origin = origin, closed = closed) } #' @export bin_params.POSIXct <- function(x_range, width = NULL, center = NULL, boundary = NULL, - right = TRUE) { - + closed = c("right", "left")) { + closed <- match.arg(closed) if (!is.null(boundary) && !is.null(center)) { stop( "Only one of 'boundary' and 'center' may be specified." ) } @@ -197,7 +201,7 @@ bin_params.POSIXct <- function(x_range, width = NULL, origin <- compute_origin( x_range, width, boundary ) - list(binwidth = width, origin = origin, right = right, + list(binwidth = width, origin = origin, closed = closed, origin.POSIX = structure(origin, class = c("POSIXct", "POSIXt")) ) } @@ -205,8 +209,10 @@ bin_params.POSIXct <- function(x_range, width = NULL, ### check on this -- get center in instead of origin #' @export bin_params.Date <- function(x_range, width = NULL, center = NULL, - boundary = NULL, right = TRUE) { - + boundary = NULL, closed = c("right", "left")) { + + closed <- match.arg(closed) + # convert things to numeric as we go along x_range <- as.numeric(x_range) @@ -232,15 +238,17 @@ bin_params.Date <- function(x_range, width = NULL, center = NULL, # do we need to convert this back to date format? - list(binwidth = width, origin = origin, right = right) + list(binwidth = width, origin = origin, closed = closed) } #' @export bin_params.integer <- function(x_range, width = NULL, center = NULL, boundary = NULL, - right = TRUE) { + closed = c("right", "left")) { + closed <- match.arg(closed) + if (!is.null(boundary) && !is.null(center)) { stop( "Only one of 'boundary' and 'center' may be specified." ) } @@ -262,7 +270,7 @@ bin_params.integer <- function(x_range, width = NULL, origin <- compute_origin( x_range, width, boundary ) - list(binwidth = width, origin = origin, right = right) + list(binwidth = width, origin = origin, closed = closed) } # Bin individual vector -------------------------------------------------------- @@ -279,8 +287,8 @@ bin_params.integer <- function(x_range, width = NULL, #' @param center the center of a bin #' @param boundary the boundary of a bin. \code{center} and \code{boundary} should #' not both be specified. -#' @param right a logical indicating whether the right boundary of a bin is -#' included with the bin. +#' @param closed One of \code{"right"} or \code{"left"} indicating whether +#' right or left edges of bins are included in the bin. #' @param pad a logical indicatign whether the bins should be padded to include #' an empty bin on each side. #' @param ... additional arguments passed through to instances of the generic @@ -293,11 +301,14 @@ bin_vector <- function(x, weight = NULL, ...) { bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad = TRUE) { + closed = c("right", "left"), pad = TRUE) { stopifnot(is.null(width) || (is.numeric(width) && length(width) == 1)) stopifnot(is.null(center) || (is.numeric(center) && length(center) == 1)) stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) - stopifnot(is.flag(right)) + stopifnot(is.character(closed)) + + closed <- match.arg(closed) + if (!is.null(center) && !is.null(boundary)) { stop("Only one of 'center' and 'boundary' may be specified.") } @@ -307,7 +318,7 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, } stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) - stopifnot(is.flag(right)) + stopifnot(is.character(closed)) if (is.null(weight)) { weight <- rep(1, length(x)) @@ -315,12 +326,12 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, weight[is.na(weight)] <- 0 } - params <- bin_params(range(x), width, center, boundary, right) + params <- bin_params(range(x), width, center, boundary, closed) breaks <- seq(params$origin, max(x) + params$binwidth, params$binwidth) - fuzzybreaks <- adjust_breaks(breaks, open = if (params$right) "right" else "left") + fuzzybreaks <- adjust_breaks(breaks, closed = params$closed) - bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = params$right) + bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = params$closed == "right") left <- breaks[-length(breaks)] right <- breaks[-1] x <- (left + right) / 2 @@ -343,8 +354,9 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, center = NULL, boundary = NULL, - right = TRUE, pad=TRUE) { + closed = c("right", "left"), pad=TRUE) { + closed <- match.arg(closed) # Convert times to raw numbers (seconds since UNIX epoch), and call bin.numeric if (inherits(width, "Period")) width <- as.numeric(as.difftime(width, units = "secs")) if (!is.null(width)) width <- as.numeric(width) @@ -352,7 +364,7 @@ bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, boundary <- if (!is.null(boundary)) boundary <- as.numeric(boundary) results <- bin_vector(as.numeric(x), weight = weight, width = width, - center = center, boundary = boundary, right = right, pad=pad) + center = center, boundary = boundary, closed = closed, pad=pad) # Convert some columns from numeric back to POSIXct objects tz <- attr(x, "tzone", TRUE) @@ -366,8 +378,10 @@ bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, #' @export bin_vector.Date <- function(x, weight = NULL, ..., width = NULL, center=NULL, - boundary = NULL, right = TRUE, pad = TRUE) { + boundary = NULL, closed = c("right", "left"), pad = TRUE) { + closed <- match.arg(closed) + # Convert times to raw numbers, and call bin_vector.numeric if (!is.null(width)) @@ -380,7 +394,7 @@ bin_vector.Date <- function(x, weight = NULL, ..., width = NULL, center=NULL, results <- bin_vector(as.numeric(x), weight = weight, width = width, center=center, boundary = boundary, - right = right, pad = pad) + closed = closed, pad = pad) # Convert some columns from numeric back to Date objects time_cols <- c("x_", "xmin_", "xmax_") @@ -411,11 +425,11 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), # Adapt break fuzziness from base::hist - this protects from floating # point rounding errors -adjust_breaks <- function(breaks, open = "right") { - open <- match.arg(open, c("left", "right")) +adjust_breaks <- function(breaks, closed = "right") { + closed <- match.arg(closed, c("right", "left")) diddle <- 1e-08 * median(diff(breaks)) - if (open == "left") { + if (closed == "right") { fuzz <- c(-diddle, rep.int(diddle, length(breaks) - 1)) } else { fuzz <- c(rep.int(-diddle, length(breaks) - 1), diddle) diff --git a/R/layer_bins.R b/R/layer_bins.R index d4c07e4f..6bd9cff8 100644 --- a/R/layer_bins.R +++ b/R/layer_bins.R @@ -23,8 +23,9 @@ #' mtcars %>% ggvis(~mpg, stroke = ~factor(cyl)) %>% group_by(cyl) %>% #' layer_freqpolys(width = 2) layer_histograms <- function(vis, ..., width = NULL, center = NULL, - boundary = NULL, right = TRUE, stack = TRUE) + boundary = NULL, closed = c("right", "left"), stack = TRUE) { + closed <- match.arg(closed) new_props <- merge_props(cur_props(vis), props(...)) check_unsupported_props(new_props, c("x", "y", "x2", "y2"), @@ -39,7 +40,7 @@ layer_histograms <- function(vis, ..., width = NULL, center = NULL, layer_f(vis, function(x) { x <- compute_bin(x, x_var, width = width, center = center, - boundary = boundary, right = right) + boundary = boundary, closed = closed) if (stack) { x <- compute_stack(x, stack_var = ~count_, group_var = ~x_) @@ -64,7 +65,9 @@ layer_histograms <- function(vis, ..., width = NULL, center = NULL, #' @rdname layer_histograms #' @export layer_freqpolys <- function(vis, ..., width = NULL, center = NULL, boundary = NULL, - right = TRUE) { + closed = c("right", "left")) { + closed <- match.arg(closed) + new_props <- merge_props(cur_props(vis), props(...)) check_unsupported_props(new_props, c("x", "y"), @@ -78,11 +81,11 @@ layer_freqpolys <- function(vis, ..., width = NULL, center = NULL, boundary = NU params <- bin_params(range(x_val, na.rm = TRUE), width = value(width), center = value(center), boundary = value(boundary), - right = value(right)) + closed = value(closed)) layer_f(vis, function(x) { x <- compute_bin(x, x_var, width = params$binwidth, - boundary = params$origin, right = params$right) + boundary = params$origin, closed = params$closed) path_props <- merge_props(new_props, props(x = ~x_, y = ~count_)) x <- emit_paths(x, path_props) diff --git a/man/bin_vector.Rd b/man/bin_vector.Rd index 5e7eb0a8..ac1e9e89 100644 --- a/man/bin_vector.Rd +++ b/man/bin_vector.Rd @@ -8,10 +8,12 @@ bin_vector(x, weight = NULL, ...) \method{bin_vector}{numeric}(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, right = TRUE, pad = TRUE) + center = NULL, boundary = NULL, closed = c("right", "left"), + pad = TRUE) \method{bin_vector}{POSIXct}(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, right = TRUE, pad = TRUE) + center = NULL, boundary = NULL, closed = c("right", "left"), + pad = TRUE) } \arguments{ \item{x}{a vector to bin} @@ -28,8 +30,8 @@ representing the number of occurances of each value in \code{x}} \item{boundary}{the boundary of a bin. \code{center} and \code{boundary} should not both be specified.} -\item{right}{a logical indicating whether the right boundary of a bin is -included with the bin.} +\item{closed}{One of \code{"right"} or \code{"left"} indicating whether +right or left edges of bins are included in the bin.} \item{pad}{a logical indicatign whether the bins should be padded to include an empty bin on each side.} diff --git a/man/compute_bin.Rd b/man/compute_bin.Rd index 9d9930be..fa348052 100644 --- a/man/compute_bin.Rd +++ b/man/compute_bin.Rd @@ -4,7 +4,7 @@ \title{Bin data along a continuous variable} \usage{ compute_bin(x, x_var, w_var = NULL, width = NULL, center = NULL, - boundary = NULL, right = TRUE, pad = TRUE) + boundary = NULL, closed = c("right", "left"), pad = TRUE) } \arguments{ \item{x}{Dataset-like object to bin. Built-in methods for data frames, @@ -31,8 +31,8 @@ integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is ou the range of the data. At most one of \code{center} and \code{boundary} may be specified.} -\item{right}{Should bins be right-open, left-closed, or -right-closed, left-open.} +\item{closed}{One of \code{"right"} or \code{"left"} indicating whether +right or left edges of bins are included in the bin.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures frequency polygons touch 0, and adds padding between the data diff --git a/man/layer_histograms.Rd b/man/layer_histograms.Rd index ad6a94cd..50d11a16 100644 --- a/man/layer_histograms.Rd +++ b/man/layer_histograms.Rd @@ -5,10 +5,10 @@ \title{Display binned data} \usage{ layer_histograms(vis, ..., width = NULL, center = NULL, boundary = NULL, - right = TRUE, stack = TRUE) + closed = c("right", "left"), stack = TRUE) layer_freqpolys(vis, ..., width = NULL, center = NULL, boundary = NULL, - right = TRUE) + closed = c("right", "left")) } \arguments{ \item{vis}{Visualisation to modify} @@ -33,8 +33,8 @@ integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is ou the range of the data. At most one of \code{center} and \code{boundary} may be specified.} -\item{right}{Should bins be right-open, left-closed, or -right-closed, left-open.} +\item{closed}{One of \code{"right"} or \code{"left"} indicating whether +right or left edges of bins are included in the bin.} \item{stack}{If \code{TRUE}, will automatically stack overlapping bars.} } From e644c25a870b87ede6ea95f4bc6adbede597267e Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 17 Sep 2014 14:18:51 -0500 Subject: [PATCH 3/8] Revert extraneous changes and style fixes --- .gitignore | 1 - NAMESPACE | 3 +- R/compute_bin.R | 155 ++++++++++----------- R/layer_bins.R | 4 +- man/bin_vector.Rd | 11 +- tests/specs/layer/freqpoly-grouped.json | 6 +- tests/specs/layer/histogram.json | 6 +- tests/specs/line/layer-line-nominal-x.json | 76 +++++----- tests/specs/line/layer-line.json | 73 ++++++---- tests/specs/scales/datetime_hist.json | 6 +- tests/testthat/test-compute-bin.r | 3 +- 11 files changed, 172 insertions(+), 172 deletions(-) diff --git a/.gitignore b/.gitignore index c834446e..807ea251 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ .Rproj.user .Rhistory .RData -*~ diff --git a/NAMESPACE b/NAMESPACE index 3714b378..5b7c8024 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ S3method(extract_reactives,default) S3method(extract_reactives,ggvis_props) S3method(extract_reactives,prop_reactive) S3method(filter,ggvis) +S3method(filter,reactive) S3method(format,ggvis_axis) S3method(format,ggvis_legend) S3method(format,ggvis_props) @@ -179,7 +180,6 @@ export(arrange.reactive) export(auto_group) export(axis_props) export(band) -export(bin_vector) export(bind_shiny) export(bind_shiny_ui) export(compute_align) @@ -205,7 +205,6 @@ export(explain) export(export_png) export(export_svg) export(filter) -export(filter.reactive) export(fullseq) export(get_data) export(ggvis) diff --git a/R/compute_bin.R b/R/compute_bin.R index 6d9695fb..9d342f14 100644 --- a/R/compute_bin.R +++ b/R/compute_bin.R @@ -4,26 +4,26 @@ #' grouped data frames and ggvis visualisations. #' @param x_var,w_var Names of x and weight variables. The x variable must be #' continuous. -#' @param width The width of the bins. The default is \code{NULL}, which -#' yields 30 bins that cover the range of the data. You should always override -#' this value, exploring multiple widths to find the best to illustrate the -#' stories in your data. -#' @param center The center of one of the bins. Note that if center is above or below -#' the range of the data, things will be shifted by an appropriate number of \code{width}s. -#' To center on integers, -#' for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is -#' outside the range of the data. At most one of \code{center} and \code{boundary} may be +#' @param width The width of the bins. The default is \code{NULL}, which yields +#' 30 bins that cover the range of the data. You should always override this +#' value, exploring multiple widths to find the best to illustrate the stories +#' in your data. +#' @param center The center of one of the bins. Note that if center is above or +#' below the range of the data, things will be shifted by an appropriate +#' number of \code{width}s. To center on integers, for example, use +#' \code{width=1} and \code{center=0}, even if \code{0} is outside the range +#' of the data. At most one of \code{center} and \code{boundary} may be #' specified. -#' @param boundary A boundary between two bins. As with \code{center}, things are shifted -#' when \code{boundary} is outside the range of the data. For example, to center on -#' integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside -#' the range of the data. At most one of \code{center} and \code{boundary} may be -#' specified. -#' @param closed One of \code{"right"} or \code{"left"} indicating whether -#' right or left edges of bins are included in the bin. -#' @param pad If \code{TRUE}, adds empty bins at either end of x. This -#' ensures frequency polygons touch 0, and adds padding between the data -#' and axis for histograms. +#' @param boundary A boundary between two bins. As with \code{center}, things +#' are shifted when \code{boundary} is outside the range of the data. For +#' example, to center on integers, use \code{width = 1} and \code{boundary = +#' 0.5}, even if \code{1} is outside the range of the data. At most one of +#' \code{center} and \code{boundary} may be specified. +#' @param closed One of \code{"right"} or \code{"left"} indicating whether right +#' or left edges of bins are included in the bin. +#' @param pad If \code{TRUE}, adds empty bins at either end of x. This ensures +#' frequency polygons touch 0, and adds padding between the data and axis for +#' histograms. #' @seealso \code{\link{compute_count}} For counting cases at specific locations #' of a continuous variable. This is useful when the variable is continuous #' but the data is granular. @@ -72,9 +72,8 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, params <- bin_params(range2(x_val), width = width, center = center, boundary = boundary, closed = closed) - # note: origin is a boundary, so this works. - bin_vector(x_val, weight = w_val, width = params$binwidth, boundary = params$origin, - closed = params$closed, pad = pad) + bin_vector(x_val, weight = w_val, width = params$binwidth, + boundary = params$origin, closed = params$closed, pad = pad) } #' @export @@ -113,34 +112,17 @@ compute_bin.ggvis <- function(x, x_var, w_var = NULL, width = NULL, # Compute parameters ----------------------------------------------------------- bin_params <- function(x_range, width = NULL, center = NULL, boundary = NULL, - closed=c("right", "left")) { + closed = c("right", "left")) { UseMethod("bin_params") } -# compute origin from x_range and width -tilelayer_origin <- function(x_range, width) { - stopifnot(is.numeric(x_range) && length(x_range) == 2) - stopifnot(is.numeric(width) && length(width) == 1) - num_central_bins <- trunc(diff(x_range) / width) - 1 - side_width <- (diff(x_range) - num_central_bins * width) / 2 # width of partial tiles on either side - x_range[1] + side_width - width - # adjust_breaks should be called to handle any round-off fuzziness issues -} - -compute_origin <- function(x_range, width, boundary) { - shift <- floor( (x_range[1] - boundary) / width ) - boundary + shift * width -} - #' @export -bin_params.numeric <- function(x_range, width = NULL, - center = NULL, - boundary = NULL, - closed = c("right", "left")) { +bin_params.numeric <- function(x_range, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { closed <- match.arg(closed) stopifnot(is.numeric(x_range) && length(x_range) == 2) if (!is.null(boundary) && !is.null(center)) { - stop( "Only one of 'boundary' and 'center' may be specified." ) + stop("Only one of 'boundary' and 'center' may be specified.") } if (is.null(width)) { @@ -150,7 +132,6 @@ bin_params.numeric <- function(x_range, width = NULL, # if neither edge nor center given, compute both using tile layer's algorithm # this puts min and max of data in outer half of their bins. - if (is.null(boundary) && is.null(center)) { boundary <- tilelayer_origin(x_range, width) # center <- boundary + width / 2 @@ -159,18 +140,17 @@ bin_params.numeric <- function(x_range, width = NULL, # if center given but not boundary, compute boundary from center if (is.null(boundary)) boundary <- center - width / 2 - origin <- compute_origin( x_range, width, boundary ) + origin <- compute_origin(x_range, width, boundary) list(binwidth = width, origin = origin, closed = closed) } #' @export -bin_params.POSIXct <- function(x_range, width = NULL, - center = NULL, boundary = NULL, - closed = c("right", "left")) { +bin_params.POSIXct <- function(x_range, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { closed <- match.arg(closed) if (!is.null(boundary) && !is.null(center)) { - stop( "Only one of 'boundary' and 'center' may be specified." ) + stop("Only one of 'boundary' and 'center' may be specified.") } x_range <- as.numeric(x_range) @@ -199,22 +179,20 @@ bin_params.POSIXct <- function(x_range, width = NULL, # if we have center but not boundary, compute boundary if (is.null(boundary)) boundary <- center - width / 2 - origin <- compute_origin( x_range, width, boundary ) + origin <- compute_origin(x_range, width, boundary) list(binwidth = width, origin = origin, closed = closed, origin.POSIX = structure(origin, class = c("POSIXct", "POSIXt")) ) } -### check on this -- get center in instead of origin #' @export bin_params.Date <- function(x_range, width = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { - + closed <- match.arg(closed) - - # convert things to numeric as we go along + # convert things to numeric as we go along x_range <- as.numeric(x_range) if (is.null(width)) { @@ -234,10 +212,9 @@ bin_params.Date <- function(x_range, width = NULL, center = NULL, boundary <- center - width / 2 } - origin <- compute_origin( x_range, width, boundary ) + origin <- compute_origin(x_range, width, boundary) # do we need to convert this back to date format? - list(binwidth = width, origin = origin, closed = closed) } @@ -248,16 +225,16 @@ bin_params.integer <- function(x_range, width = NULL, closed = c("right", "left")) { closed <- match.arg(closed) - + if (!is.null(boundary) && !is.null(center)) { - stop( "Only one of 'boundary' and 'center' may be specified." ) + stop("Only one of 'boundary' and 'center' may be specified.") } if (is.null(width)) { width <- max(pretty(round(diff(x_range) / 30))) - if (width <= 1) width <- 1 - num_bins <- ceiling( diff(x_range) / width ) - notify_guess(width, paste0("approximately range/", num_bins) ) + if (width < 1) width <- 1 + num_bins <- ceiling(diff(x_range) / width) + notify_guess(width, paste0("approximately range/", num_bins)) } if (is.null(boundary) && is.null(center)) { @@ -268,18 +245,33 @@ bin_params.integer <- function(x_range, width = NULL, # if we have center but not boundary, compute boundary if (is.null(boundary)) boundary <- center - width / 2 - origin <- compute_origin( x_range, width, boundary ) + origin <- compute_origin(x_range, width, boundary) list(binwidth = width, origin = origin, closed = closed) } +# compute origin from x_range and width +tilelayer_origin <- function(x_range, width) { + stopifnot(is.numeric(x_range) && length(x_range) == 2) + stopifnot(is.numeric(width) && length(width) == 1) + num_central_bins <- trunc(diff(x_range) / width) - 1 + # width of partial tiles on either side + side_width <- (diff(x_range) - num_central_bins * width) / 2 + x_range[1] + side_width - width + # adjust_breaks should be called to handle any round-off fuzziness issues +} + +compute_origin <- function(x_range, width, boundary) { + shift <- floor((x_range[1] - boundary) / width) + boundary + shift * width +} + # Bin individual vector -------------------------------------------------------- #' Bin vectors #' #' A generic and several implementations for binning vectors. #' -#' @export #' @param x a vector to bin #' @param weight if specified, an integer vector of the same length as \code{x} #' representing the number of occurances of each value in \code{x} @@ -287,28 +279,27 @@ bin_params.integer <- function(x_range, width = NULL, #' @param center the center of a bin #' @param boundary the boundary of a bin. \code{center} and \code{boundary} should #' not both be specified. -#' @param closed One of \code{"right"} or \code{"left"} indicating whether +#' @param closed One of \code{"right"} or \code{"left"} indicating whether #' right or left edges of bins are included in the bin. #' @param pad a logical indicatign whether the bins should be padded to include #' an empty bin on each side. #' @param ... additional arguments passed through to instances of the generic +#' @keywords internal bin_vector <- function(x, weight = NULL, ...) { UseMethod("bin_vector") } -#' @rdname bin_vector #' @export - bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad = TRUE) { + stopifnot(is.null(width) || (is.numeric(width) && length(width) == 1)) stopifnot(is.null(center) || (is.numeric(center) && length(center) == 1)) stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) - stopifnot(is.character(closed)) - + closed <- match.arg(closed) - + if (!is.null(center) && !is.null(boundary)) { stop("Only one of 'center' and 'boundary' may be specified.") } @@ -349,9 +340,7 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, bin_out(count, x, bin_widths) } -#' @rdname bin_vector #' @export - bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad=TRUE) { @@ -360,11 +349,12 @@ bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, # Convert times to raw numbers (seconds since UNIX epoch), and call bin.numeric if (inherits(width, "Period")) width <- as.numeric(as.difftime(width, units = "secs")) if (!is.null(width)) width <- as.numeric(width) - center <- if (!is.null(center)) center <- as.numeric(center) - boundary <- if (!is.null(boundary)) boundary <- as.numeric(boundary) + if (!is.null(center)) center <- as.numeric(center) + if (!is.null(boundary)) boundary <- as.numeric(boundary) results <- bin_vector(as.numeric(x), weight = weight, width = width, - center = center, boundary = boundary, closed = closed, pad=pad) + center = center, boundary = boundary, closed = closed, + pad = pad) # Convert some columns from numeric back to POSIXct objects tz <- attr(x, "tzone", TRUE) @@ -381,19 +371,14 @@ bin_vector.Date <- function(x, weight = NULL, ..., width = NULL, center=NULL, boundary = NULL, closed = c("right", "left"), pad = TRUE) { closed <- match.arg(closed) - - # Convert times to raw numbers, and call bin_vector.numeric - if (!is.null(width)) - width <- as.numeric(width) - if (!is.null(center)) - center<- as.numeric(center) - if (!is.null(boundary)) - boundary <- as.numeric(boundary) + # Convert times to raw numbers, and call bin_vector.numeric + if (!is.null(width)) width <- as.numeric(width) + if (!is.null(center)) center<- as.numeric(center) + if (!is.null(boundary)) boundary <- as.numeric(boundary) - results <- bin_vector(as.numeric(x), weight = weight, - width = width, - center=center, boundary = boundary, + results <- bin_vector(as.numeric(x), weight = weight, width = width, + center = center, boundary = boundary, closed = closed, pad = pad) # Convert some columns from numeric back to Date objects diff --git a/R/layer_bins.R b/R/layer_bins.R index 6bd9cff8..64d25d5f 100644 --- a/R/layer_bins.R +++ b/R/layer_bins.R @@ -23,8 +23,8 @@ #' mtcars %>% ggvis(~mpg, stroke = ~factor(cyl)) %>% group_by(cyl) %>% #' layer_freqpolys(width = 2) layer_histograms <- function(vis, ..., width = NULL, center = NULL, - boundary = NULL, closed = c("right", "left"), stack = TRUE) -{ + boundary = NULL, closed = c("right", "left"), + stack = TRUE) { closed <- match.arg(closed) new_props <- merge_props(cur_props(vis), props(...)) diff --git a/man/bin_vector.Rd b/man/bin_vector.Rd index ac1e9e89..f02c0bb3 100644 --- a/man/bin_vector.Rd +++ b/man/bin_vector.Rd @@ -1,19 +1,9 @@ % Generated by roxygen2 (4.0.2): do not edit by hand \name{bin_vector} \alias{bin_vector} -\alias{bin_vector.POSIXct} -\alias{bin_vector.numeric} \title{Bin vectors} \usage{ bin_vector(x, weight = NULL, ...) - -\method{bin_vector}{numeric}(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, closed = c("right", "left"), - pad = TRUE) - -\method{bin_vector}{POSIXct}(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, closed = c("right", "left"), - pad = TRUE) } \arguments{ \item{x}{a vector to bin} @@ -39,4 +29,5 @@ an empty bin on each side.} \description{ A generic and several implementations for binning vectors. } +\keyword{internal} diff --git a/tests/specs/layer/freqpoly-grouped.json b/tests/specs/layer/freqpoly-grouped.json index 1223893e..f9adb553 100644 --- a/tests/specs/layer/freqpoly-grouped.json +++ b/tests/specs/layer/freqpoly-grouped.json @@ -10,7 +10,7 @@ "count_" : "number" } }, - "values" : "\"cyl\",\"x_\",\"count_\"\n4,0.9685,0\n4,1.9685,8\n4,2.9685,3\n4,3.9685,0\n6,1.9685,0\n6,2.9685,7\n6,3.9685,0\n8,1.9685,0\n8,2.9685,3\n8,3.9685,8\n8,4.9685,3\n8,5.9685,0" + "values" : "\"cyl\",\"x_\",\"count_\"\n4,0.5,0\n4,1.5,4\n4,2.5,5\n4,3.5,2\n4,4.5,0\n6,0.5,0\n6,1.5,0\n6,2.5,3\n6,3.5,4\n6,4.5,0\n8,0.5,0\n8,1.5,0\n8,2.5,0\n8,3.5,10\n8,4.5,1\n8,5.5,3\n8,6.5,0" }, { "name" : "mtcars0/regroup1/bin2", @@ -42,7 +42,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0.7185\n6.2185" + "values" : "\"domain\"\n0.2\n6.8" }, { "name" : "scale/y", @@ -52,7 +52,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n-0.4\n8.4" + "values" : "\"domain\"\n-0.5\n10.5" } ], "scales" : [ diff --git a/tests/specs/layer/histogram.json b/tests/specs/layer/histogram.json index f929810a..a8ed5b8a 100644 --- a/tests/specs/layer/histogram.json +++ b/tests/specs/layer/histogram.json @@ -11,7 +11,7 @@ "stack_lwr_" : "number" } }, - "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n0.4685,1.4685,0,0\n1.4685,2.4685,8,0\n2.4685,3.4685,13,0\n3.4685,4.4685,8,0\n4.4685,5.4685,3,0\n5.4685,6.4685,0,0" + "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1,2,4,0\n2,3,8,0\n3,4,16,0\n4,5,1,0\n5,6,3,0" }, { "name" : "scale/x", @@ -21,7 +21,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0.1685\n6.7685" + "values" : "\"domain\"\n0.75\n6.25" }, { "name" : "scale/y", @@ -31,7 +31,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0\n13.65" + "values" : "\"domain\"\n0\n16.8" } ], "scales" : [ diff --git a/tests/specs/line/layer-line-nominal-x.json b/tests/specs/line/layer-line-nominal-x.json index a83f2134..9fa3e221 100644 --- a/tests/specs/line/layer-line-nominal-x.json +++ b/tests/specs/line/layer-line-nominal-x.json @@ -1,7 +1,7 @@ { "data" : [ { - "name" : "df0/regroup1/arrange2", + "name" : "df0/regroup1/arrange2_flat", "format" : { "type" : "csv", "parse" : { @@ -10,19 +10,29 @@ }, "values" : "\"w\",\"y\",\"z\"\n\"a\",0.175504838116467,\"1\"\n\"b\",0.92520075570792,\"1\"\n\"c\",0.605496409814805,\"1\"\n\"d\",0.879450253676623,\"1\"\n\"a\",0.36204455466941,\"2\"\n\"b\",0.239515871042386,\"2\"\n\"c\",0.00647905003279448,\"2\"\n\"d\",0.154914033133537,\"2\"\n\"a\",0.574154397239909,\"3\"\n\"b\",0.732962509151548,\"3\"\n\"c\",0.489878148771822,\"3\"\n\"d\",0.870066102826968,\"3\"" }, + { + "name" : "df0/regroup1/arrange2", + "source" : "df0/regroup1/arrange2_flat", + "transform" : [ + { + "type" : "treefacet", + "keys" : [ + "data.z" + ] + } + ] + }, { "name" : "scale/stroke", "format" : { - "type" : "csv", - "parse" : {} + "type" : "csv" }, "values" : "\"domain\"\n\"1\"\n\"2\"\n\"3\"" }, { "name" : "scale/x", "format" : { - "type" : "csv", - "parse" : {} + "type" : "csv" }, "values" : "\"domain\"\n\"a\"\n\"b\"\n\"c\"\n\"d\"" }, @@ -75,31 +85,36 @@ ], "marks" : [ { - "type" : "line", - "properties" : { - "update" : { - "x" : { - "scale" : "x", - "field" : "data.w" - }, - "y" : { - "scale" : "y", - "field" : "data.y" - }, - "stroke" : { - "scale" : "stroke", - "field" : "data.z" - } - }, - "ggvis" : { - "data" : { - "value" : "df0/regroup1/arrange2" - } - } - }, + "type" : "group", "from" : { "data" : "df0/regroup1/arrange2" - } + }, + "marks" : [ + { + "type" : "line", + "properties" : { + "update" : { + "x" : { + "scale" : "x", + "field" : "data.w" + }, + "y" : { + "scale" : "y", + "field" : "data.y" + }, + "stroke" : { + "scale" : "stroke", + "field" : "data.z" + } + }, + "ggvis" : { + "data" : { + "value" : "df0/regroup1/arrange2" + } + } + } + } + ] } ], "width" : 600, @@ -129,16 +144,13 @@ "title" : "y" } ], - "padding" : null, "ggvis_opts" : { "width" : 600, "height" : 400, "keep_aspect" : false, "resizable" : true, - "padding" : {}, "duration" : 250, "renderer" : "svg", "hover_duration" : 0 - }, - "handlers" : null + } } diff --git a/tests/specs/line/layer-line.json b/tests/specs/line/layer-line.json index ba167246..1052e2e4 100644 --- a/tests/specs/line/layer-line.json +++ b/tests/specs/line/layer-line.json @@ -1,7 +1,7 @@ { "data" : [ { - "name" : "df0/regroup1/arrange2", + "name" : "df0/regroup1/arrange2_flat", "format" : { "type" : "csv", "parse" : { @@ -11,11 +11,22 @@ }, "values" : "\"x\",\"y\",\"z\"\n0.0220783790573478,0.605496409814805,\"1\"\n0.39672307879664,0.879450253676623,\"1\"\n0.483322170330212,0.92520075570792,\"1\"\n0.82535388879478,0.175504838116467,\"1\"\n0.0126281732227653,0.154914033133537,\"2\"\n0.592421213164926,0.239515871042386,\"2\"\n0.706804817542434,0.00647905003279448,\"2\"\n0.838725288398564,0.36204455466941,\"2\"\n0.0603655390441418,0.732962509151548,\"3\"\n0.150009069126099,0.574154397239909,\"3\"\n0.192939087515697,0.489878148771822,\"3\"\n0.575029835337773,0.870066102826968,\"3\"" }, + { + "name" : "df0/regroup1/arrange2", + "source" : "df0/regroup1/arrange2_flat", + "transform" : [ + { + "type" : "treefacet", + "keys" : [ + "data.z" + ] + } + ] + }, { "name" : "scale/stroke", "format" : { - "type" : "csv", - "parse" : {} + "type" : "csv" }, "values" : "\"domain\"\n\"1\"\n\"2\"\n\"3\"" }, @@ -77,31 +88,36 @@ ], "marks" : [ { - "type" : "line", - "properties" : { - "update" : { - "x" : { - "scale" : "x", - "field" : "data.x" - }, - "y" : { - "scale" : "y", - "field" : "data.y" - }, - "stroke" : { - "scale" : "stroke", - "field" : "data.z" - } - }, - "ggvis" : { - "data" : { - "value" : "df0/regroup1/arrange2" - } - } - }, + "type" : "group", "from" : { "data" : "df0/regroup1/arrange2" - } + }, + "marks" : [ + { + "type" : "line", + "properties" : { + "update" : { + "x" : { + "scale" : "x", + "field" : "data.x" + }, + "y" : { + "scale" : "y", + "field" : "data.y" + }, + "stroke" : { + "scale" : "stroke", + "field" : "data.z" + } + }, + "ggvis" : { + "data" : { + "value" : "df0/regroup1/arrange2" + } + } + } + } + ] } ], "width" : 600, @@ -131,16 +147,13 @@ "title" : "y" } ], - "padding" : null, "ggvis_opts" : { "width" : 600, "height" : 400, "keep_aspect" : false, "resizable" : true, - "padding" : {}, "duration" : 250, "renderer" : "svg", "hover_duration" : 0 - }, - "handlers" : null + } } diff --git a/tests/specs/scales/datetime_hist.json b/tests/specs/scales/datetime_hist.json index 7900e07d..c04ff016 100644 --- a/tests/specs/scales/datetime_hist.json +++ b/tests/specs/scales/datetime_hist.json @@ -11,7 +11,7 @@ "stack_lwr_" : "number" } }, - "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1371262258146,1371351090134,0,0\n1371351090134,1371439922123,1,0\n1371439922123,1371528754112,0,0\n1371528754112,1371617586101,0,0\n1371617586101,1371706418090,0,0\n1371706418090,1371795250079,1,0\n1371795250079,1371884082068,0,0\n1371884082068,1371972914057,0,0\n1371972914057,1372061746046,0,0\n1372061746046,1372150578035,4,0\n1372150578035,1372239410024,2,0\n1372239410024,1372328242013,4,0\n1372328242013,1372417074002,4,0\n1372417074002,1372505905991,3,0\n1372505905991,1372594737979,3,0\n1372594737979,1372683569968,2,0\n1372683569968,1372772401957,3,0\n1372772401957,1372861233946,2,0\n1372861233946,1372950065935,3,0\n1372950065935,1373038897924,2,0\n1373038897924,1373127729913,3,0\n1373127729913,1373216561902,0,0\n1373216561902,1373305393891,0,0\n1373305393891,1373394225880,0,0\n1373394225880,1373483057869,0,0\n1373483057869,1373571889858,0,0\n1373571889858,1373660721847,1,0\n1373660721847,1373749553836,0,0\n1373749553836,1373838385824,1,0\n1373838385824,1373927217813,0,0\n1373927217813,1374016049802,0,0\n1374016049802,1374104881791,1,0\n1374104881791,1374193713780,0,0" + "values" : "\"xmin_\",\"xmax_\",\"stack_upr_\",\"stack_lwr_\"\n1371388245103,1371477077092,1,0\n1371477077092,1371565909080,0,0\n1371565909080,1371654741069,0,0\n1371654741069,1371743573058,1,0\n1371743573058,1371832405047,0,0\n1371832405047,1371921237036,0,0\n1371921237036,1372010069025,0,0\n1372010069025,1372098901014,1,0\n1372098901014,1372187733003,5,0\n1372187733003,1372276564992,1,0\n1372276564992,1372365396981,5,0\n1372365396981,1372454228970,3,0\n1372454228970,1372543060959,3,0\n1372543060959,1372631892948,2,0\n1372631892948,1372720724937,4,0\n1372720724937,1372809556925,2,0\n1372809556925,1372898388914,2,0\n1372898388914,1372987220903,3,0\n1372987220903,1373076052892,2,0\n1373076052892,1373164884881,2,0\n1373164884881,1373253716870,0,0\n1373253716870,1373342548859,0,0\n1373342548859,1373431380848,0,0\n1373431380848,1373520212837,0,0\n1373520212837,1373609044826,0,0\n1373609044826,1373697876815,1,0\n1373697876815,1373786708804,0,0\n1373786708804,1373875540793,1,0\n1373875540793,1373964372782,0,0\n1373964372782,1374053204771,0,0\n1374053204771,1374142036759,1,0" }, { "name" : "scale/x", @@ -21,7 +21,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n1371115685364\n1374340286562" + "values" : "\"domain\"\n1371250555520\n1374279726342" }, { "name" : "scale/y", @@ -31,7 +31,7 @@ "domain" : "number" } }, - "values" : "\"domain\"\n0\n4.2" + "values" : "\"domain\"\n0\n5.25" } ], "scales" : [ diff --git a/tests/testthat/test-compute-bin.r b/tests/testthat/test-compute-bin.r index 59fc904e..5d43e092 100644 --- a/tests/testthat/test-compute-bin.r +++ b/tests/testthat/test-compute-bin.r @@ -24,7 +24,8 @@ test_that("bin_vector preserves dates and times", { # Can set boundary - res <- bin_vector(dates, width = 30, boundary = as.Date("2013-06-01"), pad = FALSE) + res <- bin_vector(dates, width = 30, boundary = as.Date("2013-06-01"), + pad = FALSE) expect_identical(sum(res$count_), length(dates)) expect_identical(res$xmin_[1], as.Date("2013-06-01")) From 5c96ef41f4dd6929aa4ec7f1eec07fede278dda5 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 23 Sep 2014 23:41:29 -0500 Subject: [PATCH 4/8] Simplify compute_bin code --- DESCRIPTION | 1 + R/compute_bin.R | 278 +++++++++++++++++++----------------------------- R/layer_bins.R | 2 +- R/utils.R | 6 ++ 4 files changed, 120 insertions(+), 167 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cd7029de..dfd20f26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: Suggests: MASS, mgcv, + lubridate, testthat (>= 0.8.1), knitr (>= 1.6), rmarkdown diff --git a/R/compute_bin.R b/R/compute_bin.R index 9d342f14..fe8d60f5 100644 --- a/R/compute_bin.R +++ b/R/compute_bin.R @@ -57,6 +57,9 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, x_val <- eval_vector(x, x_var) + params <- bin_params(range2(x_val), width = width, center = center, + boundary = boundary, closed = closed) + x_na <- is.na(x_val) if (any(x_na)) { message("compute_bin: NA values ignored for binning.") @@ -69,11 +72,8 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, w_val <- eval_vector(x, w_var) } - params <- bin_params(range2(x_val), width = width, center = center, - boundary = boundary, closed = closed) - - bin_vector(x_val, weight = w_val, width = params$binwidth, - boundary = params$origin, closed = params$closed, pad = pad) + bin_vector(x_val, weight = w_val, width = params$width, + origin = params$origin, closed = params$closed, pad = pad) } #' @export @@ -83,16 +83,21 @@ compute_bin.grouped_df <- function(x, x_var, w_var = NULL, width = NULL, closed <- match.arg(closed) x_val <- eval_vector(x, x_var) + + # We want to use the same boundary and width across groups, so calculate + # bin params here. params <- bin_params(range2(x_val), width = width, center = center, boundary = boundary, closed = closed) dplyr::do(x, compute_bin(., x_var, w_var = w_var, - width = params$binwidth, - boundary = params$origin, # origin is a boundary, so this works + width = params$width, + center = NULL, + boundary = params$origin, closed = params$closed, - pad = pad)) + pad = pad + )) } #' @export @@ -120,116 +125,38 @@ bin_params <- function(x_range, width = NULL, center = NULL, boundary = NULL, bin_params.numeric <- function(x_range, width = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { closed <- match.arg(closed) - stopifnot(is.numeric(x_range) && length(x_range) == 2) - if (!is.null(boundary) && !is.null(center)) { - stop("Only one of 'boundary' and 'center' may be specified.") - } - - if (is.null(width)) { - width <- diff(x_range) / 30 - notify_guess(width, "range / 30") - } - - # if neither edge nor center given, compute both using tile layer's algorithm - # this puts min and max of data in outer half of their bins. - if (is.null(boundary) && is.null(center)) { - boundary <- tilelayer_origin(x_range, width) - # center <- boundary + width / 2 - } - - # if center given but not boundary, compute boundary from center - if (is.null(boundary)) boundary <- center - width / 2 - - origin <- compute_origin(x_range, width, boundary) - - list(binwidth = width, origin = origin, closed = closed) -} - -#' @export -bin_params.POSIXct <- function(x_range, width = NULL, center = NULL, - boundary = NULL, closed = c("right", "left")) { - closed <- match.arg(closed) + stopifnot(length(x_range) == 2) if (!is.null(boundary) && !is.null(center)) { stop("Only one of 'boundary' and 'center' may be specified.") } - x_range <- as.numeric(x_range) - if (inherits(width, "Period")) { - width <- as.numeric(as.difftime(width, units = "secs")) - } - if (!is.null(width)) width <- as.numeric(width) - if (!is.null(center)) width <- as.numeric(center) - if (!is.null(boundary)) width <- as.numeric(boundary) - - if (is.null(width)) { - width <- diff(x_range) / 30 - notify_guess(width, "range / 30 (in seconds)") - width <- as.numeric(width, units = "secs") - } - - if (is.null(boundary) && is.null(center)) { - boundary <- tilelayer_origin(x_range, width) - center <- boundary + width / 2 - } - - if (!is.numeric(width)) width <- as.numeric(width, units = 'secs') - if (!is.null(center)) center <- as.numeric(center) - if (!is.null(boundary)) boundary<- as.numeric(boundary) - - # if we have center but not boundary, compute boundary - if (is.null(boundary)) boundary <- center - width / 2 - - origin <- compute_origin(x_range, width, boundary) - - list(binwidth = width, origin = origin, closed = closed, - origin.POSIX = structure(origin, class = c("POSIXct", "POSIXt")) - ) -} - -#' @export -bin_params.Date <- function(x_range, width = NULL, center = NULL, - boundary = NULL, closed = c("right", "left")) { - - closed <- match.arg(closed) - - # convert things to numeric as we go along - x_range <- as.numeric(x_range) - if (is.null(width)) { width <- diff(x_range) / 30 notify_guess(width, "range / 30") } - width <- as.numeric(width) - - if (is.null(boundary) && is.null(center)) { - boundary <- tilelayer_origin(x_range, width) - center <- boundary + width / 2 - } - # if we have center but not boundary, compute boundary if (is.null(boundary)) { - center <- as.numeric(center) - boundary <- center - width / 2 + if (is.null(center)) { + # If neither edge nor center given, compute both using tile layer's + # algorithm. This puts min and max of data in outer half of their bins. + boundary <- tilelayer_origin(x_range, width) + + } else { + # If center given but not boundary, compute boundary. + boundary <- center - width / 2 + } } - origin <- compute_origin(x_range, width, boundary) + origin <- find_origin(x_range, width, boundary) - # do we need to convert this back to date format? - list(binwidth = width, origin = origin, closed = closed) + list(width = width, origin = origin, closed = closed) } - #' @export bin_params.integer <- function(x_range, width = NULL, center = NULL, boundary = NULL, closed = c("right", "left")) { - closed <- match.arg(closed) - - if (!is.null(boundary) && !is.null(center)) { - stop("Only one of 'boundary' and 'center' may be specified.") - } - if (is.null(width)) { width <- max(pretty(round(diff(x_range) / 30))) if (width < 1) width <- 1 @@ -237,20 +164,45 @@ bin_params.integer <- function(x_range, width = NULL, notify_guess(width, paste0("approximately range/", num_bins)) } - if (is.null(boundary) && is.null(center)) { - boundary <- tilelayer_origin(x_range, width) - center <- boundary + width / 2 - } + bin_params.numeric(as.numeric(x_range), width, center, boundary, closed) +} - # if we have center but not boundary, compute boundary - if (is.null(boundary)) boundary <- center - width / 2 +#' @export +bin_params.POSIXct <- function(x_range, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { + if (!is.null(width)) { + # Period object from lubridate package - need lubridate::as.difftime to find + # the correct generic, instead of base::as.difftime. + if (is(width, "Period")) { + width <- as.numeric(lubridate::as.difftime(width, units = "secs")) + + } else { + width <- as.numeric(width, units = "secs") + } + } - origin <- compute_origin(x_range, width, boundary) + bin_params( + as.numeric(x_range), + as_numeric(width), + as_numeric(center), + as_numeric(boundary), + closed + ) +} - list(binwidth = width, origin = origin, closed = closed) +#' @export +bin_params.Date <- function(x_range, width = NULL, center = NULL, + boundary = NULL, closed = c("right", "left")) { + bin_params( + as.numeric(x_range), + as_numeric(width), + as_numeric(center), + as_numeric(boundary), + closed + ) } -# compute origin from x_range and width +# Compute origin from x_range and width tilelayer_origin <- function(x_range, width) { stopifnot(is.numeric(x_range) && length(x_range) == 2) stopifnot(is.numeric(width) && length(width) == 1) @@ -261,7 +213,8 @@ tilelayer_origin <- function(x_range, width) { # adjust_breaks should be called to handle any round-off fuzziness issues } -compute_origin <- function(x_range, width, boundary) { +# Find the left side of left-most bin +find_origin <- function(x_range, width, boundary) { shift <- floor((x_range[1] - boundary) / width) boundary + shift * width } @@ -272,44 +225,32 @@ compute_origin <- function(x_range, width, boundary) { #' #' A generic and several implementations for binning vectors. #' -#' @param x a vector to bin -#' @param weight if specified, an integer vector of the same length as \code{x} +#' @param x A vector to bin +#' @param weight If specified, an integer vector of the same length as \code{x} #' representing the number of occurances of each value in \code{x} -#' @param width the width of a bin -#' @param center the center of a bin -#' @param boundary the boundary of a bin. \code{center} and \code{boundary} should -#' not both be specified. +#' @param width The width of a bin +#' @param origin The left-most value for bins. #' @param closed One of \code{"right"} or \code{"left"} indicating whether #' right or left edges of bins are included in the bin. -#' @param pad a logical indicatign whether the bins should be padded to include +#' @param pad A logical indicating whether the bins should be padded to include #' an empty bin on each side. -#' @param ... additional arguments passed through to instances of the generic +#' @param ... additional arguments passed through to methods. #' @keywords internal bin_vector <- function(x, weight = NULL, ...) { UseMethod("bin_vector") } #' @export -bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, - closed = c("right", "left"), pad = TRUE) { - - stopifnot(is.null(width) || (is.numeric(width) && length(width) == 1)) - stopifnot(is.null(center) || (is.numeric(center) && length(center) == 1)) - stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) - - closed <- match.arg(closed) - - if (!is.null(center) && !is.null(boundary)) { - stop("Only one of 'center' and 'boundary' may be specified.") - } - +bin_vector.numeric <- function(x, weight = NULL, ..., width = 1, + origin = NULL, closed = c("right", "left"), + pad = TRUE) { if (length(na.omit(x)) == 0) { return(bin_out()) } + closed <- match.arg(closed) - stopifnot(is.null(boundary) || (is.numeric(boundary) && length(boundary) == 1)) - stopifnot(is.character(closed)) + stopifnot(is.numeric(width) && length(width) == 1) + stopifnot(is.numeric(origin) && length(origin) == 1) if (is.null(weight)) { weight <- rep(1, length(x)) @@ -317,12 +258,14 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, weight[is.na(weight)] <- 0 } - params <- bin_params(range(x), width, center, boundary, closed) - - breaks <- seq(params$origin, max(x) + params$binwidth, params$binwidth) - fuzzybreaks <- adjust_breaks(breaks, closed = params$closed) + min_x <- origin + # Small correction factor so that we don't get an extra bin when, for + # example, origin=0, max(x)=20, width=10. + max_x <- max(x) + (1 - 1e-08) * width + breaks <- seq(min_x, max_x, width) + fuzzybreaks <- adjust_breaks(breaks, closed = closed) - bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = params$closed == "right") + bins <- cut(x, fuzzybreaks, include.lowest = TRUE, right = (closed == "right")) left <- breaks[-length(breaks)] right <- breaks[-1] x <- (left + right) / 2 @@ -333,28 +276,31 @@ bin_vector.numeric <- function(x, weight = NULL, ..., width = NULL, if (pad) { count <- c(0L, count, 0L) - bin_widths <- c(params$binwidth, bin_widths, params$binwidth) - x <- c(x[1] - params$binwidth, x, x[length(x)] + params$binwidth) + bin_widths <- c(width, bin_widths, width) + x <- c(x[1] - width, x, x[length(x)] + width) } bin_out(count, x, bin_widths) } #' @export -bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, - center = NULL, boundary = NULL, - closed = c("right", "left"), pad=TRUE) { +bin_vector.POSIXct <- function(x, weight = NULL, ..., width = 1, + origin = NULL, closed = c("right", "left"), + pad = TRUE) { - closed <- match.arg(closed) - # Convert times to raw numbers (seconds since UNIX epoch), and call bin.numeric - if (inherits(width, "Period")) width <- as.numeric(as.difftime(width, units = "secs")) - if (!is.null(width)) width <- as.numeric(width) - if (!is.null(center)) center <- as.numeric(center) - if (!is.null(boundary)) boundary <- as.numeric(boundary) + # Convert times to raw numbers (seconds since UNIX epoch) + if (is(width, "Period")) { + width <- as.numeric(lubridate::as.difftime(width, units = "secs")) + } - results <- bin_vector(as.numeric(x), weight = weight, width = width, - center = center, boundary = boundary, closed = closed, - pad = pad) + results <- bin_vector( + as.numeric(x), + weight = weight, + width = width, + origin = if (is.null(origin)) NULL else as.numeric(origin), + closed = closed, + pad = pad + ) # Convert some columns from numeric back to POSIXct objects tz <- attr(x, "tzone", TRUE) @@ -367,24 +313,24 @@ bin_vector.POSIXct <- function(x, weight = NULL, ..., width = NULL, } #' @export -bin_vector.Date <- function(x, weight = NULL, ..., width = NULL, center=NULL, - boundary = NULL, closed = c("right", "left"), pad = TRUE) { - - closed <- match.arg(closed) - - # Convert times to raw numbers, and call bin_vector.numeric - if (!is.null(width)) width <- as.numeric(width) - if (!is.null(center)) center<- as.numeric(center) - if (!is.null(boundary)) boundary <- as.numeric(boundary) - - results <- bin_vector(as.numeric(x), weight = weight, width = width, - center = center, boundary = boundary, - closed = closed, pad = pad) +bin_vector.Date <- function(x, weight = NULL, ..., width = 1, + origin = NULL, closed = c("right", "left"), + pad = TRUE) { + + results <- bin_vector( + as.numeric(x), + weight = weight, + width = width, + origin = if (is.null(origin)) NULL else as.numeric(origin), + closed = closed, + pad = pad + ) # Convert some columns from numeric back to Date objects time_cols <- c("x_", "xmin_", "xmax_") results[time_cols] <- lapply(results[time_cols], function(col) { - structure(col, class = "Date") + class(col) <- "Date" + col }) results @@ -410,7 +356,7 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0), # Adapt break fuzziness from base::hist - this protects from floating # point rounding errors -adjust_breaks <- function(breaks, closed = "right") { +adjust_breaks <- function(breaks, closed = "left") { closed <- match.arg(closed, c("right", "left")) diddle <- 1e-08 * median(diff(breaks)) diff --git a/R/layer_bins.R b/R/layer_bins.R index 64d25d5f..a780f19b 100644 --- a/R/layer_bins.R +++ b/R/layer_bins.R @@ -84,7 +84,7 @@ layer_freqpolys <- function(vis, ..., width = NULL, center = NULL, boundary = NU closed = value(closed)) layer_f(vis, function(x) { - x <- compute_bin(x, x_var, width = params$binwidth, + x <- compute_bin(x, x_var, width = params$width, boundary = params$origin, closed = params$closed) path_props <- merge_props(new_props, props(x = ~x_, y = ~count_)) diff --git a/R/utils.R b/R/utils.R index a04cf66c..a917bebc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -228,6 +228,12 @@ vpluck <- function(x, name, type) { vapply(x, `[[`, name, FUN.VALUE = type) } +# Like as.numeric, except that as.numeric(NULL) returns numeric(0), whereas +# as_numeric(NULL) returns NULL. +as_numeric <- function(x) { + if (is.null(x)) NULL + else as.numeric(x) +} deprecated <- function(old, new = NULL, msg = NULL, version = NULL) { text <- paste0( From 542fbf8923cd85cb1296653819eb81c08e408035 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 23 Sep 2014 23:41:44 -0500 Subject: [PATCH 5/8] Revamp compute_bin tests --- tests/testthat/test-compute-bin.r | 185 ++++++++++++++++++++++++------ 1 file changed, 147 insertions(+), 38 deletions(-) diff --git a/tests/testthat/test-compute-bin.r b/tests/testthat/test-compute-bin.r index 5d43e092..6482cc47 100644 --- a/tests/testthat/test-compute-bin.r +++ b/tests/testthat/test-compute-bin.r @@ -1,49 +1,158 @@ context("compute_bin") -test_that("bin_vector preserves dates and times", { - dates <- as.Date("2013-06-01") + 0:100 - UTCtimes <- as.POSIXct('2001-06-01 21:00', tz = 'UTC') + seq(0, 1000, by = 10) - NYtimes <- as.POSIXct('2001-06-01 21:00', tz = 'America/New_York') + 0:10 * 100 +comp_bin <- function(...) { + suppressMessages(compute_bin(...)) +} - res <- bin_vector(dates, width = 30) +test_that("compute_bin preserves dates and times", { + dates <- data.frame(val = as.Date("2013-06-01") + 0:100) + NYtimes <- data.frame( + val = as.POSIXct('2001-06-01 21:00', tz = 'America/New_York') + 0:10 * 100 + ) + UTCtimes <- data.frame( + val = as.POSIXct('2001-06-01 21:00', tz = 'UTC') + seq(0, 1000, by = 10) + ) + + res <- comp_bin(dates, ~val, width = 30) expect_true(inherits(res$x_, "Date")) expect_true(inherits(res$xmin_, "Date")) expect_true(inherits(res$xmax_, "Date")) - expect_identical(sum(res$count_), length(dates)) + expect_identical(sum(res$count_), length(dates$val)) - res <- bin_vector(NYtimes, width = 120) + res <- comp_bin(NYtimes, ~val, width = 120) expect_true(inherits(res$x_, "POSIXct")) expect_true(inherits(res$xmin_, "POSIXct")) expect_true(inherits(res$xmax_, "POSIXct")) - expect_identical(sum(res$count_), length(NYtimes)) - expect_identical(attr(NYtimes, "tzone"), attr(res$x_, "tzone")) - - res <- bin_vector(UTCtimes, width = 120) - expect_identical(sum(res$count_), length(UTCtimes)) - expect_identical(attr(UTCtimes, "tzone"), attr(res$x_, "tzone")) - - - # Can set boundary - res <- bin_vector(dates, width = 30, boundary = as.Date("2013-06-01"), - pad = FALSE) - expect_identical(sum(res$count_), length(dates)) - expect_identical(res$xmin_[1], as.Date("2013-06-01")) - - res <- bin_vector(UTCtimes, width = 120, - boundary = as.POSIXct('2001-06-01 21:07', tz = 'UTC'), - pad = FALSE) - expect_identical(sum(res$count_), length(UTCtimes)) - expect_identical(res$xmin_[5], as.POSIXct('2001-06-01 21:07', tz = 'UTC')) - - # Can set center - res <- bin_vector(dates, width = 30, center=as.Date("2013-07-01"), - pad = FALSE) - expect_identical(sum(res$count_), length(dates)) - expect_identical(res$x_[2], as.Date("2013-07-01")) - - res <- bin_vector(UTCtimes, width = 120, - center = as.POSIXct('2001-06-01 21:15', tz = 'UTC'), - pad = FALSE) - expect_identical(sum(res$count_), length(UTCtimes)) - expect_identical(res$x_[8], as.POSIXct('2001-06-01 21:15', tz = 'UTC')) + expect_identical(sum(res$count_), length(NYtimes$val)) + expect_identical(attr(NYtimes$val, "tzone"), attr(res$x_, "tzone")) + + res <- comp_bin(UTCtimes, ~val, width = 120) + expect_identical(sum(res$count_), length(UTCtimes$val)) + expect_identical(attr(UTCtimes$val, "tzone"), attr(res$x_, "tzone")) +}) + +test_that("width in lubridate::Period", { + UTCtimes <- data.frame( + val = as.POSIXct('2001-06-01 21:00', tz = 'UTC') + seq(0, 1000, by = 10) + ) + + # width specified as a Period from lubridate + expect_identical( + comp_bin(UTCtimes, ~val, width = lubridate::ms("1 42")), + comp_bin(UTCtimes, ~val, width = 102) + ) +}) + +test_that("Closed left or right", { + dat <- data.frame(x = c(0, 10)) + + res <- comp_bin(dat, ~x, width = 10, pad = FALSE) + expect_identical(res$count_, c(1L, 1L)) + res <- comp_bin(dat, ~x, width = 10, boundary = 5, pad = FALSE) + expect_identical(res$count_, c(1L, 1L)) + res <- comp_bin(dat, ~x, width = 10, boundary = 0, pad = FALSE) + expect_identical(res$count_, 2L) + res <- comp_bin(dat, ~x, width = 5, boundary = 0, pad = FALSE) + expect_identical(res$count_, c(1L, 1L)) + + res <- comp_bin(dat, ~x, width = 10, pad = FALSE, closed = "left") + expect_identical(res$count_, c(1L, 1L)) + res <- comp_bin(dat, ~x, width = 10, boundary = 5, pad = FALSE, closed = "left") + expect_identical(res$count_, c(1L, 1L)) + res <- comp_bin(dat, ~x, width = 10, boundary = 0, pad = FALSE, closed = "left") + expect_identical(res$count_, c(2L)) + res <- comp_bin(dat, ~x, width = 5, boundary = 0, pad = FALSE, closed = "left") + expect_identical(res$count_, c(1L, 1L)) +}) + + +test_that("Setting boundary and center", { + # numeric + dat <- data.frame(x = c(0, 30)) + + # Error if both boundary and center are specified + expect_error(comp_bin(dat, ~x, width = 10, bondary = 5, center = 0, pad = FALSE)) + + res <- comp_bin(dat, ~x, width = 10, boundary = 0, pad = FALSE) + expect_identical(res$count, c(1L, 0L, 1L)) + expect_identical(res$xmin_[1], 0) + expect_identical(res$xmax_[3], 30) + + res <- comp_bin(dat, ~x, width = 10, center = 0, pad = FALSE) + expect_identical(res$count, c(1L, 0L, 0L, 1L)) + expect_identical(res$xmin_[1], dat$x[1] - 5) + expect_identical(res$xmax_[4], dat$x[2] + 5) + + + # Date + dat <- data.frame(x = as.Date("2013-06-01") + c(0, 30)) + + res <- comp_bin(dat, ~x, width = 10, boundary = as.Date("2013-06-01"), pad = FALSE) + expect_identical(res$count_, c(1L, 0L, 1L)) + expect_identical(res$xmin_[1], dat$x[1]) + expect_identical(res$xmax_[3], dat$x[2]) + + res <- comp_bin(dat, ~x, width = 10, center = as.Date("2013-06-01"), pad = FALSE) + expect_identical(res$count, c(1L, 0L, 0L, 1L)) + expect_identical(res$xmin_[1], dat$x[1] - 5) + expect_identical(res$xmax_[4], dat$x[2] + 5) + + + # POSIXct + dat <- data.frame( + x = as.POSIXct('2001-06-01 21:00', tz = 'America/New_York') + c(0, 30000) + ) + + res <- comp_bin(dat, ~x, width = 10000, boundary = dat$x[1], pad = FALSE) + expect_identical(res$count_, c(1L, 0L, 1L)) + expect_identical(res$xmin_[1], dat$x[1]) + expect_identical(res$xmax_[3], dat$x[2]) + + res <- comp_bin(dat, ~x, width = 10000, center = dat$x[1], pad = FALSE) + expect_identical(res$count, c(1L, 0L, 0L, 1L)) + expect_identical(res$xmin_[1], dat$x[1] - 5000) + expect_identical(res$xmax_[4], dat$x[2] + 5000) +}) + + +test_that("Automatic width", { + dat <- data.frame( + num = c(0, 25.0), + num2 = c(0, 50.0), + int = c(1L, 25L), + int2 = c(1L, 50L), + date = as.Date("2013-06-01") + c(0, 100), + posixct = as.POSIXct('2001-06-01 21:00', tz = 'UTC') + c(0, 1000) + ) + + # numeric + res <- suppressMessages(compute_bin(dat, ~num)) + # Need to use expect_equal to deal with FP error + expect_equal(res$width_, rep(25/30, length(res$width_))) + res <- suppressMessages(compute_bin(dat, ~num2)) + expect_equal(res$width_, rep(50/30, length(res$width_))) + + # integer + res <- suppressMessages(compute_bin(dat, ~int)) + expect_true(all(res$width_ == 1L)) + res <- suppressMessages(compute_bin(dat, ~int2)) + expect_true(all(res$width_ == 2L)) + + # Date + res <- suppressMessages(compute_bin(dat, ~date)) + expect_equal(res$width_, rep(100/30, length(res$width_))) + + # POSIXct + res <- suppressMessages(compute_bin(dat, ~posixct)) + expect_equal(res$width_, rep(1000/30, length(res$width_))) +}) + + +test_that("Bin boundaries across groups", { + # Bins should be the same across groups + dat <- data.frame(x = c(0:2, 0:2+0.5), g=c('a','a','a', 'b','b','b')) + res <- dat %>% group_by(g) %>% compute_bin(~x, width = 1, pad = FALSE) + expect_identical(range(res$x_[res$g =='a']), range(res$x_[res$g =='b'])) + expect_identical(dplyr::groups(res), list(quote(g))) + expect_identical(res$count_, rep(1L, 6)) }) From bd88dce10f326e685f6a8b344f1bc1ff43cf6cb6 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 23 Sep 2014 23:44:08 -0500 Subject: [PATCH 6/8] Re-document --- man/bin_vector.Rd | 15 ++++++--------- man/compute_bin.Rd | 38 +++++++++++++++++++------------------- man/layer_histograms.Rd | 32 ++++++++++++++++---------------- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/man/bin_vector.Rd b/man/bin_vector.Rd index f02c0bb3..5463d494 100644 --- a/man/bin_vector.Rd +++ b/man/bin_vector.Rd @@ -6,24 +6,21 @@ bin_vector(x, weight = NULL, ...) } \arguments{ -\item{x}{a vector to bin} +\item{x}{A vector to bin} -\item{weight}{if specified, an integer vector of the same length as \code{x} +\item{weight}{If specified, an integer vector of the same length as \code{x} representing the number of occurances of each value in \code{x}} -\item{...}{additional arguments passed through to instances of the generic} +\item{...}{additional arguments passed through to methods.} -\item{width}{the width of a bin} +\item{width}{The width of a bin} -\item{center}{the center of a bin} - -\item{boundary}{the boundary of a bin. \code{center} and \code{boundary} should -not both be specified.} +\item{origin}{The left-most value for bins.} \item{closed}{One of \code{"right"} or \code{"left"} indicating whether right or left edges of bins are included in the bin.} -\item{pad}{a logical indicatign whether the bins should be padded to include +\item{pad}{A logical indicating whether the bins should be padded to include an empty bin on each side.} } \description{ diff --git a/man/compute_bin.Rd b/man/compute_bin.Rd index fa348052..ce195077 100644 --- a/man/compute_bin.Rd +++ b/man/compute_bin.Rd @@ -13,30 +13,30 @@ grouped data frames and ggvis visualisations.} \item{x_var,w_var}{Names of x and weight variables. The x variable must be continuous.} -\item{width}{The width of the bins. The default is \code{NULL}, which -yields 30 bins that cover the range of the data. You should always override -this value, exploring multiple widths to find the best to illustrate the -stories in your data.} +\item{width}{The width of the bins. The default is \code{NULL}, which yields +30 bins that cover the range of the data. You should always override this +value, exploring multiple widths to find the best to illustrate the stories +in your data.} -\item{center}{The center of one of the bins. Note that if center is above or below -the range of the data, things will be shifted by an appropriate number of \code{width}s. -To center on integers, -for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is -outside the range of the data. At most one of \code{center} and \code{boundary} may be +\item{center}{The center of one of the bins. Note that if center is above or +below the range of the data, things will be shifted by an appropriate +number of \code{width}s. To center on integers, for example, use +\code{width=1} and \code{center=0}, even if \code{0} is outside the range +of the data. At most one of \code{center} and \code{boundary} may be specified.} -\item{boundary}{A boundary between two bins. As with \code{center}, things are shifted -when \code{boundary} is outside the range of the data. For example, to center on -integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside -the range of the data. At most one of \code{center} and \code{boundary} may be -specified.} +\item{boundary}{A boundary between two bins. As with \code{center}, things +are shifted when \code{boundary} is outside the range of the data. For +example, to center on integers, use \code{width = 1} and \code{boundary = +0.5}, even if \code{1} is outside the range of the data. At most one of +\code{center} and \code{boundary} may be specified.} -\item{closed}{One of \code{"right"} or \code{"left"} indicating whether -right or left edges of bins are included in the bin.} +\item{closed}{One of \code{"right"} or \code{"left"} indicating whether right +or left edges of bins are included in the bin.} -\item{pad}{If \code{TRUE}, adds empty bins at either end of x. This -ensures frequency polygons touch 0, and adds padding between the data -and axis for histograms.} +\item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures +frequency polygons touch 0, and adds padding between the data and axis for +histograms.} } \value{ A data frame with columns: diff --git a/man/layer_histograms.Rd b/man/layer_histograms.Rd index 50d11a16..5f2bf06c 100644 --- a/man/layer_histograms.Rd +++ b/man/layer_histograms.Rd @@ -15,26 +15,26 @@ layer_freqpolys(vis, ..., width = NULL, center = NULL, boundary = NULL, \item{...}{Visual properties used to override defaults.} -\item{width}{The width of the bins. The default is \code{NULL}, which -yields 30 bins that cover the range of the data. You should always override -this value, exploring multiple widths to find the best to illustrate the -stories in your data.} +\item{width}{The width of the bins. The default is \code{NULL}, which yields +30 bins that cover the range of the data. You should always override this +value, exploring multiple widths to find the best to illustrate the stories +in your data.} -\item{center}{The center of one of the bins. Note that if center is above or below -the range of the data, things will be shifted by an appropriate number of \code{width}s. -To center on integers, -for example, use \code{width = 1} and \code{center = 0}, even if \code{0} is -outside the range of the data. At most one of \code{center} and \code{boundary} may be +\item{center}{The center of one of the bins. Note that if center is above or +below the range of the data, things will be shifted by an appropriate +number of \code{width}s. To center on integers, for example, use +\code{width=1} and \code{center=0}, even if \code{0} is outside the range +of the data. At most one of \code{center} and \code{boundary} may be specified.} -\item{boundary}{A boundary between two bins. As with \code{center}, things are shifted -when \code{boundary} is outside the range of the data. For example, to center on -integers, use \code{width = 1} and \code{boundary = 0.5}, even if \code{1} is outside -the range of the data. At most one of \code{center} and \code{boundary} may be -specified.} +\item{boundary}{A boundary between two bins. As with \code{center}, things +are shifted when \code{boundary} is outside the range of the data. For +example, to center on integers, use \code{width = 1} and \code{boundary = +0.5}, even if \code{1} is outside the range of the data. At most one of +\code{center} and \code{boundary} may be specified.} -\item{closed}{One of \code{"right"} or \code{"left"} indicating whether -right or left edges of bins are included in the bin.} +\item{closed}{One of \code{"right"} or \code{"left"} indicating whether right +or left edges of bins are included in the bin.} \item{stack}{If \code{TRUE}, will automatically stack overlapping bars.} } From 7e138ad08540f86d3c64455eddee557d2cc1ace5 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 23 Sep 2014 23:51:30 -0500 Subject: [PATCH 7/8] Deprecate binwidth argument --- R/compute_bin.R | 28 ++++++++++++++++++++++++---- R/layer_bins.R | 14 ++++++++++++-- R/utils.R | 2 +- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/R/compute_bin.R b/R/compute_bin.R index fe8d60f5..250fae13 100644 --- a/R/compute_bin.R +++ b/R/compute_bin.R @@ -44,14 +44,22 @@ #' mtcars %>% ggvis(~ x_, ~ count_) %>% compute_bin(~mpg) %>% layer_paths() compute_bin <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - closed = c("right", "left"), pad = TRUE) { + closed = c("right", "left"), pad = TRUE, + binwidth) { UseMethod("compute_bin") } #' @export compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - closed = c("right", "left"), pad = TRUE) { + closed = c("right", "left"), pad = TRUE, + binwidth) { + + if (!missing(binwidth)) { + width <- binwidth + deprecated("binwidth", "width", version = "0.3.0") + } + closed <- match.arg(closed) assert_that(is.formula(x_var)) @@ -79,7 +87,13 @@ compute_bin.data.frame <- function(x, x_var, w_var = NULL, width = NULL, #' @export compute_bin.grouped_df <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - closed = c("right", "left"), pad = TRUE) { + closed = c("right", "left"), pad = TRUE, + binwidth) { + + if (!missing(binwidth)) { + width <- binwidth + deprecated("binwidth", "width", version = "0.3.0") + } closed <- match.arg(closed) x_val <- eval_vector(x, x_var) @@ -103,7 +117,13 @@ compute_bin.grouped_df <- function(x, x_var, w_var = NULL, width = NULL, #' @export compute_bin.ggvis <- function(x, x_var, w_var = NULL, width = NULL, center = NULL, boundary = NULL, - closed = c("right", "left"), pad = TRUE) { + closed = c("right", "left"), pad = TRUE, + binwidth) { + if (!missing(binwidth)) { + width <- binwidth + deprecated("binwidth", "width", version = "0.3.0") + } + closed <- match.arg(closed) args <- list(x_var = x_var, w_var = w_var, width = width, center = center, boundary = boundary, closed = closed, pad = pad) diff --git a/R/layer_bins.R b/R/layer_bins.R index a780f19b..28eaf073 100644 --- a/R/layer_bins.R +++ b/R/layer_bins.R @@ -24,7 +24,12 @@ #' layer_freqpolys(width = 2) layer_histograms <- function(vis, ..., width = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), - stack = TRUE) { + stack = TRUE, binwidth) { + if (!missing(binwidth)) { + width <- binwidth + deprecated("binwidth", "width", version = "0.3.0") + } + closed <- match.arg(closed) new_props <- merge_props(cur_props(vis), props(...)) @@ -65,7 +70,12 @@ layer_histograms <- function(vis, ..., width = NULL, center = NULL, #' @rdname layer_histograms #' @export layer_freqpolys <- function(vis, ..., width = NULL, center = NULL, boundary = NULL, - closed = c("right", "left")) { + closed = c("right", "left"), binwidth) { + if (!missing(binwidth)) { + width <- binwidth + deprecated("binwidth", "width", version = "0.3.0") + } + closed <- match.arg(closed) new_props <- merge_props(cur_props(vis), props(...)) diff --git a/R/utils.R b/R/utils.R index a917bebc..efd8f955 100644 --- a/R/utils.R +++ b/R/utils.R @@ -242,5 +242,5 @@ deprecated <- function(old, new = NULL, msg = NULL, version = NULL) { msg, if (!is.null(version)) sprintf(" (Last used in version %s)", version) ) - warning(text) + warning(text, call. = FALSE) } From ef55e003580e89679abaa0ae4f26c26f4ca0a2fd Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 23 Sep 2014 23:58:15 -0500 Subject: [PATCH 8/8] Replace remaining 'binwidth' with 'width' --- demo/apps/brush-linked/server.r | 4 ++-- demo/apps/linked-hover/server.r | 4 ++-- demo/histogram.r | 8 ++++---- demo/rmarkdown/interactive_doc.Rmd | 2 +- demo/rmarkdown/linked_brush.Rmd | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/demo/apps/brush-linked/server.r b/demo/apps/brush-linked/server.r index f5468bc8..a8336549 100644 --- a/demo/apps/brush-linked/server.r +++ b/demo/apps/brush-linked/server.r @@ -24,9 +24,9 @@ shinyServer(function(input, output, session) { cocaine %>% ggvis(~potency) %>% - layer_histograms(binwidth = 5, origin = 0) %>% + layer_histograms(width = 5, origin = 0) %>% add_data(cocaine_selected) %>% - layer_histograms(binwidth = 5, origin = 0, fill := "#dd3333") %>% + layer_histograms(width = 5, origin = 0, fill := "#dd3333") %>% set_options(width = 300, height = 300) %>% bind_shiny("plot2") }) diff --git a/demo/apps/linked-hover/server.r b/demo/apps/linked-hover/server.r index d2bc6ba0..bbddf578 100644 --- a/demo/apps/linked-hover/server.r +++ b/demo/apps/linked-hover/server.r @@ -6,7 +6,7 @@ shinyServer(function(input, output, session) { values <- reactiveValues(selected = rep(TRUE, nrow(diamonds))) diamonds %>% ggvis(~carat) %>% - layer_histograms(fill.hover := "red", binwidth = 0.1) %>% + layer_histograms(fill.hover := "red", width = 0.1) %>% handle_hover(function(data, ...) { values$selected <- diamonds$carat >= data$xmin_ & diamonds$carat < data$xmax_ @@ -17,7 +17,7 @@ shinyServer(function(input, output, session) { # Sub-histogram reactive(diamonds[values$selected, , drop = FALSE]) %>% ggvis(~carat) %>% - layer_histograms(binwidth = 0.01) %>% + layer_histograms(width = 0.01) %>% set_options(width = 400, height = 200) %>% bind_shiny("plot2") diff --git a/demo/histogram.r b/demo/histogram.r index 75aca7f9..a94850a1 100644 --- a/demo/histogram.r +++ b/demo/histogram.r @@ -2,17 +2,17 @@ library(ggvis) # Histogram, fully specified mtcars %>% ggvis(x = ~wt) %>% - compute_bin(~wt, binwidth = 1, pad = FALSE) %>% + compute_bin(~wt, width = 1, pad = FALSE) %>% layer_rects(x = ~xmin_, x2 = ~xmax_, y = ~count_, y2 = 0) # Or using shorthand layer mtcars %>% ggvis(x = ~wt) %>% layer_histograms() -mtcars %>% ggvis(x = ~wt) %>% layer_histograms(binwidth = 1) +mtcars %>% ggvis(x = ~wt) %>% layer_histograms(width = 1) # Histogram, filled by cyl mtcars %>% ggvis(x = ~wt, fill = ~factor(cyl)) %>% group_by(cyl) %>% - layer_histograms(binwidth = 1) + layer_histograms(width = 1) # Bigger dataset data(diamonds, package = "ggplot2") @@ -21,7 +21,7 @@ diamonds %>% ggvis(x = ~table) %>% layer_histograms() # Stacked histogram diamonds %>% ggvis(x = ~table, fill = ~cut) %>% group_by(cut) %>% - layer_histograms(binwidth = 1) + layer_histograms(width = 1) # Histogram of dates set.seed(2934) diff --git a/demo/rmarkdown/interactive_doc.Rmd b/demo/rmarkdown/interactive_doc.Rmd index 6ac55ad6..9d63b5f7 100644 --- a/demo/rmarkdown/interactive_doc.Rmd +++ b/demo/rmarkdown/interactive_doc.Rmd @@ -16,7 +16,7 @@ An interactive plot: ```{r, message = FALSE, fig.width = 6, fig.height = 4} cocaine %>% ggvis(x = ~potency) %>% - layer_histograms(binwidth = input_slider(1, 20, value = 5)) + layer_histograms(width = input_slider(1, 20, value = 5)) ``` diff --git a/demo/rmarkdown/linked_brush.Rmd b/demo/rmarkdown/linked_brush.Rmd index 9dc8a47c..abfdcaf5 100644 --- a/demo/rmarkdown/linked_brush.Rmd +++ b/demo/rmarkdown/linked_brush.Rmd @@ -31,9 +31,9 @@ cocaine_selected <- reactive({ cocaine %>% ggvis(~potency) %>% - layer_histograms(binwidth = 5, origin = 0) %>% + layer_histograms(width = 5, origin = 0) %>% add_data(cocaine_selected) %>% - layer_histograms(binwidth = 5, origin = 0, fill := "#dd3333") + layer_histograms(width = 5, origin = 0, fill := "#dd3333") ``` A summary of the selected points: