Skip to content

Commit

Permalink
Preliminary support for caching.
Browse files Browse the repository at this point in the history
Closes #129
  • Loading branch information
hadley committed Aug 24, 2014
1 parent f9c7a55 commit f10e957
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 20 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export(parse_media)
export(parse_url)
export(parsed_content)
export(progress)
export(rerequest)
export(reset_config)
export(set_config)
export(set_cookies)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# httr 0.4.0.99

* Preliminary and experimental support for caching with `cache_info()` and
`rerequest()` (#129).

* Requests now print the time they were made.

* You can now save response bodies directly to disk by using the `write_disk()`
config. This is useful if you want to capture large files that don't fit in
memory (#44).
Expand Down
91 changes: 76 additions & 15 deletions R/cache.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,41 @@
#' Compute caching information for a response.
#'
#' \code{cache_info()} gives details of cacheability of a response,
#' \code{rerequest()} re-performs the original request doing as little work
#' as possible (if not expired, returns response as is, or performs
#' revalidation if Etag or Last-Modified headers are present).
#'
#' @param r A response
#' @export
#' @examples
#' # Never cached, always causes redownload
#' r1 <- GET("https://www.google.com")
#' cache_info(r1)
#' r1$date
#' rerequest(r1)$date
#'
#' # Expires in a year
#' r2 <- GET("https://www.google.com/images/srpr/logo11w.png")
#' cache_info(r2)
#' r2$date
#' rerequest(r2)$date
#'
#' # Has last-modified and etag, so does revalidation
#' r3 <- GET("http://httpbin.org/cache")
#' cache_info(r3)
#' r3$date
#' rerequest(r3)$date
#'
#' # Expires after 5 seconds
#' \dontrun{
#' r4 <- GET("http://httpbin.org/cache/5")
#' cache_info(r4)
#' r4$date
#' rerequest(r4)$date
#' Sys.sleep(5)
#' cache_info(r4)
#' rerequest(r4)$date
#' }
cache_info <- function(r) {
stopifnot(is.response(r))

Expand All @@ -16,33 +44,31 @@ cache_info <- function(r) {

# Parse cache control
control <- parse_cache_control(r$headers$`cache-control`)
max_age <- as.integer(control$`max-age`) %||% NA
if (any(c("no-store", "no-cache") %in% control)) {
override <- FALSE
} else if (any(c("public", "private") %in% control)) {
override <- TRUE
} else {
override <- NA
}
max_age <- as.integer(control$`max-age`) %||% NULL

# Compute expiry
if (!is.null(max_age)) {
# If date missing, use request date
date <- parse_http_date(r$headers$date) %||% r$date
expires <- date + max_age
expires <- r$date + max_age
} else if (!is.null(r$headers$expires)) {
expires <- parse_http_date(r$headers$expires, -Inf)
} else {
expires <- NULL
}

# Is this cacheable?
cacheable <- r$request$method %in% c("GET", "HEAD") &&
status_code(r) %in% c(200L, 203L, 300L, 301L, 410L) &&
(!is.null(expires) || !is.null(r$headers$etag) ||
!is.null(r$headers$`last-modified`)) &&
!any(c("no-store", "no-cache") %in% control$flags)
# What impact should any(c("public", "private") %in% control$flags) have?

structure(
list(
method = r$method,
url = r$url,
status = r$status_code,

override = override,
cacheable = cacheable,
expires = expires,
etag = r$headers$etag %||% NULL,
modified = parse_http_date(r$headers$`last-modified`, NULL)
Expand All @@ -53,21 +79,22 @@ cache_info <- function(r) {

is.cache_info <- function(x) inherits(x, "cache_info")


#' @export
print.cache_info <- function(x, ...) {
cat("<cache_info> ", x$method, " ", x$url, "\n", sep = "")
cat(" Cacheable: ", x$cacheable, "\n", sep = "")
if (!is.null(x$expires)) {
cat(" Expires: ", http_date(x$expires), sep = "")
if (x$expires < Sys.time()) cat(" <expired>")
cat("\n")
}
cat(" Last-Modified: ", http_date(x$modified), "\n", sep = "")
cat(" Etag: ", x$etag, "\n", sep = "")
cat(" Force caching: ", x$override, "\n", sep = "")
}

parse_cache_control <- function(x) {
if (is.null(x)) return(list())

pieces <- strsplit(x, ",")[[1]]
pieces <- gsub("^\\s+|\\s+$", "", pieces)
pieces <- tolower(pieces)
Expand All @@ -81,3 +108,37 @@ parse_cache_control <- function(x) {

setNames(values, keys)
}

#' @rdname cache_info
#' @export
rerequest <- function(r) {
x <- cache_info(r)
if (!x$cacheable) {
return(reperform(r$request))
}

# Cacheable, and hasn't expired
if (!is.null(x$expires) && x$expires >= Sys.time()) {
return(r)
}

# Requires validation
r$request$opts <- c(
r$request$opts,
add_headers(
`If-Modified-Since` = http_date(x$modified),
`If-None-Match` = x$etag
)
)
validated <- reperform(r$request)

if (status_code(validated) == 304L) {
r
} else {
validated
}
}

reperform <- function(x) {
perform(x$handle, x$writer, x$method, x$opts, x$body)
}
21 changes: 17 additions & 4 deletions R/perform.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# Abstract over the differences in RCurl API depending on whether or not
# you send a body.
perform <- function(handle, writer, method, opts, body) {
# Cache exact request so it can easily be replayed.
request <- list(
handle = handle,
writer = writer,
method = toupper(method),
opts = opts,
body = body
)

headers <- character()
add_header <- function(text) {
headers <<- c(headers, text)
Expand Down Expand Up @@ -28,18 +37,22 @@ perform <- function(handle, writer, method, opts, body) {
headers <- parse_headers(headers)
content <- write_term(writer)

if (!is.null(headers$date)) {
date <- parse_http_date(headers$Date)
} else {
date <- Sys.time()
}

response(
method = toupper(method),
url = last_request(handle)$effective.url,
handle = handle,
status_code = last(headers)$status,
headers = last(headers)$headers,
all_headers = headers,
cookies = cookies(handle),
content = content,
date = Sys.time(),
date = date,
times = request_times(handle),
config = config
request = request
)
}

Expand Down
1 change: 1 addition & 0 deletions R/response.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ print.response <- function(x, ..., max.lines = 10, width = getOption("width")) {
content_type <- x$headers$`content-type`

cat("Response [", x$url, "]\n", sep = "")
cat(" Date: ", format(x$date, "%Y-%m-%d %H:%M"), "\n", sep = "")
cat(" Status: ", x$status_code, "\n", sep = "")
cat(" Content-type: ", content_type %||% "<unknown>", "\n", sep = "")

Expand Down
31 changes: 30 additions & 1 deletion man/cache_info.Rd
Original file line number Diff line number Diff line change
@@ -1,21 +1,50 @@
% Generated by roxygen2 (4.0.1): do not edit by hand
\name{cache_info}
\alias{cache_info}
\alias{rerequest}
\title{Compute caching information for a response.}
\usage{
cache_info(r)

rerequest(r)
}
\arguments{
\item{r}{A response}
}
\description{
Compute caching information for a response.
\code{cache_info()} gives details of cacheability of a response,
\code{rerequest()} re-performs the original request doing as little work
as possible (if not expired, returns response as is, or performs
revalidation if Etag or Last-Modified headers are present).
}
\examples{
# Never cached, always causes redownload
r1 <- GET("https://www.google.com")
cache_info(r1)
r1$date
rerequest(r1)$date

# Expires in a year
r2 <- GET("https://www.google.com/images/srpr/logo11w.png")
cache_info(r2)
r2$date
rerequest(r2)$date

# Has last-modified and etag, so does revalidation
r3 <- GET("http://httpbin.org/cache")
cache_info(r3)
r3$date
rerequest(r3)$date

# Expires after 5 seconds
\dontrun{
r4 <- GET("http://httpbin.org/cache/5")
cache_info(r4)
r4$date
rerequest(r4)$date
Sys.sleep(5)
cache_info(r4)
rerequest(r4)$date
}
}

0 comments on commit f10e957

Please sign in to comment.