diff --git a/NAMESPACE b/NAMESPACE index 9b303d2e..01598d8b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,6 +144,7 @@ export(secret_write_rds) export(signal_total_pages) export(throttle_status) export(url_build) +export(url_modify) export(url_parse) export(with_mock) export(with_mocked_responses) diff --git a/NEWS.md b/NEWS.md index 58822493..bc1fb892 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # httr2 (development version) +* New `url_modify()` makes it easier to modify an existing url (#464). * New `req_url_relative()` for constructing relative urls (#449). * `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449). * `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). diff --git a/R/url.R b/R/url.R index 9b249fe0..cd07befc 100644 --- a/R/url.R +++ b/R/url.R @@ -1,18 +1,16 @@ -#' Parse and build URLs +#' Parse a URL into its component pieces #' -#' `url_parse()` parses a URL into its component pieces; `url_build()` does -#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)` -#' for the details of the parsing algorithm. +#' `url_parse()` parses a URL into its component parts, powered by +#' [curl::curl_parse_url()]. The parsing algorithm follows the specifications +#' detailed in `r rfc(3986)`. #' -#' @param url For `url_parse()` a string to parse into a URL; -#' for `url_build()` a URL to turn back into a string. +#' @param url A string containing the URL to parse. #' @param base_url Use this as a parent, if `url` is a relative URL. -#' @returns -#' * `url_build()` returns a string. -#' * `url_parse()` returns a URL: a S3 list with class `httr2_url` -#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`, -#' `username`, `password`. +#' @returns An S3 object of class `httr2_url` with the following components: +#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and +#' `fragment`. #' @export +#' @family URL manipulation #' @examples #' url_parse("http://google.com/") #' url_parse("http://google.com:80/") @@ -22,12 +20,6 @@ #' # You can parse a relative URL if you also provide a base url #' url_parse("foo", "http://google.com/bar/") #' url_parse("..", "http://google.com/bar/") -#' -#' url <- url_parse("http://google.com/") -#' url$port <- 80 -#' url$hostname <- "example.com" -#' url$query <- list(a = 1, b = 2, c = 3) -#' url_build(url) url_parse <- function(url, base_url = NULL) { check_string(url) check_string(base_url, allow_null = TRUE) @@ -48,10 +40,88 @@ url_parse <- function(url, base_url = NULL) { parsed } -url_modify <- function(url, ..., error_call = caller_env()) { - url <- url_parse(url) - url <- modify_list(url, ..., error_call = error_call) - url_build(url) +#' Modify a URL +#' +#' Modify components of a URL. The default value of each argument, `NULL`, +#' means leave the component as is. If you want to remove a component, +#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will +#' create a relative URL. +#' +#' @param url A string or [parsed URL](url_parse). +#' @param scheme The scheme, typically either `http` or `https`. +#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`. +#' @param username,password Username and password to embed in the URL. +#' Not generally recommended but needed for some legacy applications. +#' @param port An integer port number. +#' @param path The path, e.g., `/search`. Paths must start with `/`, so this +#' will be automatically added if omitted. +#' @param query Either a query string or a named list of query components. +#' @param fragment The fragment, e.g., `#section-1`. +#' @return An object of the same type as `url`. +#' @export +#' @family URL manipulation +#' @examples +#' url_modify("http://hadley.nz", path = "about") +#' url_modify("http://hadley.nz", scheme = "https") +#' url_modify("http://hadley.nz/abc", path = "/cde") +#' url_modify("http://hadley.nz/abc", path = "") +#' url_modify("http://hadley.nz?a=1", query = "b=2") +#' url_modify("http://hadley.nz?a=1", query = list(c = 3)) +url_modify <- function(url, + scheme = NULL, + hostname = NULL, + username = NULL, + password = NULL, + port = NULL, + path = NULL, + query = NULL, + fragment = NULL) { + + if (!is_string(url) && !is_url(url)) { + stop_input_type(url, "a string or parsed URL") + } + string_url <- is_string(url) + if (string_url) { + url <- url_parse(url) + } + + check_string(scheme, allow_null = TRUE) + check_string(hostname, allow_null = TRUE) + check_string(username, allow_null = TRUE) + check_string(password, allow_null = TRUE) + check_number_whole(port, min = 1, allow_null = TRUE) + check_string(path, allow_null = TRUE) + check_string(fragment, allow_null = TRUE) + + if (is_string(query)) { + query <- query_parse(query) + } else if (is.list(query) && (is_named(query) || length(query) == 0)) { + for (nm in names(query)) { + check_query_param(query[[nm]], paste0("query$", nm)) + } + } else if (!is.null(query)) { + stop_input_type(query, "a character vector, named list, or NULL") + } + + new <- compact(list( + scheme = scheme, + hostname = hostname, + username = username, + password = password, + port = port, + path = path, + query = query, + fragment = fragment + )) + is_empty <- map_lgl(new, identical, "") + new[is_empty] <- list(NULL) + url[names(new)] <- new + + if (string_url) { + url_build(url) + } else { + url + } } is_url <- function(x) inherits(x, "httr2_url") @@ -91,9 +161,19 @@ print.httr2_url <- function(x, ...) { invisible(x) } +#' Build a string from a URL object +#' +#' This is the inverse of [url_parse()], taking a parsed URL object and +#' turning it back into a string. +#' +#' @param url An URL object created by [url_parse]. +#' @family URL manipulation #' @export -#' @rdname url_parse url_build <- function(url) { + if (!is_url(url)) { + stop_input_type(url, "a parsed URL") + } + if (!is.null(url$query)) { query <- query_build(url$query) } else { @@ -119,7 +199,7 @@ url_build <- function(url) { authority <- NULL } - if (!is.null(url$path) && !startsWith(url$path, "/")) { + if (is.null(url$path) || !startsWith(url$path, "/")) { url$path <- paste0("/", url$path) } diff --git a/_pkgdown.yml b/_pkgdown.yml index d92798f2..a17aae11 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -70,12 +70,15 @@ reference: contents: - starts_with("resp_") +- title: URL manipulation + contents: + - starts_with("url_") + - title: Miscellaenous helpers contents: - curl_translate - secrets - obfuscate - - url_parse - title: OAuth desc: > diff --git a/man/url_build.Rd b/man/url_build.Rd new file mode 100644 index 00000000..c2099509 --- /dev/null +++ b/man/url_build.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/url.R +\name{url_build} +\alias{url_build} +\title{Build a string from a URL object} +\usage{ +url_build(url) +} +\arguments{ +\item{url}{An URL object created by \link{url_parse}.} +} +\description{ +This is the inverse of \code{\link[=url_parse]{url_parse()}}, taking a parsed URL object and +turning it back into a string. +} +\seealso{ +Other URL manipulation: +\code{\link{url_modify}()}, +\code{\link{url_parse}()} +} +\concept{URL manipulation} diff --git a/man/url_modify.Rd b/man/url_modify.Rd new file mode 100644 index 00000000..2f19d1d7 --- /dev/null +++ b/man/url_modify.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/url.R +\name{url_modify} +\alias{url_modify} +\title{Modify a URL} +\usage{ +url_modify( + url, + scheme = NULL, + hostname = NULL, + username = NULL, + password = NULL, + port = NULL, + path = NULL, + query = NULL, + fragment = NULL +) +} +\arguments{ +\item{url}{A string or \href{url_parse}{parsed URL}.} + +\item{scheme}{The scheme, typically either \code{http} or \code{https}.} + +\item{hostname}{The hostname, e.g., \code{www.google.com} or \code{posit.co}.} + +\item{username, password}{Username and password to embed in the URL. +Not generally recommended but needed for some legacy applications.} + +\item{port}{An integer port number.} + +\item{path}{The path, e.g., \verb{/search}. Paths must start with \code{/}, so this +will be automatically added if omitted.} + +\item{query}{Either a query string or a named list of query components.} + +\item{fragment}{The fragment, e.g., \verb{#section-1}.} +} +\value{ +An object of the same type as \code{url}. +} +\description{ +Modify components of a URL. The default value of each argument, \code{NULL}, +means leave the component as is. If you want to remove a component, +set it to \code{""}. Note that setting \code{scheme} or \code{hostname} to \code{""} will +create a relative URL. +} +\examples{ +url_modify("http://hadley.nz", path = "about") +url_modify("http://hadley.nz", scheme = "https") +url_modify("http://hadley.nz/abc", path = "/cde") +url_modify("http://hadley.nz/abc", path = "") +url_modify("http://hadley.nz?a=1", query = "b=2") +url_modify("http://hadley.nz?a=1", query = list(c = 3)) +} +\seealso{ +Other URL manipulation: +\code{\link{url_build}()}, +\code{\link{url_parse}()} +} +\concept{URL manipulation} diff --git a/man/url_parse.Rd b/man/url_parse.Rd index f36adeeb..9a881c05 100644 --- a/man/url_parse.Rd +++ b/man/url_parse.Rd @@ -2,31 +2,24 @@ % Please edit documentation in R/url.R \name{url_parse} \alias{url_parse} -\alias{url_build} -\title{Parse and build URLs} +\title{Parse a URL into its component pieces} \usage{ url_parse(url, base_url = NULL) - -url_build(url) } \arguments{ -\item{url}{For \code{url_parse()} a string to parse into a URL; -for \code{url_build()} a URL to turn back into a string.} +\item{url}{A string containing the URL to parse.} \item{base_url}{Use this as a parent, if \code{url} is a relative URL.} } \value{ -\itemize{ -\item \code{url_build()} returns a string. -\item \code{url_parse()} returns a URL: a S3 list with class \code{httr2_url} -and elements \code{scheme}, \code{hostname}, \code{port}, \code{path}, \code{fragment}, \code{query}, -\code{username}, \code{password}. -} +An S3 object of class \code{httr2_url} with the following components: +\code{scheme}, \code{hostname}, \code{username}, \code{password}, \code{port}, \code{path}, \code{query}, and +\code{fragment}. } \description{ -\code{url_parse()} parses a URL into its component pieces; \code{url_build()} does -the reverse, converting a list of pieces into a string URL. See \href{https://datatracker.ietf.org/doc/html/rfc3986}{RFC 3986} -for the details of the parsing algorithm. +\code{url_parse()} parses a URL into its component parts, powered by +\code{\link[curl:curl_parse_url]{curl::curl_parse_url()}}. The parsing algorithm follows the specifications +detailed in \href{https://datatracker.ietf.org/doc/html/rfc3986}{RFC 3986}. } \examples{ url_parse("http://google.com/") @@ -37,10 +30,10 @@ url_parse("http://username@google.com:80/path;test?a=1&b=2#40") # You can parse a relative URL if you also provide a base url url_parse("foo", "http://google.com/bar/") url_parse("..", "http://google.com/bar/") - -url <- url_parse("http://google.com/") -url$port <- 80 -url$hostname <- "example.com" -url$query <- list(a = 1, b = 2, c = 3) -url_build(url) } +\seealso{ +Other URL manipulation: +\code{\link{url_build}()}, +\code{\link{url_modify}()} +} +\concept{URL manipulation} diff --git a/tests/testthat/_snaps/url.md b/tests/testthat/_snaps/url.md index a80e0ec1..36e5c565 100644 --- a/tests/testthat/_snaps/url.md +++ b/tests/testthat/_snaps/url.md @@ -24,6 +24,75 @@ Error in `url_build()`: ! Cannot set url `password` without `username`. +# url_build validates its input + + Code + url_build("abc") + Condition + Error in `url_build()`: + ! `url` must be a parsed URL, not the string "abc". + +# url_modify checks its inputs + + Code + url_modify(1) + Condition + Error in `url_modify()`: + ! `url` must be a string or parsed URL, not the number 1. + Code + url_modify(url, scheme = 1) + Condition + Error in `url_modify()`: + ! `scheme` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, hostname = 1) + Condition + Error in `url_modify()`: + ! `hostname` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, port = "x") + Condition + Error in `url_modify()`: + ! `port` must be a whole number or `NULL`, not the string "x". + Code + url_modify(url, username = 1) + Condition + Error in `url_modify()`: + ! `username` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, password = 1) + Condition + Error in `url_modify()`: + ! `password` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, path = 1) + Condition + Error in `url_modify()`: + ! `path` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, fragment = 1) + Condition + Error in `url_modify()`: + ! `fragment` must be a single string or `NULL`, not the number 1. + +# checks various query formats + + Code + url_modify(url, query = 1) + Condition + Error in `url_modify()`: + ! `query` must be a character vector, named list, or NULL, not the number 1. + Code + url_modify(url, query = list(1)) + Condition + Error in `url_modify()`: + ! `query` must be a character vector, named list, or NULL, not a list. + Code + url_modify(url, query = list(x = 1:2)) + Condition + Error in `url_modify()`: + ! Query value `query$x` must be a length-1 atomic vector, not an integer vector. + # validates inputs Code diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R index 7cae86e7..b99d7224 100644 --- a/tests/testthat/test-url.R +++ b/tests/testthat/test-url.R @@ -6,7 +6,6 @@ test_that("can parse special cases", { test_that("can round trip urls", { urls <- list( - "file:///", "http://google.com/", "http://google.com/path", "http://google.com/path?a=1&b=2", @@ -35,18 +34,63 @@ test_that("can print all url details", { ) }) -test_that("ensures path always starts with /", { - expect_equal( - url_modify("https://example.com/abc", path = "def"), - "https://example.com/def" - ) -}) - test_that("password also requires username", { url <- url_parse("http://username:pwd@example.com") url$username <- NULL expect_snapshot(url_build(url), error = TRUE) +}) + +test_that("url_build validates its input", { + expect_snapshot(url_build("abc"), error = TRUE) +}) + +# modify url ------------------------------------------------------------- + +test_that("url_modify checks its inputs", { + url <- "http://example.com" + + expect_snapshot(error = TRUE, { + url_modify(1) + url_modify(url, scheme = 1) + url_modify(url, hostname = 1) + url_modify(url, port = "x") + url_modify(url, username = 1) + url_modify(url, password = 1) + url_modify(url, path = 1) + url_modify(url, fragment = 1) + }) +}) + +test_that("no arguments is idempotent", { + string <- "http://example.com/" + url <- url_parse(string) + + expect_equal(url_modify(string), string) + expect_equal(url_modify(url), url) +}) + +test_that("can accept query as a string or list", { + url <- "http://test/" + + expect_equal(url_modify(url, query = "a=1&b=2"), "http://test/?a=1&b=2") + expect_equal(url_modify(url, query = list(a = 1, b = 2)), "http://test/?a=1&b=2") + + expect_equal(url_modify(url, query = ""), "http://test/") + expect_equal(url_modify(url, query = list()), "http://test/") +}) +test_that("checks various query formats", { + url <- "http://example.com" + + expect_snapshot(error = TRUE, { + url_modify(url, query = 1) + url_modify(url, query = list(1)) + url_modify(url, query = list(x = 1:2)) + }) +}) +test_that("path always starts with /", { + expect_equal(url_modify("https://x.com/abc", path = "def"), "https://x.com/def") + expect_equal(url_modify("https://x.com/abc", path = ""), "https://x.com/") }) # query -------------------------------------------------------------------