From 056b17fa533db1d2b907e21f30cd799053835c23 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Wed, 3 Jun 2020 14:36:02 -0700 Subject: [PATCH 1/6] Allow auth of catalogue requests - split bcdc_http_client into wfs and catalogue - access BCDC_KEY env var for authorization of catalogue requests - add tests --- R/bcdc-web-services.R | 4 ++-- R/bcdc_search.R | 10 ++++----- R/describe-feature.R | 3 +-- R/utils.R | 35 ++++++++++++++++++++++++++------ tests/testthat/test-get_record.R | 14 +++++++++++++ 5 files changed, 50 insertions(+), 16 deletions(-) diff --git a/R/bcdc-web-services.R b/R/bcdc-web-services.R index aac5a0bd..5216e462 100644 --- a/R/bcdc-web-services.R +++ b/R/bcdc-web-services.R @@ -94,7 +94,7 @@ bcdc_query_geodata.character <- function(record, crs = 3005) { query_list <- compact(query_list) ## GET and parse data to sf object - cli <- bcdc_http_client(url = "https://openmaps.gov.bc.ca/geo/pub/wfs") + cli <- bcdc_wfs_client() cols_df <- feature_helper(record) @@ -134,7 +134,7 @@ bcdc_query_geodata.bcdc_record <- function(record, crs = 3005) { query_list <- compact(query_list) ## GET and parse data to sf object - cli <- bcdc_http_client(url = "https://openmaps.gov.bc.ca/geo/pub/wfs") + cli <- bcdc_wfs_client() cols_df <- feature_helper(query_list$typeNames) diff --git a/R/bcdc_search.R b/R/bcdc_search.R index 733a5325..add356ca 100644 --- a/R/bcdc_search.R +++ b/R/bcdc_search.R @@ -32,8 +32,7 @@ bcdc_search_facets <- function(facet = c("license_id", "download_audience", query <- paste0("\"", facet, "\"", collapse = ",") query <- paste0("[", query, "]") - cli <- bcdc_http_client(paste0(base_url(), - "action/package_search")) + cli <- bcdc_catalogue_client("action/package_search") r <- cli$get(query = list(facet.field = query, rows = 0)) r$raise_for_status() @@ -66,7 +65,7 @@ bcdc_list <- function() { limit <- 1000 while (l_new_ret) { - cli <- bcdc_http_client(paste0(base_url(), "action/package_list")) + cli <- bcdc_catalogue_client("action/package_list") r 
<- cli$get(query = list(offset = offset, limit = limit)) r$raise_for_status() @@ -138,7 +137,7 @@ bcdc_search <- function(..., license_id = NULL, query <- gsub("\\s+", "%20", query) - cli <- bcdc_http_client(paste0(base_url(), "action/package_search")) + cli <- bcdc_catalogue_client("action/package_search") # Use I(query) to treat query as is, so that things like + and : # aren't encoded as %2B, %3A etc @@ -190,8 +189,7 @@ bcdc_get_record <- function(id) { id <- slug_from_url(id) - cli <- bcdc_http_client(paste0(base_url(), - "action/package_show")) + cli <- bcdc_catalogue_client("action/package_show") r <- cli$get(query = list(id = id)) diff --git a/R/describe-feature.R b/R/describe-feature.R index 45ff7ddf..70b44130 100644 --- a/R/describe-feature.R +++ b/R/describe-feature.R @@ -70,8 +70,7 @@ bcdc_describe_feature.bcdc_record <- function(record){ parse_raw_feature_tbl <- function(query_list){ ## GET and parse data to sf object - cli <- - bcdc_http_client(url = "https://openmaps.gov.bc.ca/geo/pub/wfs") + cli <- bcdc_wfs_client() cc <- cli$post(body = query_list, encode = "form") diff --git a/R/utils.R b/R/utils.R index b8a5f6b9..aefaa3a4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,7 +10,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-base_url <- function() "https://catalogue.data.gov.bc.ca/api/3/" +catalogue_base_url <- function() "https://catalogue.data.gov.bc.ca/api/3/" +wfs_base_url <- function() "https://openmaps.gov.bc.ca/geo/pub/wfs/" bcdata_user_agent <- function(){ "https://github.com/bcgov/bcdata" @@ -74,11 +75,33 @@ formats_supported <- function(){ c(bcdc_read_functions()[["format"]], "zip") } -bcdc_http_client <- function(url = NULL) { +bcdc_catalogue_client <- function(endpoint = NULL) { + url <- paste0(catalogue_base_url(), endpoint) + bcdc_http_client(url, auth = TRUE) +} + +bcdc_wfs_client <- function(endpoint = NULL) { + url <- paste0(wfs_base_url(), endpoint) + bcdc_http_client(url, auth = FALSE) +} + +bcdc_http_client <- function(url, auth = FALSE) { + headers <- list( + `User-Agent` = bcdata_user_agent(), + Authorization = if (auth) bcdc_auth() else NULL + ) - crul::HttpClient$new(url = url, - headers = list(`User-Agent` = bcdata_user_agent())) + crul::HttpClient$new( + url = url, + headers = compact(headers) + ) +} +bcdc_auth <- function() { + key <- Sys.getenv("BCDC_KEY") + if (!nzchar(key)) return(NULL) + message("Authorizing with your stored API key") + key } ## Check if there is internet @@ -125,7 +148,6 @@ wfs_to_r_col_type <- function(col){ ) } - ##from a record formats_from_record <- function(x, trim = TRUE){ @@ -197,7 +219,8 @@ read_from_url <- function(resource, ...){ if (!reported_format %in% formats_supported()) { stop("Reading ", reported_format, " files is not currently supported in bcdata.") } - cli <- bcdc_http_client(file_url) + auth <- grepl("(catalogue.data.gov.bc.ca)|(pub.data.gov.bc.ca)", file_url) + cli <- bcdc_http_client(file_url, auth = auth) ## Establish where to download file tmp <- tempfile(tmpdir = unique_temp_dir(), diff --git a/tests/testthat/test-get_record.R b/tests/testthat/test-get_record.R index 2e4ef2a8..1ab61b23 100644 --- a/tests/testthat/test-get_record.R +++ b/tests/testthat/test-get_record.R @@ -95,4 +95,18 @@ 
expect_error(bcdc_tidy_resources("WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW"), "No bcdc_tidy_resources method for a BCGW object name") }) +test_that("bcdc_get_record works with/without authentication", { + skip_if_net_down() + skip_on_cran() + + key_val <- Sys.getenv("BCDC_KEY") + skip_if_not(nzchar(key_val)) + on.exit(Sys.setenv(BCDC_KEY = key_val)) + expect_message(res <- bcdc_get_record('76b1b7a3-2112-4444-857a-afccf7b20da8'), + "Authorizing with your stored API key") + expect_is(res, "bcdc_record") + + Sys.unsetenv("BCDC_KEY") + expect_silent(bcdc_get_record('76b1b7a3-2112-4444-857a-afccf7b20da8')) +}) From 95947ece4df93caeb1a9725e11844d0173df9899 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Wed, 3 Jun 2020 15:34:42 -0700 Subject: [PATCH 2/6] Install dev rmarkdown for R < 3.6 --- .github/workflows/cmd-check.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index 195ccc33..f0f92d34 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -82,6 +82,8 @@ jobs: run: | remotes::install_deps(dependencies = TRUE, configure.args = c('sf' = '${{ matrix.config.sf_args }}')) install.packages("rcmdcheck") + # Install dev rmarkdown (https://github.com/rstudio/rmarkdown/pull/1832) for R < 3.6 + if (getRversion() < 3.6) remotes::install_github("rstudio/rmarkdown") shell: Rscript {0} - name: Check From 1678166c2336672f95927db8c6cef6c579b568fb Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Wed, 3 Jun 2020 15:49:46 -0700 Subject: [PATCH 3/6] escape dots in regex & simplify --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index aefaa3a4..9e9dab6e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -219,7 +219,7 @@ read_from_url <- function(resource, ...){ if (!reported_format %in% formats_supported()) { stop("Reading ", reported_format, " files is not currently supported in bcdata.") } - auth <- 
grepl("(catalogue.data.gov.bc.ca)|(pub.data.gov.bc.ca)", file_url) + auth <- grepl("(catalogue|pub)\\.data\\.gov\\.bc\\.ca", file_url) cli <- bcdc_http_client(file_url, auth = auth) ## Establish where to download file From 968c04f9b894e03e232554d7c4a1e087492c1f45 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Thu, 4 Jun 2020 11:18:27 -0700 Subject: [PATCH 4/6] Add authentication instructions to README --- README.Rmd | 28 ++++++++++++++++++++++++++++ README.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/README.Rmd b/README.Rmd index 4d47feb3..62edbd3a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -83,6 +83,34 @@ library(bcdata) - [Exploring Silviculture Data with bcdata](https://bcgov.github.io/bcdata/articles/explore-silviculture-data-using-bcdata.html) - Using bcdata with [bcmaps](https://github.com/bcgov/bcmaps) (Coming Soon!) +### BCDC Authentication + +If you are an authorized user of the B.C. Data Catalogue you may want to access +records/data that are not publicly available (e.g., in DRAFT, waiting to be +published). This can be done by authenticating with the catalogue with an API +key. + +_**Important Note:**_ *Your API key is like a password and you must take care to +keep it private. Do not share it, and be careful to not include it in any +scripts or accidentally commit it to GitHub.* + +You can log in to the catalogue to obtain your API key, then store it as an +environment variable in your [`.Renviron` file](https://rstats.wtf/r-startup.html#renviron). +The environment variable must be called `BCDC_KEY`, set like this: + +``` +BCDC_KEY=your-api-key +``` + +This way, the relevant bcdata functions will read that key and use it to +authorize your calls to the catalogue, allowing you to access additional records +that you would be authorized to see if you were logged into the catalogue web +interface. 
Functions that benefit from this are: + +- `bcdc_search()` +- `bcdc_list()` +- `bcdc_get_record()` +- `bcdc_get_data()` ### Getting Help or Reporting an Issue diff --git a/README.md b/README.md index a1dbbaa9..e606ff38 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,34 @@ library(bcdata) - Using bcdata with [bcmaps](https://github.com/bcgov/bcmaps) (Coming Soon\!) +### BCDC Authentication + +If you are an authorized user of the B.C. Data Catalogue you may want to +access records/data that are not publicly available (e.g., in DRAFT, +waiting to be published). This can be done by authenticating with the +catalogue with an API key. + +***Important Note:*** *Your API key is like a password and you must take +care to keep it private. Do not share it, and be careful to not include +it in any scripts or accidentally commit it to GitHub.* + +You can log in to the catalogue to obtain your API key, then store it as +an environment variable in your [`.Renviron` +file](https://rstats.wtf/r-startup.html#renviron). The environment +variable must be called `BCDC_KEY`, set like this: + + BCDC_KEY=your-api-key + +This way, the relevant bcdata functions will read that key and use it to +authorize your calls to the catalogue, allowing you to access additional +records that you would be authorized to see if you were logged into the +catalogue web interface. 
Functions that benefit from this are: + + - `bcdc_search()` + - `bcdc_list()` + - `bcdc_get_record()` + - `bcdc_get_data()` + ### Getting Help or Reporting an Issue To report bugs/issues/feature requests, please file an From 336e09a27e919c83d607a69f8dfde0b9b39b4f42 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Thu, 4 Jun 2020 15:08:18 -0700 Subject: [PATCH 5/6] update README --- README.Rmd | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.Rmd b/README.Rmd index 62edbd3a..f56492b7 100644 --- a/README.Rmd +++ b/README.Rmd @@ -85,8 +85,8 @@ library(bcdata) ### BCDC Authentication -If you are an authorized user of the B.C. Data Catalogue you may want to access -records/data that are not publicly available (e.g., in DRAFT, waiting to be +If you are an authorized editor of the B.C. Data Catalogue you may want to +access records that are not publicly available (e.g., in DRAFT, waiting to be published). This can be done by authenticating with the catalogue with an API key. diff --git a/README.md b/README.md index e606ff38..8fb97abe 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,8 @@ library(bcdata) ### BCDC Authentication -If you are an authorized user of the B.C. Data Catalogue you may want to -access records/data that are not publicly available (e.g., in DRAFT, +If you are an authorized editor of the B.C. Data Catalogue you may want +to access records that are not publicly available (e.g., in DRAFT, waiting to be published). This can be done by authenticating with the catalogue with an API key. 
From 55210b9488509ccee970783256dc40114cca11f8 Mon Sep 17 00:00:00 2001 From: Andy Teucher Date: Thu, 4 Jun 2020 15:51:22 -0700 Subject: [PATCH 6/6] bump cache version --- .github/workflows/cmd-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmd-check.yaml b/.github/workflows/cmd-check.yaml index f0f92d34..9e50c238 100644 --- a/.github/workflows/cmd-check.yaml +++ b/.github/workflows/cmd-check.yaml @@ -26,7 +26,7 @@ jobs: env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true CRAN: ${{ matrix.config.cran }} - cache-version: v2 + cache-version: v3 steps: - uses: actions/checkout@v2