diff --git a/NEWS.md b/NEWS.md index 862c8da5..8615db36 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # bcdata 0.2.2.9000 ### IMPROVEMENTS - Setting the `bcdata.single_download_limit` limit dynamically from the getCapabilities endpoint. +- `bcdc_describe_feature` now joins a object description column to the returned object to provide more information about a field +directly in R. #241 - Better documentation and information surrounding the `bcdata.max_geom_pred_size` option. #243 - Add new function `bcdc_check_geom_size` so users can check prior to submitting a WFS request with `filter` to see if the request will require a bounding box conversion. diff --git a/R/bcdc_options.R b/R/bcdc_options.R index 0d7d7062..8597674e 100644 --- a/R/bcdc_options.R +++ b/R/bcdc_options.R @@ -18,7 +18,7 @@ #' failed calls to the data catalogue. Options in R are reset every time R is re-started. See examples for #' addition ways to restore your initial state. #' -#' `bcdata.max_geom_pred_size` is the maximum size of an object used for a geometric operation in bytes. Objects +#' `bcdata.max_geom_pred_size` is the maximum size in bytes of an object used for a geometric operation. Objects #' that are bigger than this value will have a bounding box drawn and apply the geometric operation #' on that simpler polygon. The [bcdc_check_geom_size] function can be used to assess whether a given spatial object #' exceed the value of this option. Users can iteratively try to increase the maximum geometric predicate size and see @@ -75,25 +75,24 @@ bcdc_options <- function() { "bcdata.chunk_limit", null_to_na(getOption("bcdata.chunk_limit")), 1000, "bcdata.single_download_limit", null_to_na(getOption("bcdata.single_download_limit", - default = ._bcdataenv_$bcdata_dl_limit)), 10000 + default = bcdc_single_download_limit())), 10000 ) } check_chunk_limit <- function(){ chunk_value <- getOption("bcdata.chunk_limit") - chunk_limit <- getOption("bcdata.single_download_limit", default = ._bcdataenv_$bcdata_dl_limit) + chunk_limit <- getOption("bcdata.single_download_limit", default = bcdc_single_download_limit()) if(!is.null(chunk_value) && chunk_value >= chunk_limit){ stop(glue::glue("Your chunk value of {chunk_value} exceed the BC Data Catalogue chunk limit of {chunk_limit}"), call. = FALSE) } } - -bcdc_single_download_limit <- function() { +bcdc_get_wfs_records_xml <- function() { if (has_internet()) { url <- "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&version=2.0.0&request=Getcapabilities" - cli <- bcdata:::bcdc_http_client(url, auth = FALSE) + cli <- bcdc_http_client(url, auth = FALSE) cc <- cli$get(query = list( SERVICE = "WFS", @@ -102,13 +101,43 @@ bcdc_single_download_limit <- function() { )) res <- cc$parse("UTF-8") - doc <- xml2::read_xml(res) + xml2::read_xml(res) - constraints <- xml2::xml_find_all(doc, ".//ows:Constraint") - count_defaults <- constraints[which(xml2::xml_attrs(constraints) %in% "CountDefault")] - xml2::xml_double(count_defaults) } else { message("No access to internet") - 10000L + invisible(NULL) + } +} + +bcdc_get_wfs_records <- function() { + doc <- ._bcdataenv_$get_capabilities_xml + + if (is.null(doc)) { + # Try again to get the xml + doc <- ._bcdataenv_$get_capabilities_xml <- suppressMessages(bcdc_get_wfs_records_xml()) + if (is.null(doc)) stop("No access to internet", call. = FALSE) + } + + # d1 is the default xml namespace (see xml2::xml_ns(doc)) + features <- xml2::xml_find_all(doc, "./d1:FeatureTypeList/d1:FeatureType") + + tibble::tibble( + whse_name = gsub("^pub:", "", xml2::xml_text(xml2::xml_find_first(features, "./d1:Name"))), + title = xml2::xml_text(xml2::xml_find_first(features, "./d1:Title")), + cat_url = xml2::xml_attr(xml2::xml_find_first(features, "./d1:MetadataURL"), "href") + ) +} + +bcdc_single_download_limit <- function() { + doc <- ._bcdataenv_$get_capabilities_xml + + if (is.null(doc)) { + message("No access to internet") + return(10000L) } + + count_default_xpath <- "./ows:OperationsMetadata/ows:Operation[@name='GetFeature']/ows:Constraint[@name='CountDefault']" + # Looking globally also works but is slower: ".//ows:Constraint[@name='CountDefault']" + count_defaults <- xml2::xml_find_first(doc, count_default_xpath) + xml2::xml_integer(count_defaults) } diff --git a/R/describe-feature.R b/R/describe-feature.R index af9d2301..3863a302 100644 --- a/R/describe-feature.R +++ b/R/describe-feature.R @@ -22,6 +22,7 @@ #' - sticky: whether a column can be separated from the record in a Web Service call via the `dplyr::select` method #' - remote_col_type: class of what is return by the web feature service #' - local_col_type: the column class in R +#' - column_comments: additional metadata specific to that column #' #' @inheritParams bcdc_query_geodata #' @export @@ -53,7 +54,9 @@ bcdc_describe_feature.default <- function(record) { bcdc_describe_feature.character <- function(record){ if (is_whse_object_name(record)) { - return(feature_helper(record)) + bgc <- bcdc_get_wfs_records() + cat_record <- bcdc_get_record(bgc$cat_url[grepl(record, bgc$whse_name)]) + return(obj_desc_join(record, cat_record$details)) } bcdc_describe_feature(bcdc_get_record(record)) @@ -68,8 +71,8 @@ bcdc_describe_feature.bcdc_record <- function(record){ call. = FALSE ) } + obj_desc_join(record$layer_name, record$details) - feature_helper(record$layer_name) } parse_raw_feature_tbl <- function(query_list){ @@ -110,15 +113,11 @@ feature_helper <- function(whse_name){ xml_df <- parse_raw_feature_tbl(query_list) geom_type <- attr(xml_df, "geom_type") - ## Identify geometry column and move to last - # xml_df[xml_df$type == geom_type, "name"] <- "geometry" - # xml_df <- dplyr::bind_rows(xml_df[xml_df$name != "geometry",], - # xml_df[xml_df$name == "geometry",]) ## Fix logicals xml_df$nillable = ifelse(xml_df$nillable == "true", TRUE, FALSE) - xml_df <- xml_df[, c("name", "nillable", "type")] + ## Add the id_row back into the front xml_df <- dplyr::bind_rows(id_row, xml_df) colnames(xml_df) <- c("col_name", "sticky", "remote_col_type") @@ -129,4 +128,10 @@ feature_helper <- function(whse_name){ - +obj_desc_join <- function(x, y) { + dplyr::left_join( + feature_helper(x), + y[,c("column_comments", "column_name")], + by = c("col_name" = "column_name") + ) +} diff --git a/R/utils-classes.R b/R/utils-classes.R index c93a259e..97fcb07c 100644 --- a/R/utils-classes.R +++ b/R/utils-classes.R @@ -379,7 +379,7 @@ collect.bcdc_promise <- function(x, ...){ ## Determine total number of records for pagination purposes number_of_records <- bcdc_number_wfs_records(query_list, cli) - if (number_of_records < getOption("bcdata.single_download_limit", default = ._bcdataenv_$bcdata_dl_limit)) { + if (number_of_records < getOption("bcdata.single_download_limit", default = bcdc_single_download_limit())) { cc <- tryCatch(cli$post(body = query_list, encode = "form"), error = function(e) { stop("There was an issue processing this request. diff --git a/R/zzz.R b/R/zzz.R index d72501f5..e5d9a547 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -14,7 +14,7 @@ .onLoad <- function(...) { ._bcdataenv_$named_get_record_warned <- FALSE # nocov - ._bcdataenv_$bcdata_dl_limit <- bcdc_single_download_limit() # nocov + ._bcdataenv_$get_capabilities_xml <- bcdc_get_wfs_records_xml() # nocov } diff --git a/man/bcdc_describe_feature.Rd b/man/bcdc_describe_feature.Rd index 46997b95..e89db7ce 100644 --- a/man/bcdc_describe_feature.Rd +++ b/man/bcdc_describe_feature.Rd @@ -26,6 +26,7 @@ The tibble returns the following columns: \item sticky: whether a column can be separated from the record in a Web Service call via the \code{dplyr::select} method \item remote_col_type: class of what is return by the web feature service \item local_col_type: the column class in R +\item column_comments: additional metadata specific to that column } } \description{ diff --git a/man/bcdc_options.Rd b/man/bcdc_options.Rd index 63aee9e0..49ce1281 100644 --- a/man/bcdc_options.Rd +++ b/man/bcdc_options.Rd @@ -14,7 +14,7 @@ failed calls to the data catalogue. Options in R are reset every time R is re-st addition ways to restore your initial state. } \details{ -\code{bcdata.max_geom_pred_size} is the maximum size of an object used for a geometric operation in bytes. Objects +\code{bcdata.max_geom_pred_size} is the maximum size in bytes of an object used for a geometric operation. Objects that are bigger than this value will have a bounding box drawn and apply the geometric operation on that simpler polygon. The \link{bcdc_check_geom_size} function can be used to assess whether a given spatial object exceed the value of this option. Users can iteratively try to increase the maximum geometric predicate size and see diff --git a/tests/testthat/test-describe-feature.R b/tests/testthat/test-describe-feature.R index 9cfb251d..0cb4a3bd 100644 --- a/tests/testthat/test-describe-feature.R +++ b/tests/testthat/test-describe-feature.R @@ -16,7 +16,7 @@ test_that("Test that bcdc_describe feature returns the correct columns",{ skip_on_cran() skip_if_net_down() airport_feature <- bcdc_describe_feature("bc-airports") - expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type")) + expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments")) }) @@ -39,14 +39,14 @@ test_that("bcdc_describe_feature accepts a bcdc_record object", { skip_if_net_down() airports <- bcdc_get_record('76b1b7a3-2112-4444-857a-afccf7b20da8') airport_feature <- bcdc_describe_feature(airports) - expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type")) + expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments")) }) test_that("bcdc_describe_feature accepts BCGW name",{ skip_on_cran() skip_if_net_down() airport_feature <- bcdc_describe_feature("WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW") - expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type")) + expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments")) }) test_that("bcdc_describe_feature fails on unsupported classes", { @@ -63,3 +63,15 @@ test_that("bcdc_describe_feature fails with non-wfs record", { "No WMS/WFS resource available for this data set") }) +test_that("bcdc_get_wfs_records works", { + skip_if_net_down() + skip_on_cran() + + wfs_records <- bcdc_get_wfs_records() + + expect_equal(names(wfs_records), c("whse_name", "title", "cat_url")) + expect_true(nrow(wfs_records) > 0L) + lapply(wfs_records, function(x) { + expect_true(any(nzchar(x, keepNA = TRUE)) & any(!is.na(x))) + }) +}) diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index e6d9c9fd..7826ae7c 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -34,7 +34,7 @@ test_that("bcdata.single_download_limit", { }) -test_that("bcdata.single_download_limit can be changes",{ +test_that("bcdata.single_download_limit can be changed",{ withr::local_options(list(bcdata.single_download_limit = 13)) expect_equal(getOption("bcdata.single_download_limit"), 13) }) @@ -43,5 +43,5 @@ test_that("bcdc_single_download_limit returns a number",{ skip_on_cran() skip_if_net_down() lt <- bcdc_single_download_limit() - expect_type(lt, "double") + expect_type(lt, "integer") })