Skip to content

Commit

Permalink
Add full XML get capabilities into pkg env to access object descripti…
Browse files Browse the repository at this point in the history
…on (#259)
  • Loading branch information
boshek authored Apr 2, 2021
1 parent cfb5d99 commit 11d2c5c
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 27 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# bcdata 0.2.2.9000
### IMPROVEMENTS
- Setting the `bcdata.single_download_limit` limit dynamically from the getCapabilities endpoint.
- `bcdc_describe_feature` now joins a object description column to the returned object to provide more information about a field
directly in R. #241
- Better documentation and information surrounding the `bcdata.max_geom_pred_size` option. #243
- Add new function `bcdc_check_geom_size` so users can check prior to submitting a WFS request with `filter` to see
if the request will require a bounding box conversion.
Expand Down
51 changes: 40 additions & 11 deletions R/bcdc_options.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' failed calls to the data catalogue. Options in R are reset every time R is re-started. See examples for
#' addition ways to restore your initial state.
#'
#' `bcdata.max_geom_pred_size` is the maximum size of an object used for a geometric operation in bytes. Objects
#' `bcdata.max_geom_pred_size` is the maximum size in bytes of an object used for a geometric operation. Objects
#' that are bigger than this value will have a bounding box drawn and apply the geometric operation
#' on that simpler polygon. The [bcdc_check_geom_size] function can be used to assess whether a given spatial object
#' exceed the value of this option. Users can iteratively try to increase the maximum geometric predicate size and see
Expand Down Expand Up @@ -75,25 +75,24 @@ bcdc_options <- function() {
"bcdata.chunk_limit", null_to_na(getOption("bcdata.chunk_limit")), 1000,
"bcdata.single_download_limit",
null_to_na(getOption("bcdata.single_download_limit",
default = ._bcdataenv_$bcdata_dl_limit)), 10000
default = bcdc_single_download_limit())), 10000
)
}


check_chunk_limit <- function(){
chunk_value <- getOption("bcdata.chunk_limit")
chunk_limit <- getOption("bcdata.single_download_limit", default = ._bcdataenv_$bcdata_dl_limit)
chunk_limit <- getOption("bcdata.single_download_limit", default = bcdc_single_download_limit())

if(!is.null(chunk_value) && chunk_value >= chunk_limit){
stop(glue::glue("Your chunk value of {chunk_value} exceed the BC Data Catalogue chunk limit of {chunk_limit}"), call. = FALSE)
}
}


bcdc_single_download_limit <- function() {
bcdc_get_wfs_records_xml <- function() {
if (has_internet()) {
url <- "http://openmaps.gov.bc.ca/geo/pub/ows?service=WFS&version=2.0.0&request=Getcapabilities"
cli <- bcdata:::bcdc_http_client(url, auth = FALSE)
cli <- bcdc_http_client(url, auth = FALSE)

cc <- cli$get(query = list(
SERVICE = "WFS",
Expand All @@ -102,13 +101,43 @@ bcdc_single_download_limit <- function() {
))

res <- cc$parse("UTF-8")
doc <- xml2::read_xml(res)
xml2::read_xml(res)

constraints <- xml2::xml_find_all(doc, ".//ows:Constraint")
count_defaults <- constraints[which(xml2::xml_attrs(constraints) %in% "CountDefault")]
xml2::xml_double(count_defaults)
} else {
message("No access to internet")
10000L
invisible(NULL)
}
}

bcdc_get_wfs_records <- function() {
doc <- ._bcdataenv_$get_capabilities_xml

if (is.null(doc)) {
# Try again to get the xml
doc <- ._bcdataenv_$get_capabilities_xml <- suppressMessages(bcdc_get_wfs_records_xml())
if (is.null(doc)) stop("No access to internet", call. = FALSE)
}

# d1 is the default xml namespace (see xml2::xml_ns(doc))
features <- xml2::xml_find_all(doc, "./d1:FeatureTypeList/d1:FeatureType")

tibble::tibble(
whse_name = gsub("^pub:", "", xml2::xml_text(xml2::xml_find_first(features, "./d1:Name"))),
title = xml2::xml_text(xml2::xml_find_first(features, "./d1:Title")),
cat_url = xml2::xml_attr(xml2::xml_find_first(features, "./d1:MetadataURL"), "href")
)
}

bcdc_single_download_limit <- function() {
doc <- ._bcdataenv_$get_capabilities_xml

if (is.null(doc)) {
message("No access to internet")
return(10000L)
}

count_default_xpath <- "./ows:OperationsMetadata/ows:Operation[@name='GetFeature']/ows:Constraint[@name='CountDefault']"
# Looking globally also works but is slower: ".//ows:Constraint[@name='CountDefault']"
count_defaults <- xml2::xml_find_first(doc, count_default_xpath)
xml2::xml_integer(count_defaults)
}
21 changes: 13 additions & 8 deletions R/describe-feature.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#' - sticky: whether a column can be separated from the record in a Web Service call via the `dplyr::select` method
#' - remote_col_type: class of what is return by the web feature service
#' - local_col_type: the column class in R
#' - column_comments: additional metadata specific to that column
#'
#' @inheritParams bcdc_query_geodata
#' @export
Expand Down Expand Up @@ -53,7 +54,9 @@ bcdc_describe_feature.default <- function(record) {
bcdc_describe_feature.character <- function(record){

if (is_whse_object_name(record)) {
return(feature_helper(record))
bgc <- bcdc_get_wfs_records()
cat_record <- bcdc_get_record(bgc$cat_url[grepl(record, bgc$whse_name)])
return(obj_desc_join(record, cat_record$details))
}

bcdc_describe_feature(bcdc_get_record(record))
Expand All @@ -68,8 +71,8 @@ bcdc_describe_feature.bcdc_record <- function(record){
call. = FALSE
)
}
obj_desc_join(record$layer_name, record$details)

feature_helper(record$layer_name)
}

parse_raw_feature_tbl <- function(query_list){
Expand Down Expand Up @@ -110,15 +113,11 @@ feature_helper <- function(whse_name){
xml_df <- parse_raw_feature_tbl(query_list)
geom_type <- attr(xml_df, "geom_type")

## Identify geometry column and move to last
# xml_df[xml_df$type == geom_type, "name"] <- "geometry"
# xml_df <- dplyr::bind_rows(xml_df[xml_df$name != "geometry",],
# xml_df[xml_df$name == "geometry",])

## Fix logicals
xml_df$nillable = ifelse(xml_df$nillable == "true", TRUE, FALSE)

xml_df <- xml_df[, c("name", "nillable", "type")]

## Add the id_row back into the front
xml_df <- dplyr::bind_rows(id_row, xml_df)
colnames(xml_df) <- c("col_name", "sticky", "remote_col_type")
Expand All @@ -129,4 +128,10 @@ feature_helper <- function(whse_name){




obj_desc_join <- function(x, y) {
dplyr::left_join(
feature_helper(x),
y[,c("column_comments", "column_name")],
by = c("col_name" = "column_name")
)
}
2 changes: 1 addition & 1 deletion R/utils-classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ collect.bcdc_promise <- function(x, ...){
## Determine total number of records for pagination purposes
number_of_records <- bcdc_number_wfs_records(query_list, cli)

if (number_of_records < getOption("bcdata.single_download_limit", default = ._bcdataenv_$bcdata_dl_limit)) {
if (number_of_records < getOption("bcdata.single_download_limit", default = bcdc_single_download_limit())) {
cc <- tryCatch(cli$post(body = query_list, encode = "form"),
error = function(e) {
stop("There was an issue processing this request.
Expand Down
2 changes: 1 addition & 1 deletion R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

.onLoad <- function(...) {
._bcdataenv_$named_get_record_warned <- FALSE # nocov
._bcdataenv_$bcdata_dl_limit <- bcdc_single_download_limit() # nocov
._bcdataenv_$get_capabilities_xml <- bcdc_get_wfs_records_xml() # nocov

}

Expand Down
1 change: 1 addition & 0 deletions man/bcdc_describe_feature.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/bcdc_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 15 additions & 3 deletions tests/testthat/test-describe-feature.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ test_that("Test that bcdc_describe feature returns the correct columns",{
skip_on_cran()
skip_if_net_down()
airport_feature <- bcdc_describe_feature("bc-airports")
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type"))
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments"))
})


Expand All @@ -39,14 +39,14 @@ test_that("bcdc_describe_feature accepts a bcdc_record object", {
skip_if_net_down()
airports <- bcdc_get_record('76b1b7a3-2112-4444-857a-afccf7b20da8')
airport_feature <- bcdc_describe_feature(airports)
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type"))
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments"))
})

test_that("bcdc_describe_feature accepts BCGW name",{
skip_on_cran()
skip_if_net_down()
airport_feature <- bcdc_describe_feature("WHSE_IMAGERY_AND_BASE_MAPS.GSR_AIRPORTS_SVW")
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type"))
expect_identical(names(airport_feature), c("col_name", "sticky", "remote_col_type","local_col_type", "column_comments"))
})

test_that("bcdc_describe_feature fails on unsupported classes", {
Expand All @@ -63,3 +63,15 @@ test_that("bcdc_describe_feature fails with non-wfs record", {
"No WMS/WFS resource available for this data set")
})

test_that("bcdc_get_wfs_records works", {
skip_if_net_down()
skip_on_cran()

wfs_records <- bcdc_get_wfs_records()

expect_equal(names(wfs_records), c("whse_name", "title", "cat_url"))
expect_true(nrow(wfs_records) > 0L)
lapply(wfs_records, function(x) {
expect_true(any(nzchar(x, keepNA = TRUE)) & any(!is.na(x)))
})
})
4 changes: 2 additions & 2 deletions tests/testthat/test-options.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("bcdata.single_download_limit", {

})

test_that("bcdata.single_download_limit can be changes",{
test_that("bcdata.single_download_limit can be changed",{
withr::local_options(list(bcdata.single_download_limit = 13))
expect_equal(getOption("bcdata.single_download_limit"), 13)
})
Expand All @@ -43,5 +43,5 @@ test_that("bcdc_single_download_limit returns a number",{
skip_on_cran()
skip_if_net_down()
lt <- bcdc_single_download_limit()
expect_type(lt, "double")
expect_type(lt, "integer")
})

0 comments on commit 11d2c5c

Please sign in to comment.