Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function for #154 #155

Merged
merged 17 commits into from
Jan 13, 2021
11 changes: 3 additions & 8 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@
#' Check Details and Examples in [oe_match()] to understand why this parameter
#' is important. Ignored if `place` is not a character vector since the
#' matching is performed through a spatial operation.
#' @param interactive_ask Boolean. If `TRUE` the function creates an interactive
#' menu in case the best match is further than `max_string_dist`, otherwise it
#' fails with `stop()`. Check details and examples in [oe_match()] to
#' understand why this parameter is important. Ignored if `place` is not a
#' character vector since the matching is performed through a spatial
#' operation.
#' @param download_directory Where to download the file containing the OSM data?
#' By default this is equal to [oe_download_directory()], which is equal to
#' [`tempdir()`] and it changes each time you restart R. You can set a
Expand Down Expand Up @@ -128,6 +122,9 @@
#' west_yorkshire = oe_get("West Yorkshire", quiet = FALSE)
#' # If you run it again, the function will not download the file or convert it
#' west_yorkshire = oe_get("West Yorkshire", quiet = FALSE)
#' # Match with place name
#' oe_get("Milan", quiet = FALSE) # Warning: the .pbf file is 400MB
#'
#' # Match with coordinates (any EPSG)
#' milan_duomo = sf::st_sfc(sf::st_point(c(1514924, 5034552)), crs = 3003)
#' oe_get(milan_duomo, quiet = FALSE) # Warning: the .pbf file is 400MB
Expand All @@ -147,7 +144,6 @@ oe_get = function(
provider = "geofabrik",
match_by = "name",
max_string_dist = 1,
interactive_ask = FALSE,
download_directory = oe_download_directory(),
force_download = FALSE,
max_file_size = 5e+8,
Expand Down Expand Up @@ -175,7 +171,6 @@ oe_get = function(
provider = provider,
match_by = match_by,
max_string_dist = max_string_dist,
interactive_ask = interactive_ask,
quiet = quiet
)

Expand Down
91 changes: 62 additions & 29 deletions R/match.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,6 @@
#' # (in which case crs = 4326 is assumed)
#' oe_match(c(9.1916, 45.4650)) # Milan, Duomo using CRS = 4326
#'
#' # Check interactive_ask:
#' if (interactive()) {
#' oe_match("London", interactive_ask = TRUE)
#' }
#'
#' # It returns a warning since Berin is matched both with Benin and Berlin
#' oe_match("Berin", quiet = FALSE)
oe_match = function(place, ...) {
Expand Down Expand Up @@ -205,7 +200,6 @@ oe_match.character = function(
quiet = FALSE,
match_by = "name",
max_string_dist = 1,
interactive_ask = FALSE,
...
) {
# For the moment we support only length-one character vectors
Expand Down Expand Up @@ -250,6 +244,7 @@ oe_match.character = function(
ignore.case = TRUE
)
best_match_id = which(matching_dists == min(matching_dists, na.rm = TRUE))

if (length(best_match_id) > 1L) {
warning(
"The input place was matched with multiple geographical zones: ",
Expand All @@ -265,36 +260,74 @@ oe_match.character = function(
# Check if the best match is still too far
high_distance = matching_dists[best_match_id, 1] > max_string_dist

# If the approximate string distance between the best match is greater than
# the max_string_dist threshold, then:
if (isTRUE(high_distance)) {
if (isFALSE(quiet) || isTRUE(interactive_ask)) {

# 1. Raise a message
if (isFALSE(quiet)) {
message(
"No exact matching found for place = ", place, ". ",
"Best match is ", best_matched_place[[match_by]], "."
"No exact match found for place = ", place, " and provider = ",
provider, ". ", "Best match is ", best_matched_place[[match_by]], ".",
" \nChecking the other providers."
)
}
if (interactive() && isTRUE(interactive_ask)) {
continue = utils::menu(
choices = c("Yes", "No"),
title = "Do you confirm that this is the right match?"
)
# since the options are Yes/No, then Yes == 1L
if (continue != 1L) {
stop("Search for a closer match in the chosen provider's database.",
call. = FALSE

# 2. Check the other providers and, if there is an exact match, just return
# the matched value from that other provider:
other_providers = setdiff(oe_available_providers(), provider)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

exact_match = FALSE
for (other_provider in other_providers) {
if (match_by %!in% colnames(load_provider_data(other_provider))) {
next
}
all_match_by = load_provider_data(other_provider)[[match_by]]

if (any(tolower(place) == tolower(all_match_by))) {
exact_match = TRUE
break
}
}

if (exact_match) {
if (isFALSE(quiet)) {
message(
"I found an exact string match using provider = ", other_provider,
" so I'm going to return that. "
)
}
} else {
stop(
"String distance between best match and the input place is ",
matching_dists[best_match_id, 1],
", while the maximum threshold distance is equal to ",
max_string_dist,
". You should increase the max_string_dist parameter, ",
"look for a closer match in the chosen provider database",
" or consider using a different match_by variable.",
call. = FALSE

return(
oe_match(
place = place,
provider = other_provider,
match_by = match_by,
quiet = TRUE,
max_string_dist = max_string_dist
)
)
}

# 3. Otherwise, we can use oe_search to look for the lat/long coordinates of the input place
if (isFALSE(quiet)) {
message(
"No exact match found in any OSM provider data.",
" Searching for the location online."
)
}

place_online = oe_search(place = place)
# I added Sys.sleep(1) since the usage policy of OSM nominatim (see
# https://operations.osmfoundation.org/policies/nominatim/) requires max 1
# request per second.
Sys.sleep(1)
return(
oe_match(
place = sf::st_geometry(place_online),
provider = provider,
quiet = quiet
)
)
}

if (isFALSE(quiet)) {
Expand Down Expand Up @@ -367,7 +400,7 @@ oe_match_pattern = function(

# Then we extract only the elements of the match_by_column that match the
# input pattern.
match_ID = grep(pattern, match_by_column)
match_ID = grep(pattern, match_by_column, ignore.case = TRUE)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


# If full_row is TRUE then return the corresponding row of provider_data,
# otherwise just the matched pattern.
Expand Down
25 changes: 25 additions & 0 deletions R/search.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#' Search for a place and return an sf data frame locating it
#'
#' This (at the moment internal) function provides a simple interface to the
#' [nominatim](https://nominatim.openstreetmap.org) service for finding the
#' geographical location of place names.
#'
#' @param place Text string containing the name of a place the location of
#' which is to be found, such as `"Leeds"` or `"Milan"`. It must be a
#' length-one, non-missing character vector. The value is percent-encoded
#' before being added to the query, so names with spaces or special
#' characters (e.g. `"Isle of Wight"`) can be passed as they are.
#' @param base_url The URL of the nominatim server to use. The main
#' open server hosted by OpenStreetMap is the default.
#' @param destfile The name of the destination file where the output
#' of the search query, a `.geojson` file, should be saved.
#' @param ... Extra arguments that are passed to `sf::st_read`.
#'
#' @return The object returned by `sf::st_read()` when reading the downloaded
#' `.geojson` file. The query is sent with `limit=1`.
oe_search = function(
  place,
  base_url = "https://nominatim.openstreetmap.org",
  destfile = tempfile(fileext = ".geojson"),
  ...
) {
  # paste0() is vectorised, so a longer input would silently build several
  # URLs; fail early with a clear message instead.
  if (!is.character(place) || length(place) != 1L || is.na(place)) {
    stop(
      "The place argument must be a length-one character vector.",
      call. = FALSE
    )
  }

  # Percent-encode the free-text query. Without this step, spaces produce an
  # invalid URL and reserved characters such as "&" or "#" truncate or alter
  # the query string. enc2utf8() makes sure that non-ASCII names are encoded
  # from their UTF-8 bytes.
  query = utils::URLencode(enc2utf8(place), reserved = TRUE)

  # See https://nominatim.org/release-docs/develop/api/Overview/ for more
  # details related to the URL
  u = paste0(base_url, "/search?q=", query, "&limit=1&format=geojson")
  utils::download.file(url = u, destfile = destfile, quiet = TRUE)
  sf::st_read(destfile, quiet = TRUE, ...)
}
12 changes: 10 additions & 2 deletions R/update.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ oe_update = function(

# Check if the .gpkg files should be deleted
if (isTRUE(delete_gpkg)) {
cat("The .gpkg files are going to be removed.\n")
if (isFALSE(quiet)) {
cat("The .gpkg files are going to be removed.\n")
}
file.remove(
file.path(download_directory, grep("\\.gpkg", all_files, value = TRUE))
)
if (isFALSE(quiet)) {
cat("\nThe .gpkg files in download_directory were removed.\n")
cat("The .gpkg files in download_directory were removed.\n")
}
}

Expand Down Expand Up @@ -169,6 +171,11 @@ oe_update = function(
id = paste0("us/", id)
}

# Print a message
if (isFALSE(quiet)) {
message("The function is processing the file ", file, ".")
}

# Update the .osm.pbf files, skipping the vectortranslate step
oe_get(
place = id,
Expand All @@ -178,6 +185,7 @@ oe_update = function(
download_only = TRUE,
skip_vectortranslate = TRUE,
max_file_size = max_file_size,
quiet = quiet,
...
)
}
Expand Down
11 changes: 3 additions & 8 deletions man/oe_get.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 0 additions & 13 deletions man/oe_match.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/oe_search.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions tests/testthat/test-find.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ test_that("oe_find: simplest example works", {
its_leeds_find <- oe_find(
"ITS Leeds",
provider = "test",
download_if_missing = TRUE
download_if_missing = TRUE,
quiet = TRUE
)
expect_type(its_leeds_find, "character")

file.remove(its_leeds_find)
its_leeds_find <- oe_find(
"ITS Leeds",
provider = "test",
download_if_missing = TRUE
download_if_missing = TRUE,
quiet = TRUE
)
expect_type(its_leeds_find, "character")
expect_length(its_leeds_find, 2)
Expand Down
7 changes: 5 additions & 2 deletions tests/testthat/test-get.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test_that("oe_get: simplest examples work", {
expect_s3_class(oe_get("ITS Leeds", provider = "test"), "sf")
expect_s3_class(oe_get("ITS Leeds", provider = "test", quiet = TRUE), "sf")
})

test_that("vectortranslate is not skipped if force_download is TRUE", {
Expand All @@ -17,13 +17,16 @@ test_that("vectortranslate is not skipped if force_download is TRUE", {
"-lco", "GEOMETRY_NAME=geometry",
"-where", "highway IN ('service')",
"lines"
))
),
quiet = TRUE
)

# Download it again
its_leeds <- oe_get(
"ITS Leeds",
download_directory = my_tempdir,
force_vectortranslate = TRUE,
quiet = TRUE
)

expect_gte(nrow(its_leeds), nrow(small_its_leeds))
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/test-match.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ test_that("oe_match: numeric input", {
expect_match(oe_match(c(9.1916, 45.4650))$url, "italy")
})

test_that("oe_match: different providers, match_by or max_string dist args", {
test_that("oe_match: different providers, match_by or max_string_dist args", {
expect_error(oe_match("Italy", provider = "XXX"))
expect_error(oe_match("Italy", match_by = "XXX"))
expect_match(oe_match("RU", match_by = "iso3166_1_alpha2")$url, "russia")

expect_error(oe_match("Isle Wight"))
# expect_null(oe_match("Isle Wight"))
# The previous test was removed in #155 since now oe_match calls nominatim servers in
# case it doesn't find an exact match, so it should never return NULL
expect_match(oe_match("Isle Wight", max_string_dist = 3)$url, "isle-of-wight")
expect_message(oe_match("London", max_string_dist = 3, quiet = FALSE))

Expand Down
Loading