-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add function for #154 #155
Changes from 4 commits
7a8da45
cbdb80e
673fb58
c5cebed
09e12a9
72bddbc
07d6544
246e06b
20cbe3a
f53f965
3c33004
2222280
dc64469
ceb1147
e65008f
3003a37
bd3e8b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,11 +67,6 @@ | |
#' # (in which case crs = 4326 is assumed) | ||
#' oe_match(c(9.1916, 45.4650)) # Milan, Duomo using CRS = 4326 | ||
#' | ||
#' # Check interactive_ask: | ||
#' if (interactive()) { | ||
#' oe_match("London", interactive_ask = TRUE) | ||
#' } | ||
#' | ||
#' # It returns a warning since Berin is matched both with Benin and Berlin | ||
#' oe_match("Berin", quiet = FALSE) | ||
oe_match = function(place, ...) { | ||
|
@@ -205,7 +200,6 @@ oe_match.character = function( | |
quiet = FALSE, | ||
match_by = "name", | ||
max_string_dist = 1, | ||
interactive_ask = FALSE, | ||
... | ||
) { | ||
# For the moment we support only length-one character vectors | ||
|
@@ -250,6 +244,7 @@ oe_match.character = function( | |
ignore.case = TRUE | ||
) | ||
best_match_id = which(matching_dists == min(matching_dists, na.rm = TRUE)) | ||
|
||
if (length(best_match_id) > 1L) { | ||
warning( | ||
"The input place was matched with multiple geographical zones: ", | ||
|
@@ -265,36 +260,74 @@ oe_match.character = function( | |
# Check if the best match is still too far | ||
high_distance = matching_dists[best_match_id, 1] > max_string_dist | ||
|
||
# If the approximate string distance between the best match is greater than | ||
# the max_string_dist threshold, then: | ||
if (isTRUE(high_distance)) { | ||
if (isFALSE(quiet) || isTRUE(interactive_ask)) { | ||
|
||
# 1. Raise a message | ||
if (isFALSE(quiet)) { | ||
message( | ||
"No exact matching found for place = ", place, ". ", | ||
"Best match is ", best_matched_place[[match_by]], "." | ||
"No exact match found for place = ", place, " and provider = ", | ||
provider, ". ", "Best match is ", best_matched_place[[match_by]], ".", | ||
" \nChecking the other providers." | ||
) | ||
} | ||
if (interactive() && isTRUE(interactive_ask)) { | ||
continue = utils::menu( | ||
choices = c("Yes", "No"), | ||
title = "Do you confirm that this is the right match?" | ||
) | ||
# since the options are Yes/No, then Yes == 1L | ||
if (continue != 1L) { | ||
stop("Search for a closer match in the chosen provider's database.", | ||
call. = FALSE | ||
|
||
# 2. Check the other providers and, if there is an exact match, just return | ||
# the matched value from that other provider: | ||
other_providers = setdiff(oe_available_providers(), provider) | ||
exact_match = FALSE | ||
for (other_provider in other_providers) { | ||
if (match_by %!in% colnames(load_provider_data(other_provider))) { | ||
next | ||
} | ||
all_match_by = load_provider_data(other_provider)[[match_by]] | ||
|
||
if (any(tolower(place) == tolower(all_match_by))) { | ||
exact_match = TRUE | ||
break | ||
} | ||
} | ||
|
||
if (exact_match) { | ||
if (isFALSE(quiet)) { | ||
message( | ||
"I found an exact string match using provider = ", other_provider, | ||
" so I'm going to return that. " | ||
) | ||
} | ||
} else { | ||
stop( | ||
"String distance between best match and the input place is ", | ||
matching_dists[best_match_id, 1], | ||
", while the maximum threshold distance is equal to ", | ||
max_string_dist, | ||
". You should increase the max_string_dist parameter, ", | ||
"look for a closer match in the chosen provider database", | ||
" or consider using a different match_by variable.", | ||
call. = FALSE | ||
|
||
return( | ||
oe_match( | ||
place = place, | ||
provider = other_provider, | ||
match_by = match_by, | ||
quiet = TRUE, | ||
max_string_dist = max_string_dist | ||
) | ||
) | ||
} | ||
|
||
# 3. Otherwise, we can use oe_search to look for the lat/long coordinates of the input place | ||
if (isFALSE(quiet)) { | ||
message( | ||
"No exact match found in any OSM provider data.", | ||
" Searching for the location online." | ||
) | ||
} | ||
|
||
place_online = oe_search(place = place) | ||
# I added Sys.sleep(1) since the usage policy of OSM nominatim (see | ||
# https://operations.osmfoundation.org/policies/nominatim/) requires max 1 | ||
# request per second. | ||
Sys.sleep(1) | ||
return( | ||
oe_match( | ||
place = sf::st_geometry(place_online), | ||
provider = provider, | ||
quiet = quiet | ||
) | ||
) | ||
} | ||
|
||
if (isFALSE(quiet)) { | ||
|
@@ -367,7 +400,7 @@ oe_match_pattern = function( | |
|
||
# Then we extract only the elements of the match_by_column that match the | ||
# input pattern. | ||
match_ID = grep(pattern, match_by_column) | ||
match_ID = grep(pattern, match_by_column, ignore.case = TRUE) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
# If full_row is TRUE then return the corresponding row of provider_data, | ||
# otherwise just the matched pattern. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#' Search for a place and return an sf data frame locating it
#'
#' This (at the moment internal) function provides a simple interface to the
#' [nominatim](https://nominatim.openstreetmap.org) service for finding the
#' geographical location of place names.
#'
#' @param place Text string containing the name of a place the location of
#' which is to be found, such as `"Leeds"` or `"Milan"`. The string is
#' percent-encoded before being inserted into the query URL, so names that
#' contain spaces or reserved characters (e.g. `"New York"`) are supported.
#' @param base_url The URL of the nominatim server to use. The main
#' open server hosted by OpenStreetMap is the default. Trailing slashes are
#' ignored.
#' @param destfile The name of the destination file where the output
#' of the search query, a `.geojson` file, should be saved.
#' @param ... Extra arguments that are passed to `sf::st_read`.
#'
#' @return The object returned by `sf::st_read` after reading the downloaded
#' `.geojson` file. The query is sent with `limit=1`, so at most one result
#' is requested from the server.
oe_search = function(
  place,
  base_url = "https://nominatim.openstreetmap.org",
  destfile = tempfile(fileext = ".geojson"),
  ...
) {
  # The query string is built from a single place name, so reject anything
  # that cannot be turned into exactly one URL.
  if (length(place) != 1L || is.na(place)) {
    stop(
      "The place argument must be a single, non-missing string.",
      call. = FALSE
    )
  }

  # Percent-encode the place (including reserved characters such as "&", "?"
  # and spaces), otherwise the resulting URL may be malformed or the place
  # may be split into several query parameters.
  encoded_place = utils::URLencode(as.character(place), reserved = TRUE)

  # Drop trailing slashes so that we never request "<base_url>//search".
  base_url = sub("/+$", "", base_url)

  # See https://nominatim.org/release-docs/develop/api/Overview/ for more
  # details related to the URL
  u = paste0(base_url, "/search?q=", encoded_place, "&limit=1&format=geojson")
  utils::download.file(url = u, destfile = destfile, quiet = TRUE)
  sf::st_read(destfile, quiet = TRUE, ...)
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍