Skip to content

Commit

Permalink
adjust regex in get-key-values.R since there might be empty values or…
Browse files Browse the repository at this point in the history
… new-line characters in OSM values

Fix #250
  • Loading branch information
agila5 committed Mar 16, 2022
1 parent b1c4988 commit 12f7e91
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
4 changes: 2 additions & 2 deletions R/get-key-values.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ get_keys = function(text, values = FALSE, which_keys = NULL) {
# 1. Follow ^" or "," (where ^ denotes the start of a line)
# and
# 2. Precede the string "=>" (i.e. the delimiter)
pattern = '(?<=^\\"|,\\").+?(?=\\"=>\\")',
pattern = '(?<=^\\"|\\",\\").+?(?=\\"=>\\")',
text = text,
perl = TRUE
)
Expand All @@ -266,7 +266,7 @@ get_keys = function(text, values = FALSE, which_keys = NULL) {
# 4. Otherwise, we need to extract the values. I will use a regex that is
# analogous to the previous query (inverting the lookahead and lookbehind)
regexp_values = gregexpr(
pattern = '(?<=(\\"=>\\")).+?(?=\\"$|\\",)',
pattern = '(?<=(\\"=>\\")).*?(?=\\"$|\\",\\"|,\\")',
text = text,
perl = TRUE
)
Expand Down
8 changes: 8 additions & 0 deletions tests/testthat/test-get-keys.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ test_that("get_keys (values): more complicated examples", {
expected = list(C = c("D", "E")),
ignore_attr = TRUE
)

# there might be empty values or newlines, see
# https://github.com/ropensci/osmextract/issues/250
expect_identical(
object = unclass(get_keys('"A"=>"\n","B"=>"C","D"=>""', values = TRUE)),
expected = list(A = "", B = "C", D = ""),
ignore_attr = TRUE
)
})

test_that("oe_get_keys: simplest examples work", {
Expand Down

0 comments on commit 12f7e91

Please sign in to comment.