-
Notifications
You must be signed in to change notification settings - Fork 0
/
match2.R
95 lines (68 loc) · 2.01 KB
/
match2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Base-R approximation of stringr::str_match_all().
#
# @param string  Character vector (or list of strings) to search.
# @param pattern Regular expression, passed unchanged to gregexpr() and
#   regexec().
# @return An unnamed list with one character matrix per element of `string`.
#   Each row is one match: column 1 holds the full match and the remaining
#   columns hold the capture groups. An element without any match yields a
#   zero-row matrix; an element whose match position is NA (missing input)
#   yields a single all-NA row.
test <- function(string, pattern) {
  positions <- gregexpr(pattern, string)
  matched <- regmatches(string, positions)

  # Re-run the pattern on each extracted match to split it into capture
  # groups. Elements without matches come back as NULL from do.call(rbind, ...).
  # NOTE(review): this assumes the pattern still matches the extracted
  # substring in isolation (no dependence on surrounding context).
  pieces <- lapply(matched, function(m) {
    do.call(rbind, regmatches(m, regexec(pattern, m)))
  })

  has_match <- !vapply(pieces, is.null, logical(1))

  # Width of the placeholder matrices: the column count of the real match
  # matrices (lengths() would give rows * cols instead). When nothing matched
  # anywhere the width cannot be derived from the results, so fall back to
  # the single full-match column.
  n_col <- max(1L, vapply(pieces[has_match], ncol, integer(1)))

  lapply(seq_along(pieces), function(i) {
    if (has_match[[i]]) {
      return(pieces[[i]])
    }
    pos <- positions[[i]]
    # NA position -> one row of NA; plain "no match" (-1) -> zero rows.
    n_row <- if (length(pos) == 1L && is.na(pos)) 1L else 0L
    matrix(NA_character_, nrow = n_row, ncol = n_col)
  })
}
# Base-R approximation of stringr::str_match_all() (revised version; this
# definition replaces any earlier `test` when the file is sourced).
#
# @param string  Character vector (or list of strings) to search.
# @param pattern Regular expression, passed unchanged to gregexpr() and
#   regexec().
# @return A list with one character matrix per element of `string` (names of
#   `string`, if any, are carried over by regmatches()). Each row is one
#   match: column 1 is the full match, the remaining columns are the capture
#   groups. Elements with no match become zero-row matrices; elements whose
#   match position is NA (missing input) become a single all-NA row.
test <- function(string, pattern) {
  match_pos <- gregexpr(pattern, string)
  match_txt <- regmatches(string, match_pos)

  # Split every extracted match into full match + capture groups; an element
  # with no matches produces NULL here.
  # NOTE(review): assumes the pattern matches the extracted substring on its
  # own, i.e. it does not rely on surrounding context.
  out <- lapply(match_txt, function(m) {
    do.call(rbind, regmatches(m, regexec(pattern, m)))
  })

  is_empty <- vapply(out, is.null, logical(1))

  # Use the column count of the real matrices for the placeholders
  # (lengths() would report rows * cols). Fall back to one column, the full
  # match, when no element matched at all.
  widths <- vapply(out[!is_empty], ncol, integer(1))
  n_col <- max(1L, widths)

  # Assign with [[<- so that a zero-length matrix is stored as a list element;
  # `out[idx] <- <zero-length matrix>` fails with "replacement has length zero".
  for (i in which(is_empty)) {
    pos <- match_pos[[i]]
    n_row <- if (length(pos) == 1L && is.na(pos)) 1L else 0L
    out[[i]] <- matrix(NA_character_, nrow = n_row, ncol = n_col)
  }

  out
}
# Ad-hoc comparison of test() against stringr::str_match_all(): for each
# input set, print both results and then time the two implementations.

# Case 1: list input, pattern without capture groups.
st <- list("amber johnson", "anhar link ari")
re <- "\\ba[a-z]+\\b"
stringr::str_match_all(st, re)
test(st, re)
microbenchmark::microbenchmark(
  stringr::str_match_all(st, re),
  test(st, re)
)

# Case 2: character vector, pattern with one capture group.
st <- c("xy1234yz98xy567", "123xy098")
re <- "xy(\\d+)"
stringr::str_match_all(st, re)
test(st, re)
microbenchmark::microbenchmark(
  stringr::str_match_all(st, re),
  test(st, re)
)

# Case 3: several elements contain no "us" and therefore do not match.
st <- c(
  "aristotle", "plato", "epictetus",
  "seneca the younger", "epicurus", "marcus aurelius"
)
re <- ".*us"
stringr::str_match_all(st, re)
#gregexpr(re, st)
test(st, re)
microbenchmark::microbenchmark(
  stringr::str_match_all(st, re),
  test(st, re)
)

# Case 4: three capture groups, one non-matching element ("apple") and one
# element containing two matches.
st <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
re <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
test(st, re)
#gregexpr(re, st)
#new_st <- regmatches(st, gregexpr(re, st))
#new_st
#regexec(re, new_st)
stringr::str_match_all(st, re)
# Benchmark the function defined in this file; no `test2` is defined here.
microbenchmark::microbenchmark(
  test(st, re),
  stringr::str_match_all(st, re)
)