Skip to content

Commit

Permalink
Merge pull request #65 from rutar-forks/prompt-parse-split
Browse files Browse the repository at this point in the history
  • Loading branch information
pascalkuthe authored Dec 7, 2024
2 parents f13f2dc + eb2e46f commit f64f3ee
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 3 deletions.
15 changes: 15 additions & 0 deletions matcher/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ let matches = Pattern::new("^foo bar", CaseMatching::Ignore, Normalization::Smar
assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
```
Word segmentation is performed automatically on any unescaped character for which [`is_whitespace`](char::is_whitespace) returns true.
This is relevant, for instance, with non-English keyboard input, where the separator typed may not be an ASCII space.
```
# use nucleo_matcher::pattern::{Atom, Pattern, Normalization, CaseMatching};
assert_eq!(
// double-width 'Ideographic Space', i.e. `'\u{3000}'`
Pattern::parse("ほげ\u{3000}ふが", CaseMatching::Smart, Normalization::Smart).atoms,
vec![
Atom::parse("ほげ", CaseMatching::Smart, Normalization::Smart),
Atom::parse("ふが", CaseMatching::Smart, Normalization::Smart),
],
);
```
If word segmentation is also not desired, a single `Atom` can be constructed directly.
```
Expand Down
2 changes: 1 addition & 1 deletion matcher/src/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
let mut saw_backslash = false;
pattern.split(move |c| {
saw_backslash = match c {
' ' if !saw_backslash => return true,
c if c.is_whitespace() && !saw_backslash => return true,
'\\' => true,
_ => false,
};
Expand Down
37 changes: 36 additions & 1 deletion matcher/src/pattern/tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::pattern::{Atom, AtomKind, CaseMatching, Normalization};
use crate::pattern::{Atom, AtomKind, CaseMatching, Normalization, Pattern};

#[test]
fn negative() {
Expand Down Expand Up @@ -112,3 +112,38 @@ fn escape() {
assert_eq!(pat.needle.to_string(), "^foo$");
assert_eq!(pat.kind, AtomKind::Substring);
}

// Checks that `Pattern::parse` splits its input into atoms on whitespace
// characters other than the ASCII space, and that runs of whitespace or
// leading/trailing whitespace do not produce extra atoms.
#[test]
fn pattern_atoms() {
    // Baseline: a single ASCII space separates two atoms.
    assert_eq!(
        Pattern::parse("a b", CaseMatching::Ignore, Normalization::Smart).atoms,
        vec![
            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
        ]
    );

    // A newline directly followed by a space still yields exactly two atoms.
    assert_eq!(
        Pattern::parse("a\n b", CaseMatching::Ignore, Normalization::Smart).atoms,
        vec![
            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
        ]
    );

    // Leading space and a trailing CRLF yield no additional atoms.
    assert_eq!(
        Pattern::parse(" a b\r\n", CaseMatching::Ignore, Normalization::Smart).atoms,
        vec![
            Atom::parse("a", CaseMatching::Ignore, Normalization::Smart),
            Atom::parse("b", CaseMatching::Ignore, Normalization::Smart),
        ]
    );

    // Double-width 'Ideographic Space'. It is written as an escape so the
    // separator is visible in the source and cannot be silently replaced by
    // an ASCII space when the file is reformatted or copied.
    assert_eq!(
        Pattern::parse("ほ\u{3000}げ", CaseMatching::Smart, Normalization::Smart).atoms,
        vec![
            Atom::parse("ほ", CaseMatching::Smart, Normalization::Smart),
            Atom::parse("げ", CaseMatching::Smart, Normalization::Smart),
        ],
    );
}
2 changes: 1 addition & 1 deletion src/boxcar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ pub struct ParIter<'v, T> {
start: u32,
vec: &'v Vec<T>,
}
impl<'v, T> ParIter<'v, T> {
impl<T> ParIter<'_, T> {
pub fn end(&self) -> u32 {
self.end
}
Expand Down

0 comments on commit f64f3ee

Please sign in to comment.