From 0014ef4fb415999a69f1f7aa3c7e5d0d49b07456 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 5 Oct 2023 16:27:07 -0500 Subject: [PATCH] Update basic_search to support words that must not appear --- hed/models/basic_search.py | 88 ++++++----- tests/models/test_basic_search.py | 243 +++++++++++++++++------------- 2 files changed, 189 insertions(+), 142 deletions(-) diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py index ae47b71e..9301a0cc 100644 --- a/hed/models/basic_search.py +++ b/hed/models/basic_search.py @@ -8,14 +8,18 @@ def find_matching(series, search_string, regex=False): """ Finds lines in the series that match the search string and returns a mask. Syntax Rules: - - '@': Prefixing a term in the search string means the object must appear anywhere within a line. + - '@': Prefixing a term in the search string means the term must appear anywhere within a line. + - '~': Prefixing a term in the search string means the term must NOT appear within a line. - Parentheses: Elements within parentheses must appear in the line with the same level of nesting. - eg: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't - start in the same group. + e.g.: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't + start in the same group. - "LongFormTag*": A * will match any remaining word(anything but a comma or parenthesis) - An individual term can be arbitrary regex, but it is limited to single continuous words. Notes: + - Specific words only care about their level relative to other specific words, not overall. + e.g. "(A, B)" will find: "A, B", "(A, B)", "(A, (C), B)", or "((A, B))" + - If you have no grouping or anywhere words in the search, it assumes all terms are anywhere words. - The format of the series should match the format of the search string, whether it's in short or long form. 
- To enable support for matching parent tags, ensure that both the series and search string are in long form. @@ -33,19 +37,19 @@ def find_matching(series, search_string, regex=False): if not regex: # Replace *'s with a reasonable value for people who don't know regex search_string = re.sub(r'(?