Skip to content

Commit

Permalink
Ignore markdown codeblocks for tags (dullage#66)
Browse files (browse the repository at this point in the history)
* Ignore markdown codeblocks for tags

* Changed `content_ex_tags` to be derived from `content` instead of `content_ex_codeblock`, so code blocks are excluded only when collecting tags

* Bumped `INDEX_SCHEMA_VERSION`
  • Loading branch information
elmodor authored and Gedulis12 committed Aug 7, 2023
1 parent d841102 commit b258d98
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions flatnotes/flatnotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from logger import logger

MARKDOWN_EXT = ".md"
INDEX_SCHEMA_VERSION = "3"
INDEX_SCHEMA_VERSION = "4"

StemmingFoldingAnalyzer = StemmingAnalyzer() | CharsetFilter(accent_map)

Expand Down Expand Up @@ -170,6 +170,7 @@ def _get_matched_fields(matched_terms):

class Flatnotes(object):
TAGS_RE = re.compile(r"(?:(?<=^#)|(?<=\s#))\w+(?=\s|$)")
CODEBLOCK_RE = re.compile(r"`{1,3}.*?`{1,3}", re.DOTALL)
TAGS_WITH_HASH_RE = re.compile(r"(?:(?<=^)|(?<=\s))#\w+(?=\s|$)")

def __init__(self, dir: str) -> None:
Expand Down Expand Up @@ -211,7 +212,9 @@ def extract_tags(cls, content) -> Tuple[str, Set[str]]:
- The content without the tags.
- A set of tags converted to lowercase."""
content_ex_tags, tags = re_extract(cls.TAGS_RE, content)
content_ex_codeblock = re.sub(cls.CODEBLOCK_RE, '', content)
_, tags = re_extract(cls.TAGS_RE, content_ex_codeblock)
content_ex_tags, _ = re_extract(cls.TAGS_RE, content)
try:
tags = [tag.lower() for tag in tags]
return (content_ex_tags, set(tags))
Expand Down

0 comments on commit b258d98

Please sign in to comment.