From 0d02a2754d99870a6b903431cae5a0c449c26e4b Mon Sep 17 00:00:00 2001
From: Rob Brackett
Date: Fri, 15 Nov 2024 09:14:48 -0800
Subject: [PATCH] Automatically segment rich text

There was a new rule published today that had an abstract longer than 2,000 characters! Rich text blocks have to be broken up into segments of 2,000 characters or fewer, so now the rich text helper does that automatically.
---
 rule_scout.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/rule_scout.py b/rule_scout.py
index 97764ac..debbbce 100644
--- a/rule_scout.py
+++ b/rule_scout.py
@@ -388,10 +388,20 @@ def main() -> None:
     print('Done!')
 
 
-def notion_rich_text(text: str | None):
+def notion_rich_text(text: str | None) -> dict:
+    segments = []
+    if text:
+        segment_length = 2000
+        max_segments = 100
+        segments = []
+        remainder = text
+        while remainder and len(segments) < max_segments:
+            segments.append(notion_text(remainder[:segment_length]))
+            remainder = remainder[segment_length:]
+
     return {
         'type': 'rich_text',
-        'rich_text': None if text is None else [notion_text(text)]
+        'rich_text': segments
     }
 
 