Skip to content

Commit

Permalink
fix: Max encoded offset optimization was potentially using the wrong …
Browse files Browse the repository at this point in the history
…byte offset

The fallback max byte offset could be larger than it should have been. The code now uses whichever offset is smaller: the one from the fallback level or the one from the semantic level.
  • Loading branch information
benbrandt committed May 7, 2024
1 parent 8cb0782 commit 0fc34dd
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 52 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## v0.13.1

Fix a bug in the fallback logic so that it still respects the maximum number of bytes that should be searched. Again, this only affects Markdown splitting at very small chunk sizes.

## v0.13.0

### What's New / Breaking Changes
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
members = ["bindings/*"]

[workspace.package]
version = "0.13.0"
version = "0.13.1"
authors = ["Ben Brandt <benjamin.j.brandt@gmail.com>"]
edition = "2021"
description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python."
Expand Down
5 changes: 4 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,10 @@ where
}),
);

let max_encoded_offset = fallback_max_encoded_offset.or(max_encoded_offset);
let max_encoded_offset = match (fallback_max_encoded_offset, max_encoded_offset) {
(Some(fallback), Some(max)) => Some(fallback.min(max)),
(fallback, max) => fallback.or(max),
};

let sections = semantic_level
.unwrap_or(FallbackLevel::Char)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14644,7 +14644,8 @@ expression: chunks
- "=-)"
- list_item
- paragraph
- "\"Qui *quodsi"
- "\"Qui"
- "*quodsi"
- "iracundia*\""
- "-> list_item"
- "-> paragraph"
Expand Down Expand Up @@ -14902,7 +14903,8 @@ expression: chunks
- "=-)"
- "-> list_item"
- "-> paragraph"
- "\"Qui *quodsi"
- "\"Qui"
- "*quodsi"
- "iracundia*\"\n```"
- "The fourth line,"
- "``` markdown"
Expand Down Expand Up @@ -14939,7 +14941,8 @@ expression: chunks
- "=-)"
- list_item
- paragraph
- "\"Qui *quodsi"
- "\"Qui"
- "*quodsi"
- "iracundia*\""
- "-> list_item"
- "-> paragraph"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4545,7 +4545,7 @@ expression: chunks
- "````````````````"
- "```````` example"
- ~~~~ ruby
- ruby startline=3
- ruby startline=
- "=3 $%@#$"
- def foo(x)
- " return 3\nend"
Expand Down Expand Up @@ -11096,7 +11096,7 @@ expression: chunks
- "baz`\n."
- "<p><code>foo"
- code>foo bar
- bar baz</code><
- bar baz</code>
- "</code></p>"
- "````````````````"
- "````````````````"
Expand Down Expand Up @@ -18599,6 +18599,7 @@ expression: chunks
- "=-)"
- list_item
- paragraph
- "\"Qui"
- "\"Qui *quodsi"
- quodsi iracundia
- "*\""
Expand Down Expand Up @@ -18917,6 +18918,7 @@ expression: chunks
- "=-)"
- "-> list_item"
- "-> paragraph"
- "\"Qui"
- "\"Qui *quodsi"
- quodsi iracundia
- "*\"\n```"
Expand Down Expand Up @@ -18961,6 +18963,7 @@ expression: chunks
- "=-)"
- list_item
- paragraph
- "\"Qui"
- "\"Qui *quodsi"
- quodsi iracundia
- "*\""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -661,22 +661,23 @@ expression: chunks
- "3021"
- namespace
- "MyApplication\n{"
- "[Obsolete(\"...\")"
- "]"
- "class Program :"
- "IInterface\n {"
- "[Obsolete(\"..."
- "\")]"
- class Program
- ": IInterface"
- "{"
- public static
- List<int>
- JustDoIt(int
- "count)\n {"
- Console.WriteLin
- "e($\"Hello {Name}"
- "!\");"
- return new List<
- "int>(new int[] {"
- "1, 2, 3 })"
- " }\n }"
- "}\n```"
- return
- new List<int>(
- "new int[] { 1, 2"
- ", 3 })\n }"
- " }\n}\n```"
- "```css"
- "@font-face {"
- "font-family:"
Expand Down Expand Up @@ -704,14 +705,14 @@ expression: chunks
- initHighlight(
- "block, cls) {"
- "try {"
- if (cls.search(/
- "\\bno\\-highlight\\"
- b/) != -1)
- if (cls.search(
- "/\\bno\\-highlight"
- "\\b/) != -1)"
- return process(
- "block, true,"
- "0x0F) +"
- "` class=\"${cls}\""
- "`;"
- "` class="
- "\"${cls}\"`;"
- "} catch (e) {"
- /* handle
- "exception */\n }"
Expand All @@ -723,9 +724,9 @@ expression: chunks
- checkCondition(
- "classes[i]) ==="
- undefined)
- "console.log('"
- "undefined');\n }"
- "}"
- console.log(
- "'undefined');"
- " }\n}"
- export $
- initHighlight;
- "```"
Expand All @@ -746,8 +747,8 @@ expression: chunks
- "Factory\n{"
- abstract
- function test();
- public static $
- st1 = 1;
- public static
- $st1 = 1;
- "const ME = \"Yo\";"
- var $list = NULL
- ;
Expand All @@ -763,8 +764,9 @@ expression: chunks
- "= array(), $uri"
- "= 'http')\n {"
- echo __METHOD__;
- "$uri = explode('"
- ":', $uri, 0b10);"
- $uri = explode(
- "':', $uri, 0b10)"
- ;
- $schemeSpecific
- "= isset($uri[1])"
- "? $uri[1] : '';"
Expand All @@ -776,19 +778,20 @@ expression: chunks
- if (!
- ctype_alnum($
- "scheme)) {"
- throw new
- throw
- new
- Zend_Uri_Excepti
- "on('Illegal"
- "scheme');"
- "}"
- $this->var = 0 -
- "self::$st;"
- $this->list =
- "list(Array(\"1\"=>"
- "2, 2=>self::ME,"
- "3 => \\Location\\"
- "Web\\URI::class))"
- ;
- $this->var
- "= 0 - self::$st;"
- $this->list
- "= list(Array(\"1\""
- "=> 2, 2=>self::"
- "ME, 3 => \\"
- "Location\\Web\\URI"
- "::class));"
- "return ["
- "'uri' => $uri,"
- "'value' => null,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -818,9 +818,10 @@ expression: chunks
- "414, 3021"
- namespace
- " MyApplication\n{"
- "[Obsolete(\"...\")"
- "[Obsolete(\"..."
- "(\"...\")]"
- "class Program :"
- class Program
- "Program :"
- ": IInterface"
- "{"
- public static
Expand All @@ -832,6 +833,7 @@ expression: chunks
- "WriteLine($\""
- "($\"Hello {Name}!"
- "{Name}!\");"
- return
- return new List<
- List<int>(new
- "int>(new int[] {"
Expand Down Expand Up @@ -867,14 +869,15 @@ expression: chunks
- $initHighlight(
- "(block, cls) {"
- "try {"
- if (cls.search(/
- if (cls.search(
- "(/\\bno\\-"
- "/\\bno\\-highlight"
- "\\b/) != -1)"
- return process(
- "process(block,"
- "(block, true,"
- ", true, 0x0F) +"
- "` class="
- "` class=\"${cls}\""
- "\"${cls}\"`;"
- "} catch (e) {"
Expand All @@ -890,7 +893,7 @@ expression: chunks
- "(classes[i]) ==="
- "]) === undefined"
- )
- "console.log('"
- console.log(
- "('undefined');"
- " }\n}"
- export $
Expand All @@ -914,7 +917,7 @@ expression: chunks
- " Factory\n{"
- abstract
- function test();
- public static $
- public static
- static $st1 = 1;
- "const ME = \"Yo\";"
- var $list = NULL
Expand All @@ -933,7 +936,8 @@ expression: chunks
- "(), $uri = 'http"
- " = 'http')\n {"
- echo __METHOD__;
- "$uri = explode('"
- $uri = explode(
- "explode(':', $"
- "(':', $uri, 0b10"
- ", 0b10);"
- $schemeSpecific
Expand All @@ -948,15 +952,17 @@ expression: chunks
- if (!
- ctype_alnum($
- "($scheme)) {"
- throw
- throw new
- Zend_Uri_Excepti
- "_Exception('"
- "('Illegal scheme"
- "scheme');"
- "}"
- $this->var = 0 -
- "= 0 - self::$st;"
- $this->list =
- $this->var
- "->var = 0 - self"
- "0 - self::$st;"
- $this->list
- "->list = list("
- "= list(Array(\"1\""
- "(\"1\"=> 2, 2=>"
Expand Down Expand Up @@ -1089,9 +1095,9 @@ expression: chunks
- "| Right-aligned"
- "-aligned |"
- "| :--- |"
- "| :---:"
- "| :--"
- ":---: |"
- "| ---:"
- "| -"
- "---: |"
- "| git status |"
- "| git status"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ expression: chunks
- "ctype_alnum($scheme)) {"
- " throw new Zend_Uri_Exception('Illegal scheme');\n }"
- "$this->var = 0 - self::$st;"
- "$this->list = list(Array(\"1\"=> 2, 2=>self"
- "::ME, 3 => \\Location\\Web\\URI::class));"
- "$this->list = list(Array(\"1\"=> 2, 2=>"
- "self::ME, 3 => \\Location\\Web\\URI::class));"
- " return [\n 'uri' => $uri,"
- " 'value' => null,\n ];\n }\n}"
- "echo URI::ME . URI::$st1;"
Expand Down

0 comments on commit 0fc34dd

Please sign in to comment.