diff --git a/CHANGELOG.md b/CHANGELOG.md index b045691..593d139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v0.18.1 + +### What's New + +- Ensure tokenizer sizers with truncation parameters count their overflow encodings + ## v0.18.0 ### Breaking diff --git a/Cargo.lock b/Cargo.lock index a58cee9..dc6a064 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1882,7 +1882,7 @@ dependencies = [ [[package]] name = "semantic-text-splitter" -version = "0.18.0" +version = "0.18.1" dependencies = [ "pyo3", "text-splitter", @@ -2132,7 +2132,7 @@ dependencies = [ [[package]] name = "text-splitter" -version = "0.18.0" +version = "0.18.1" dependencies = [ "ahash", "auto_enums", diff --git a/Cargo.toml b/Cargo.toml index 547646b..d48738f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ members = ["bindings/*"] [workspace.package] -version = "0.18.0" +version = "0.18.1" authors = ["Ben Brandt "] edition = "2021" description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python." @@ -66,7 +66,7 @@ either = "1.6" itertools = "0.13" once_cell = "1.20" pulldown-cmark = { version = "0.12", default-features = false, optional = true } -regex = "1.11.1" +regex = "1.10.6" rust_tokenizers = { version = "8", optional = true } strum = { version = "0.26", features = ["derive"] } thiserror = "1.0.65"