diff --git a/CHANGELOG.md b/CHANGELOG.md
index 593d139..f616a62 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v0.19.0
+
+### Breaking Changes
+
+- Update to tokenizers v0.21
+
 ## v0.18.1
 
 ### What's New
diff --git a/Cargo.lock b/Cargo.lock
index ab96d3b..5acd492 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2019,7 +2019,7 @@ dependencies = [
 
 [[package]]
 name = "semantic-text-splitter"
-version = "0.18.1"
+version = "0.19.0"
 dependencies = [
  "pyo3",
  "text-splitter",
@@ -2286,7 +2286,7 @@ dependencies = [
 
 [[package]]
 name = "text-splitter"
-version = "0.18.1"
+version = "0.19.0"
 dependencies = [
  "ahash",
  "auto_enums",
diff --git a/Cargo.toml b/Cargo.toml
index 876a7ac..afb2f9c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,7 +2,7 @@
 members = ["bindings/*"]
 
 [workspace.package]
-version = "0.18.1"
+version = "0.19.0"
 authors = ["Ben Brandt "]
 edition = "2021"
 description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens, and is callable from Rust and Python."
diff --git a/README.md b/README.md
index 8695a56..24b31f8 100644
--- a/README.md
+++ b/README.md
@@ -203,7 +203,7 @@ There are lots of methods of determining sentence breaks, all to varying degrees
 | ------------------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `rust_tokenizers` | `^8.0.0` | Enables `(Text/Markdown)Splitter::new` to take any of the provided tokenizers as an argument. |
 | `tiktoken-rs` | `^0.6.0` | Enables `(Text/Markdown)Splitter::new` to take `tiktoken_rs::CoreBPE` as an argument. This is useful for splitting text for `OpenAI` models. |
-| `tokenizers` | `^0.20.0` | Enables `(Text/Markdown)Splitter::new` to take `tokenizers::Tokenizer` as an argument. This is useful for splitting text models that have a Hugging Face-compatible tokenizer. |
+| `tokenizers` | `^0.21.0` | Enables `(Text/Markdown)Splitter::new` to take `tokenizers::Tokenizer` as an argument. This is useful for splitting text models that have a Hugging Face-compatible tokenizer. |
 
 ## Inspiration
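
Note: the README row updated above describes the `tokenizers` feature, which lets the splitter size chunks with a Hugging Face-compatible tokenizer. The following is a minimal, untested sketch of that usage against text-splitter 0.19 with the tokenizers 0.21 crate enabled via the `tokenizers` feature; the tokenizer.json path, the 1000-token limit, and the sample text are illustrative and not part of this diff.

use text_splitter::{ChunkConfig, TextSplitter};
use tokenizers::Tokenizer;

fn main() {
    // Load a Hugging Face-compatible tokenizer from a local file
    // (illustrative path; any tokenizer.json should work).
    let tokenizer = Tokenizer::from_file("tokenizer.json").expect("failed to load tokenizer");

    // Size chunks by token count instead of characters (illustrative limit).
    let config = ChunkConfig::new(1000).with_sizer(tokenizer);
    let splitter = TextSplitter::new(config);

    // `chunks` returns an iterator of string slices within the token limit.
    let chunks: Vec<&str> = splitter.chunks("your document text").collect();
    println!("produced {} chunks", chunks.len());
}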