diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a416e0f..a11cc42 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -213,7 +213,10 @@ jobs: cache-target: release bins: cargo-fuzz - - run: cargo fuzz run --fuzz-dir crates/fuzz compare_to_serde --release -- -max_total_time=300s + - run: | + # cargo fuzz defaults to musl targets, which is seemingly incompatible with sanitizers according to CI failures + RUST_TARGET=$(rustc -Vv | grep host | cut -d ' ' -f 2) + cargo fuzz run --target=$RUST_TARGET --fuzz-dir crates/fuzz compare_to_serde --release -- -max_total_time=300s fuzz-skip: name: fuzz skip @@ -230,7 +233,10 @@ jobs: cache-target: release bins: cargo-fuzz - - run: cargo fuzz run --fuzz-dir crates/fuzz compare_skip --release -- -max_total_time=300s + - run: | + # cargo fuzz defaults to musl targets, which is seemingly incompatible with sanitizers according to CI failures + RUST_TARGET=$(rustc -Vv | grep host | cut -d ' ' -f 2) + cargo fuzz run --target=$RUST_TARGET --fuzz-dir crates/fuzz compare_skip --release -- -max_total_time=300s lint: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 7c39a4a..d2b0781 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ resolver = "2" [workspace.package] authors = ["Samuel Colvin "] -version = "0.4.2" +version = "0.5.0" edition = "2021" license = "MIT" keywords = ["JSON", "parsing", "deserialization", "iter"] @@ -28,4 +28,5 @@ inherits = "release" debug = true [workspace.dependencies] -pyo3 = { version = "0.21.0", default-features = false} +pyo3 = { version = "0.22.0" } +pyo3-build-config = { version = "0.22.0" } diff --git a/README.md b/README.md index cfe6464..b73722f 100644 --- a/README.md +++ b/README.md @@ -20,19 +20,17 @@ See [the `JsonValue` docs](https://docs.rs/jiter/latest/jiter/enum.JsonValue.htm ```rust use jiter::JsonValue; -fn main() { - let json_data = r#" - { - "name": "John Doe", - "age": 43, - "phones": [ - "+44 1234567", - "+44 2345678" - ] - }"#; - 
let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); - println!("{:#?}", json_value); -} +let json_data = r#" + { + "name": "John Doe", + "age": 43, + "phones": [ + "+44 1234567", + "+44 2345678" + ] + }"#; +let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); +println!("{:#?}", json_value); ``` returns: @@ -59,35 +57,33 @@ To use [Jiter](https://docs.rs/jiter/latest/jiter/struct.Jiter.html), you need t ```rust use jiter::{Jiter, NumberInt, Peek}; -fn main() { - let json_data = r#" - { - "name": "John Doe", - "age": 43, - "phones": [ - "+44 1234567", - "+44 2345678" - ] - }"#; - let mut jiter = Jiter::new(json_data.as_bytes()); - assert_eq!(jiter.next_object().unwrap(), Some("name")); - assert_eq!(jiter.next_str().unwrap(), "John Doe"); - assert_eq!(jiter.next_key().unwrap(), Some("age")); - assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); - assert_eq!(jiter.next_key().unwrap(), Some("phones")); - assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); - // we know the next value is a string as we just asserted so - assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); - assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); - // same again - assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); - // next we'll get `None` from `array_step` as the array is finished - assert_eq!(jiter.array_step().unwrap(), None); - // and `None` from `next_key` as the object is finished - assert_eq!(jiter.next_key().unwrap(), None); - // and we check there's nothing else in the input - jiter.finish().unwrap(); -} +let json_data = r#" + { + "name": "John Doe", + "age": 43, + "phones": [ + "+44 1234567", + "+44 2345678" + ] + }"#; +let mut jiter = Jiter::new(json_data.as_bytes()); +assert_eq!(jiter.next_object().unwrap(), Some("name")); +assert_eq!(jiter.next_str().unwrap(), "John Doe"); +assert_eq!(jiter.next_key().unwrap(), Some("age")); +assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); 
+assert_eq!(jiter.next_key().unwrap(), Some("phones")); +assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); +// we know the next value is a string as we just asserted so +assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); +assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); +// same again +assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); +// next we'll get `None` from `array_step` as the array is finished +assert_eq!(jiter.array_step().unwrap(), None); +// and `None` from `next_key` as the object is finished +assert_eq!(jiter.next_key().unwrap(), None); +// and we check there's nothing else in the input +jiter.finish().unwrap(); ``` ## Benchmarks diff --git a/crates/jiter-python/Cargo.toml b/crates/jiter-python/Cargo.toml index 906a86b..86efab4 100644 --- a/crates/jiter-python/Cargo.toml +++ b/crates/jiter-python/Cargo.toml @@ -10,7 +10,7 @@ homepage = {workspace = true} repository = {workspace = true} [dependencies] -pyo3 = { workspace = true, default-features = true, features = ["num-bigint", "auto-initialize"] } +pyo3 = { workspace = true, features = ["num-bigint"] } jiter = { path = "../jiter", features = ["python"] } [features] diff --git a/crates/jiter-python/src/lib.rs b/crates/jiter-python/src/lib.rs index 7cd08ed..7983311 100644 --- a/crates/jiter-python/src/lib.rs +++ b/crates/jiter-python/src/lib.rs @@ -1,43 +1,5 @@ use std::sync::OnceLock; -use pyo3::prelude::*; - -use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParse, StringCacheMode}; - -#[allow(clippy::fn_params_excessive_bools)] -#[pyfunction( - signature = ( - json_data, - /, - *, - allow_inf_nan=true, - cache_mode=StringCacheMode::All, - partial_mode=PartialMode::Off, - catch_duplicate_keys=false, - lossless_floats=false, - ) -)] -pub fn from_json<'py>( - py: Python<'py>, - json_data: &[u8], - allow_inf_nan: bool, - cache_mode: StringCacheMode, - partial_mode: PartialMode, - catch_duplicate_keys: bool, - lossless_floats: bool, -) -> PyResult> { - let 
parse_builder = PythonParse { - allow_inf_nan, - cache_mode, - partial_mode, - catch_duplicate_keys, - lossless_floats, - }; - parse_builder - .python_parse(py, json_data) - .map_err(|e| map_json_error(json_data, &e)) -} - pub fn get_jiter_version() -> &'static str { static JITER_VERSION: OnceLock = OnceLock::new(); @@ -52,23 +14,63 @@ pub fn get_jiter_version() -> &'static str { }) } -#[pyfunction] -pub fn cache_clear(py: Python<'_>) { - jiter::cache_clear(py); -} +#[pyo3::pymodule] +#[pyo3(name = "jiter")] +mod jiter_python { + use pyo3::prelude::*; -#[pyfunction] -pub fn cache_usage(py: Python<'_>) -> usize { - jiter::cache_usage(py) -} + use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParse, StringCacheMode}; -#[pymodule] -#[pyo3(name = "jiter")] -fn jiter_python(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add("__version__", get_jiter_version())?; - m.add_function(wrap_pyfunction!(from_json, m)?)?; - m.add_function(wrap_pyfunction!(cache_clear, m)?)?; - m.add_function(wrap_pyfunction!(cache_usage, m)?)?; - m.add_class::()?; - Ok(()) + use super::get_jiter_version; + + #[allow(clippy::fn_params_excessive_bools)] + #[pyfunction( + signature = ( + json_data, + /, + *, + allow_inf_nan=true, + cache_mode=StringCacheMode::All, + partial_mode=PartialMode::Off, + catch_duplicate_keys=false, + lossless_floats=false, + ) + )] + pub fn from_json<'py>( + py: Python<'py>, + json_data: &[u8], + allow_inf_nan: bool, + cache_mode: StringCacheMode, + partial_mode: PartialMode, + catch_duplicate_keys: bool, + lossless_floats: bool, + ) -> PyResult> { + let parse_builder = PythonParse { + allow_inf_nan, + cache_mode, + partial_mode, + catch_duplicate_keys, + lossless_floats, + }; + parse_builder + .python_parse(py, json_data) + .map_err(|e| map_json_error(json_data, &e)) + } + + #[pyfunction] + pub fn cache_clear(py: Python<'_>) { + jiter::cache_clear(py); + } + + #[pyfunction] + pub fn cache_usage(py: Python<'_>) -> usize { + jiter::cache_usage(py) 
+ } + + #[pymodule_init] + fn init_jiter_python(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("__version__", get_jiter_version())?; + m.add_class::()?; + Ok(()) + } } diff --git a/crates/jiter/Cargo.toml b/crates/jiter/Cargo.toml index 577d210..0e31aaa 100644 --- a/crates/jiter/Cargo.toml +++ b/crates/jiter/Cargo.toml @@ -16,7 +16,7 @@ num-bigint = "0.4.4" num-traits = "0.2.16" ahash = "0.8.0" smallvec = "1.11.0" -pyo3 = { version = "0.21.0", optional = true } +pyo3 = { workspace = true, optional = true } lexical-parse-float = { version = "0.8.5", features = ["format"] } bitvec = "1.0.1" @@ -28,11 +28,11 @@ bencher = "0.1.5" paste = "1.0.7" serde_json = {version = "1.0.87", features = ["preserve_order", "arbitrary_precision", "float_roundtrip"]} serde = "1.0.147" -pyo3 = { workspace = true, default-features=false, features = ["num-bigint", "auto-initialize"] } +pyo3 = { workspace = true, features = ["num-bigint", "auto-initialize"] } codspeed-bencher-compat = "2.3.1" [build-dependencies] -pyo3-build-config = { version = "0.21.0", optional = true } +pyo3-build-config = { workspace = true, optional = true } [[test]] name = "python" diff --git a/crates/jiter/README.md b/crates/jiter/README.md index 418e180..4e38415 100644 --- a/crates/jiter/README.md +++ b/crates/jiter/README.md @@ -20,19 +20,18 @@ See [the `JsonValue` docs][JsonValue] for more details. 
```rust use jiter::JsonValue; -fn main() { - let json_data = r#" - { - "name": "John Doe", - "age": 43, - "phones": [ - "+44 1234567", - "+44 2345678" - ] - }"#; - let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); - println!("{:#?}", json_value); -} +let json_data = r#" + { + "name": "John Doe", + "age": 43, + "phones": [ + "+44 1234567", + "+44 2345678" + ] + }"#; +let json_value = JsonValue::parse(json_data.as_bytes(), true).unwrap(); +println!("{:#?}", json_value); + ``` returns: @@ -59,35 +58,33 @@ To use [Jiter], you need to know what schema you're expecting: ```rust use jiter::{Jiter, NumberInt, Peek}; -fn main() { - let json_data = r#" - { - "name": "John Doe", - "age": 43, - "phones": [ - "+44 1234567", - "+44 2345678" - ] - }"#; - let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan(); - assert_eq!(jiter.next_object().unwrap(), Some("name")); - assert_eq!(jiter.next_str().unwrap(), "John Doe"); - assert_eq!(jiter.next_key().unwrap(), Some("age")); - assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); - assert_eq!(jiter.next_key().unwrap(), Some("phones")); - assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); - // we know the next value is a string as we just asserted so - assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); - assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); - // same again - assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); - // next we'll get `None` from `array_step` as the array is finished - assert_eq!(jiter.array_step().unwrap(), None); - // and `None` from `next_key` as the object is finished - assert_eq!(jiter.next_key().unwrap(), None); - // and we check there's nothing else in the input - jiter.finish().unwrap(); -} +let json_data = r#" + { + "name": "John Doe", + "age": 43, + "phones": [ + "+44 1234567", + "+44 2345678" + ] + }"#; +let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan(); +assert_eq!(jiter.next_object().unwrap(), Some("name")); 
+assert_eq!(jiter.next_str().unwrap(), "John Doe"); +assert_eq!(jiter.next_key().unwrap(), Some("age")); +assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); +assert_eq!(jiter.next_key().unwrap(), Some("phones")); +assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); +// we know the next value is a string as we just asserted so +assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); +assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); +// same again +assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); +// next we'll get `None` from `array_step` as the array is finished +assert_eq!(jiter.array_step().unwrap(), None); +// and `None` from `next_key` as the object is finished +assert_eq!(jiter.next_key().unwrap(), None); +// and we check there's nothing else in the input +jiter.finish().unwrap(); ``` ## Benchmarks diff --git a/crates/jiter/benches/python.rs b/crates/jiter/benches/python.rs index ce3c6dd..acdd35b 100644 --- a/crates/jiter/benches/python.rs +++ b/crates/jiter/benches/python.rs @@ -5,7 +5,7 @@ use std::io::Read; use pyo3::Python; -use jiter::{cache_clear, PartialMode, PythonParse, StringCacheMode}; +use jiter::{cache_clear, PythonParse, StringCacheMode}; fn python_parse_numeric(bench: &mut Bencher) { Python::with_gil(|py| { diff --git a/crates/jiter/src/py_lossless_float.rs b/crates/jiter/src/py_lossless_float.rs index 39f32af..fed5c50 100644 --- a/crates/jiter/src/py_lossless_float.rs +++ b/crates/jiter/src/py_lossless_float.rs @@ -61,11 +61,6 @@ static DECIMAL_TYPE: GILOnceCell> = GILOnceCell::new(); pub fn get_decimal_type(py: Python) -> PyResult<&Bound<'_, PyType>> { DECIMAL_TYPE - .get_or_try_init(py, || { - py.import_bound("decimal")? - .getattr("Decimal")? 
- .extract::<&PyType>() - .map(Into::into) - }) + .get_or_try_init(py, || py.import_bound("decimal")?.getattr("Decimal")?.extract()) .map(|t| t.bind(py)) } diff --git a/crates/jiter/src/py_string_cache.rs b/crates/jiter/src/py_string_cache.rs index 9047ed4..96dcf66 100644 --- a/crates/jiter/src/py_string_cache.rs +++ b/crates/jiter/src/py_string_cache.rs @@ -186,7 +186,7 @@ impl PyStringCache { /// clear the cache by resetting all entries to `None` fn clear(&mut self) { - self.entries.fill(None); + self.entries.fill_with(|| None); } }