From 6b25e3277cbd1085fb66bd2c2624c194ddf3c81f Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Sat, 26 Oct 2024 18:05:26 +0200 Subject: [PATCH] perf(python): Faster building of error messages Signed-off-by: Dmitry Dygalo --- crates/jsonschema-py/CHANGELOG.md | 5 +++-- crates/jsonschema-py/src/lib.rs | 33 ++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/crates/jsonschema-py/CHANGELOG.md b/crates/jsonschema-py/CHANGELOG.md index 20a980be..c4f978f1 100644 --- a/crates/jsonschema-py/CHANGELOG.md +++ b/crates/jsonschema-py/CHANGELOG.md @@ -4,14 +4,15 @@ ### Performance -- Speedup Python -> Rust data serialization +- Speedup Python -> Rust data serialization. +- Speedup building error messages. ## [0.26.0] - 2024-10-26 ### Performance - Optimize error formatting in some cases. -- Speedup Python -> Rust data serialization +- Speedup Python -> Rust data serialization. - Significant improvement for the `validate` function. ## [0.25.1] - 2024-10-25 diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index a2339164..e5e8ed91 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -1,6 +1,7 @@ use std::{ any::Any, cell::RefCell, + io::Write, panic::{self, AssertUnwindSafe}, }; @@ -80,7 +81,7 @@ impl ValidationErrorIter { fn into_py_err(py: Python<'_>, error: jsonschema::ValidationError<'_>) -> PyResult { let pyerror_type = PyType::new_bound::(py); let message = error.to_string(); - let verbose_message = to_error_message(&error); + let verbose_message = to_error_message(&error, message.clone()); let into_path = |segment: &str| { if let Ok(idx) = segment.parse::() { idx.into_py(py) @@ -124,8 +125,7 @@ fn get_draft(draft: u8) -> PyResult { DRAFT201909 => Ok(Draft::Draft201909), DRAFT202012 => Ok(Draft::Draft202012), _ => Err(exceptions::PyValueError::new_err(format!( - "Unknown draft: {}", - draft + "Unknown draft: {draft}" ))), } } @@ -216,8 +216,11 @@ fn raise_on_error( error.map_or_else(|| Ok(()), |err| Err(into_py_err(py, err)?)) } -fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String { - let mut message = error.to_string(); +fn is_ascii_number(s: &str) -> bool { + !s.is_empty() && s.as_bytes().iter().all(|&b| b.is_ascii_digit()) +} + +fn to_error_message(error: &jsonschema::ValidationError<'_>, mut message: String) -> String { // It roughly doubles message.reserve(message.len()); message.push('\n'); @@ -225,7 +228,7 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String { message.push_str("Failed validating"); let push_segment = |m: &mut String, segment: &str| { - if segment.parse::().is_ok() { + if is_ascii_number(segment) { m.push_str(segment); } else { m.push('"'); @@ -257,10 +260,26 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String { } message.push(':'); message.push_str("\n "); - message.push_str(&error.instance.to_string()); + let mut writer = StringWriter(&mut message); + serde_json::to_writer(&mut writer, &error.instance).expect("Failed to serialize JSON"); message } +struct StringWriter<'a>(&'a mut String); + +impl<'a> Write for StringWriter<'a> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + // SAFETY: `serde_json` always produces valid UTF-8 + self.0 + .push_str(unsafe { std::str::from_utf8_unchecked(buf) }); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + /// is_valid(schema, instance, draft=None, formats=None, validate_formats=None, ignore_unknown_formats=True) /// /// A shortcut for validating the input instance against the schema.