From ed9cfe7a7ecc88ac86ad9623a91a69cea0bc9170 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 30 Nov 2023 18:59:32 +0000 Subject: [PATCH 1/6] optimize peak for efficiency --- src/jiter.rs | 69 ++++++++++++++++++++-------------- src/parse.rs | 102 ++++++++++++++++++++++++++++++-------------------- src/python.rs | 18 +++++---- src/value.rs | 16 ++++---- tests/main.rs | 44 +++++++++++----------- 5 files changed, 143 insertions(+), 106 deletions(-) diff --git a/src/jiter.rs b/src/jiter.rs index f8fecea5..57844e01 100644 --- a/src/jiter.rs +++ b/src/jiter.rs @@ -1,4 +1,4 @@ -use crate::errors::{JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; +use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; use crate::parse::{Parser, Peak}; use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; @@ -110,13 +110,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a number, parse it. pub fn known_number(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Int, peak) + } else { + e.into() + } + }) } /// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON. @@ -127,13 +129,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is an integer, parse it. pub fn known_int(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Int, peak) + } else { + e.into() + } + }) } /// Assuming the next value is a float, consume it. Error if it is not a float, or is invalid JSON. @@ -144,13 +148,15 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a float, parse it. pub fn known_float(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::Num(first) => self - .parser - .consume_number::(first, self.allow_inf_nan) - .map_err(Into::into), - _ => Err(self.wrong_type(JsonType::Int, peak)), - } + self.parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + .map_err(|e| { + if !peak.is_num() { + self.wrong_type(JsonType::Float, peak) + } else { + e.into() + } + }) } /// Assuming the next value is a number, consume it and return bytes from the original JSON data. @@ -161,12 +167,18 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a number, parse it and return bytes from the original JSON data. fn known_number_bytes(&mut self, peak: Peak) -> JiterResult<&[u8]> { - match peak { - Peak::Num(first) => { - let range = self.parser.consume_number::(first, self.allow_inf_nan)?; - Ok(&self.data[range]) + match self + .parser + .consume_number::(peak.into_inner(), self.allow_inf_nan) + { + Ok(range) => Ok(&self.data[range]), + Err(e) => { + if !peak.is_num() { + Err(self.wrong_type(JsonType::Float, peak)) + } else { + Err(e.into()) + } } - _ => Err(self.wrong_type(JsonType::Float, peak)), } } @@ -299,9 +311,10 @@ impl<'j> Jiter<'j> { Peak::True | Peak::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index), Peak::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index), Peak::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index), - Peak::Num(first) => self.wrong_num(first, expected), Peak::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index), Peak::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index), + _ if peak.is_num() => self.wrong_num(peak.into_inner(), expected), + _ => json_error!(ExpectedSomeValue, self.parser.index).into(), } } diff --git a/src/parse.rs b/src/parse.rs index e1cf868f..4d94b58e 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -2,33 +2,60 @@ use crate::errors::{json_err, JsonResult, LinePosition}; use crate::number_decoder::AbstractNumberDecoder; use crate::string_decoder::{AbstractStringDecoder, Tape}; -/// Enum used to describe the next expected value in JSON. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Peak { - Null, - True, - False, - // we keep the first character of the number as we'll need it when decoding - Num(u8), - String, - Array, - Object, +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Peak(u8); + +#[allow(non_upper_case_globals)] // while testing +impl Peak { + pub const Null: Self = Self(b'n'); + pub const True: Self = Self(b't'); + pub const False: Self = Self(b'f'); + pub const Zero: Self = Self(b'0'); + pub const One: Self = Self(b'1'); + pub const Two: Self = Self(b'2'); + pub const Three: Self = Self(b'3'); + pub const Four: Self = Self(b'4'); + pub const Five: Self = Self(b'5'); + pub const Six: Self = Self(b'6'); + pub const Seven: Self = Self(b'7'); + pub const Eight: Self = Self(b'8'); + pub const Nine: Self = Self(b'9'); + pub const Minus: Self = Self(b'-'); + pub const Plus: Self = Self(b'+'); + pub const Infinity: Self = Self(b'I'); + pub const NaN: Self = Self(b'N'); + pub const String: Self = Self(b'"'); + pub const Array: Self = Self(b'['); + pub const Object: Self = Self(b'{'); } impl Peak { - fn new(next: u8) -> Option { - match next { - b'[' => Some(Self::Array), - b'{' => Some(Self::Object), - b'"' => Some(Self::String), - b't' => Some(Self::True), - b'f' => Some(Self::False), - b'n' => Some(Self::Null), - b'0'..=b'9' => Some(Self::Num(next)), - // `-` negative, `I` Infinity, `N` NaN - b'-' | b'I' | b'N' => Some(Self::Num(next)), - _ => None, - } + const fn new(next: u8) -> Self { + Self(next) + } + + pub const fn is_num(self) -> bool { + matches!( + self, + Self::Zero + | Self::One + | Self::Two + | Self::Three + | Self::Four + | Self::Five + | Self::Six + | Self::Seven + | Self::Eight + | Self::Nine + | Self::Minus + | Self::Plus + | Self::Infinity + | Self::NaN + ) + } + + pub const fn into_inner(self) -> u8 { + self.0 } } @@ -57,10 +84,7 @@ impl<'j> Parser<'j> { pub fn peak(&mut self) -> JsonResult { if let Some(next) = self.eat_whitespace() { - match Peak::new(next) { - Some(p) => Ok(p), - None => json_err!(ExpectedSomeValue, self.index), - } + Ok(Peak::new(next)) } else { json_err!(EofWhileParsingValue, self.index) } @@ -73,7 +97,7 @@ impl<'j> Parser<'j> { self.index += 1; Ok(None) } else { - self.array_peak() + Ok(Some(Peak::new(next))) } } else { json_err!(EofWhileParsingList, self.index) @@ -85,7 +109,12 @@ impl<'j> Parser<'j> { match next { b',' => { self.index += 1; - self.array_peak() + let next = self.array_peak()?; + if next.is_none() { + json_err!(TrailingComma, self.index) + } else { + Ok(next) + } } b']' => { self.index += 1; @@ -216,16 +245,9 @@ impl<'j> Parser<'j> { fn array_peak(&mut self) -> JsonResult> { if let Some(next) = self.eat_whitespace() { - match Peak::new(next) { - Some(p) => Ok(Some(p)), - None => { - // if next is a `]`, we have a "trailing comma" error - if next == b']' { - json_err!(TrailingComma, self.index) - } else { - json_err!(ExpectedSomeValue, self.index) - } - } + match next { + b']' => Ok(None), + _ => Ok(Some(Peak::new(next))), } } else { json_err!(EofWhileParsingValue, self.index) diff --git a/src/python.rs b/src/python.rs index 33914574..a35c6484 100644 --- a/src/python.rs +++ b/src/python.rs @@ -75,14 +75,6 @@ impl<'j> PythonParser<'j> { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) } - Peak::Num(first) => { - let n = self.parser.consume_number::(first, self.allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), - NumberAny::Float(float) => Ok(float.to_object(py)), - } - } Peak::Array => { let list = if let Some(peak_first) = self.parser.array_first()? { let mut vec: SmallVec<[PyObject; 8]> = SmallVec::with_capacity(8); @@ -125,6 +117,16 @@ impl<'j> PythonParser<'j> { } Ok(dict.to_object(py)) } + _ => { + let n = self + .parser + .consume_number::(peak.into_inner(), self.allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), + NumberAny::Float(float) => Ok(float.to_object(py)), + } + } } } diff --git a/src/value.rs b/src/value.rs index 448eed4c..833f7a85 100644 --- a/src/value.rs +++ b/src/value.rs @@ -97,14 +97,6 @@ pub(crate) fn take_value( let s = parser.consume_string::(tape)?; Ok(JsonValue::Str(s.into())) } - Peak::Num(first) => { - let n = parser.consume_number::(first, allow_inf_nan)?; - match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)), - NumberAny::Float(float) => Ok(JsonValue::Float(float)), - } - } Peak::Array => { // we could do something clever about guessing the size of the array let mut array: SmallVec<[JsonValue; 8]> = SmallVec::new(); @@ -144,5 +136,13 @@ pub(crate) fn take_value( Ok(JsonValue::Object(Arc::new(object))) } + _ => { + let n = parser.consume_number::(peak.into_inner(), allow_inf_nan)?; + match n { + NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)), + NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)), + NumberAny::Float(float) => Ok(JsonValue::Float(float)), + } + } } } diff --git a/tests/main.rs b/tests/main.rs index 74a1a542..8a7e7046 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -36,10 +36,6 @@ fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { let str = jiter.known_str()?; v.push(format!("String({str}) @ {position}")); } - Peak::Num(_) => { - let s = display_number(peak, jiter)?; - v.push(s); - } Peak::Array => { v.push(format!("[ @ {position}")); if let Some(peak) = jiter.known_array()? { @@ -66,6 +62,10 @@ fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { } v.push("}".to_string()); } + _ => { + let s = display_number(peak, jiter)?; + v.push(s); + } }; Ok(v) } @@ -349,7 +349,7 @@ fn invalid_unicode_code() { fn nan_disallowed() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'N')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -363,7 +363,7 @@ fn nan_disallowed() { fn inf_disallowed() { let json = r#"[Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'I')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Infinity); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -377,7 +377,7 @@ fn inf_disallowed() { fn inf_neg_disallowed() { let json = r#"[-Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'-')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Minus); let e = jiter.next_number().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); assert_eq!(e.index, 2); @@ -388,7 +388,7 @@ fn inf_neg_disallowed() { fn nan_disallowed_wrong_type() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Num(b'N')); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -630,9 +630,9 @@ fn jiter_object() { assert_eq!(jiter.next_object().unwrap(), Some("foo")); assert_eq!(jiter.next_str().unwrap(), "bar"); assert_eq!(jiter.next_key().unwrap(), Some("spam")); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'-'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(-2)); assert_eq!(jiter.array_step().unwrap(), Some(Peak::String)); assert_eq!(jiter.next_bytes().unwrap(), b"x"); @@ -644,11 +644,11 @@ fn jiter_object() { #[test] fn jiter_inf() { let mut jiter = Jiter::new(b"[Infinity, -Infinity, NaN]", true); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'I'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::Infinity)); assert_eq!(jiter.next_float().unwrap(), f64::INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'-'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); assert_eq!(jiter.next_float().unwrap(), f64::NEG_INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'N'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::NaN)); assert_eq!(jiter.next_float().unwrap().to_string(), "NaN"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -681,20 +681,20 @@ fn jiter_bytes() { #[test] fn jiter_number() { let mut jiter = Jiter::new(br#" [1, 2.2, 3, 4.1, 5.67]"#, false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'2'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Two)); assert_eq!(jiter.next_float().unwrap(), 2.2); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'3'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Three)); let n = jiter.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(3))); let n_float: f64 = n.into(); assert_eq!(n_float, 3.0); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'4'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Four)); assert_eq!(jiter.next_number().unwrap(), NumberAny::Float(4.1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Num(b'5'))); + assert_eq!(jiter.array_step().unwrap(), Some(Peak::Five)); assert_eq!(jiter.next_number_bytes().unwrap(), b"5.67"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -726,7 +726,7 @@ fn jiter_empty_array() { #[test] fn jiter_trailing_bracket() { let mut jiter = Jiter::new(b"[1]]", false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Num(b'1'))); + assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert!(jiter.array_step().unwrap().is_none()); let e = jiter.finish().unwrap_err(); @@ -914,17 +914,17 @@ fn readme_jiter() { fn jiter_clone() { let json = r#"[1, 2]"#; let mut jiter1 = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter1.next_array().unwrap().unwrap(), Peak::Num(b'1')); + assert_eq!(jiter1.next_array().unwrap().unwrap(), Peak::One); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(1))); let mut jiter2 = jiter1.clone(); - assert_eq!(jiter1.array_step().unwrap().unwrap(), Peak::Num(b'2')); + assert_eq!(jiter1.array_step().unwrap().unwrap(), Peak::Two); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2))); - assert_eq!(jiter2.array_step().unwrap().unwrap(), Peak::Num(b'2')); + assert_eq!(jiter2.array_step().unwrap().unwrap(), Peak::Two); let n = jiter2.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2))); From 3bf3ef70768aa782c49aad02be8eb8f83fa66e9b Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 30 Nov 2023 22:31:02 +0000 Subject: [PATCH 2/6] try to make errors match serde --- src/value.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/value.rs b/src/value.rs index 833f7a85..9c726850 100644 --- a/src/value.rs +++ b/src/value.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use num_bigint::BigInt; use smallvec::SmallVec; -use crate::errors::{JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; +use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; use crate::lazy_index_map::LazyIndexMap; use crate::number_decoder::{NumberAny, NumberInt}; use crate::parse::{Parser, Peak}; @@ -137,11 +137,18 @@ pub(crate) fn take_value( Ok(JsonValue::Object(Arc::new(object))) } _ => { - let n = parser.consume_number::(peak.into_inner(), allow_inf_nan)?; + let n = parser.consume_number::(peak.into_inner(), allow_inf_nan); match n { - NumberAny::Int(NumberInt::Int(int)) => Ok(JsonValue::Int(int)), - NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(JsonValue::BigInt(big_int)), - NumberAny::Float(float) => Ok(JsonValue::Float(float)), + Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)), + Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)), + Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)), + Err(e) => { + if !peak.is_num() { + Err(json_error!(ExpectedSomeValue, self.parser.index).into()) + } else { + Err(e.into()) + } + } } } } From 881570126ba8034fc43f886e98f5a2ee98817463 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Thu, 30 Nov 2023 22:42:46 +0000 Subject: [PATCH 3/6] Update src/value.rs --- src/value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/value.rs b/src/value.rs index 9c726850..62ccc897 100644 --- a/src/value.rs +++ b/src/value.rs @@ -144,7 +144,7 @@ pub(crate) fn take_value( Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)), Err(e) => { if !peak.is_num() { - Err(json_error!(ExpectedSomeValue, self.parser.index).into()) + Err(json_error!(ExpectedSomeValue, parser.index).into()) } else { Err(e.into()) } From ae82f74c60c7af48645f97bd9f73afea6deceae8 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 1 Dec 2023 14:23:33 +0000 Subject: [PATCH 4/6] fixup serde errors --- src/parse.rs | 29 +---------------------------- src/value.rs | 4 ++-- tests/main.rs | 20 ++++++++++---------- 3 files changed, 13 insertions(+), 40 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index 4d94b58e..518bae62 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -10,18 +10,7 @@ impl Peak { pub const Null: Self = Self(b'n'); pub const True: Self = Self(b't'); pub const False: Self = Self(b'f'); - pub const Zero: Self = Self(b'0'); - pub const One: Self = Self(b'1'); - pub const Two: Self = Self(b'2'); - pub const Three: Self = Self(b'3'); - pub const Four: Self = Self(b'4'); - pub const Five: Self = Self(b'5'); - pub const Six: Self = Self(b'6'); - pub const Seven: Self = Self(b'7'); - pub const Eight: Self = Self(b'8'); - pub const Nine: Self = Self(b'9'); pub const Minus: Self = Self(b'-'); - pub const Plus: Self = Self(b'+'); pub const Infinity: Self = Self(b'I'); pub const NaN: Self = Self(b'N'); pub const String: Self = Self(b'"'); @@ -35,23 +24,7 @@ impl Peak { } pub const fn is_num(self) -> bool { - matches!( - self, - Self::Zero - | Self::One - | Self::Two - | Self::Three - | Self::Four - | Self::Five - | Self::Six - | Self::Seven - | Self::Eight - | Self::Nine - | Self::Minus - | Self::Plus - | Self::Infinity - | Self::NaN - ) + self.0.is_ascii_digit() || matches!(self, Self::Minus | Self::Infinity | Self::NaN) } pub const fn into_inner(self) -> u8 { diff --git a/src/value.rs b/src/value.rs index 62ccc897..a051d890 100644 --- a/src/value.rs +++ b/src/value.rs @@ -144,9 +144,9 @@ pub(crate) fn take_value( Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)), Err(e) => { if !peak.is_num() { - Err(json_error!(ExpectedSomeValue, parser.index).into()) + Err(json_error!(ExpectedSomeValue, parser.index)) } else { - Err(e.into()) + Err(e) } } } diff --git a/tests/main.rs b/tests/main.rs index 8a7e7046..a4040ed8 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -630,7 +630,7 @@ fn jiter_object() { assert_eq!(jiter.next_object().unwrap(), Some("foo")); assert_eq!(jiter.next_str().unwrap(), "bar"); assert_eq!(jiter.next_key().unwrap(), Some("spam")); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); + assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(-2)); @@ -681,20 +681,20 @@ fn jiter_bytes() { #[test] fn jiter_number() { let mut jiter = Jiter::new(br#" [1, 2.2, 3, 4.1, 5.67]"#, false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); + assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Two)); + assert_eq!(jiter.array_step().unwrap().unwrap().into_inner(), b'2'); assert_eq!(jiter.next_float().unwrap(), 2.2); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Three)); + assert_eq!(jiter.array_step().unwrap().unwrap().into_inner(), b'3'); let n = jiter.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(3))); let n_float: f64 = n.into(); assert_eq!(n_float, 3.0); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Four)); + assert_eq!(jiter.array_step().unwrap().unwrap().into_inner(), b'4'); assert_eq!(jiter.next_number().unwrap(), NumberAny::Float(4.1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Five)); + assert_eq!(jiter.array_step().unwrap().unwrap().into_inner(), b'5'); assert_eq!(jiter.next_number_bytes().unwrap(), b"5.67"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -726,7 +726,7 @@ fn jiter_empty_array() { #[test] fn jiter_trailing_bracket() { let mut jiter = Jiter::new(b"[1]]", false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::One)); + assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert!(jiter.array_step().unwrap().is_none()); let e = jiter.finish().unwrap_err(); @@ -914,17 +914,17 @@ fn readme_jiter() { fn jiter_clone() { let json = r#"[1, 2]"#; let mut jiter1 = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter1.next_array().unwrap().unwrap(), Peak::One); + assert_eq!(jiter1.next_array().unwrap().unwrap().into_inner(), b'1'); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(1))); let mut jiter2 = jiter1.clone(); - assert_eq!(jiter1.array_step().unwrap().unwrap(), Peak::Two); + assert_eq!(jiter1.array_step().unwrap().unwrap().into_inner(), b'2'); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2))); - assert_eq!(jiter2.array_step().unwrap().unwrap(), Peak::Two); + assert_eq!(jiter2.array_step().unwrap().unwrap().into_inner(), b'2'); let n = jiter2.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(2))); From 7280838b94561eba9d1f5fd2056a3209ff409356 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 4 Dec 2023 13:17:59 +0300 Subject: [PATCH 5/6] peak -> peek --- README.md | 6 +- benches/main.rs | 42 ++++++------- src/jiter.rs | 158 ++++++++++++++++++++++++------------------------ src/lib.rs | 2 +- src/parse.rs | 22 +++---- src/python.rs | 46 +++++++------- src/value.rs | 42 ++++++------- tests/main.rs | 74 +++++++++++------------ 8 files changed, 196 insertions(+), 196 deletions(-) diff --git a/README.md b/README.md index ed5143fa..33b13058 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Object( To use [Jiter], you need to know what schema you're expecting: ```rust -use jiter::{Jiter, NumberInt, Peak}; +use jiter::{Jiter, NumberInt, Peek}; fn main() { let json_data = r#" @@ -75,10 +75,10 @@ fn main() { assert_eq!(jiter.next_key().unwrap(), Some("age")); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); assert_eq!(jiter.next_key().unwrap(), Some("phones")); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::String)); + assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); // we know the next value is a string as we just asserted so assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::String)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); // same again assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); // next we'll get `None` from `array_step` as the array is finished diff --git a/benches/main.rs b/benches/main.rs index 8c67af8b..41ea2e38 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use std::fs::File; use std::io::Read; -use jiter::{Jiter, JsonValue, Peak}; +use jiter::{Jiter, JsonValue, Peek}; use serde_json::Value; fn read_file(path: &str) -> String { @@ -31,11 +31,11 @@ fn jiter_iter_big(path: &str, bench: &mut Bencher) { jiter.next_array().unwrap(); loop { - if let Some(peak) = jiter.next_array().unwrap() { - let i = jiter.known_float(peak).unwrap(); + if let Some(peek) = jiter.next_array().unwrap() { + let i = jiter.known_float(peek).unwrap(); black_box(i); - while let Some(peak) = jiter.array_step().unwrap() { - let i = jiter.known_float(peak).unwrap(); + while let Some(peek) = jiter.array_step().unwrap() { + let i = jiter.known_float(peek).unwrap(); black_box(i); } } @@ -47,10 +47,10 @@ fn jiter_iter_big(path: &str, bench: &mut Bencher) { } fn find_string(jiter: &mut Jiter) -> String { - let peak = jiter.peak().unwrap(); - match peak { - Peak::String => jiter.known_str().unwrap().to_string(), - Peak::Array => { + let peek = jiter.peek().unwrap(); + match peek { + Peek::String => jiter.known_str().unwrap().to_string(), + Peek::Array => { assert!(jiter.known_array().unwrap().is_some()); let s = find_string(jiter).to_string(); assert!(jiter.array_step().unwrap().is_none()); @@ -93,11 +93,11 @@ fn jiter_iter_true_array(path: &str, bench: &mut Bencher) { let json_data = black_box(json.as_bytes()); bench.iter(|| { let mut jiter = Jiter::new(json_data, false); - let first_peak = jiter.next_array().unwrap().unwrap(); - let i = jiter.known_bool(first_peak).unwrap(); + let first_peek = jiter.next_array().unwrap().unwrap(); + let i = jiter.known_bool(first_peek).unwrap(); black_box(i); - while let Some(peak) = jiter.array_step().unwrap() { - let i = jiter.known_bool(peak).unwrap(); + while let Some(peek) = jiter.array_step().unwrap() { + let i = jiter.known_bool(peek).unwrap(); black_box(i); } }) @@ -126,11 +126,11 @@ fn jiter_iter_ints_array(path: &str, bench: &mut Bencher) { let json_data = black_box(json.as_bytes()); bench.iter(|| { let mut jiter = Jiter::new(json_data, false); - let first_peak = jiter.next_array().unwrap().unwrap(); - let i = jiter.known_int(first_peak).unwrap(); + let first_peek = jiter.next_array().unwrap().unwrap(); + let i = jiter.known_int(first_peek).unwrap(); black_box(i); - while let Some(peak) = jiter.array_step().unwrap() { - let i = jiter.known_int(peak).unwrap(); + while let Some(peek) = jiter.array_step().unwrap() { + let i = jiter.known_int(peek).unwrap(); black_box(i); } }) @@ -141,11 +141,11 @@ fn jiter_iter_floats_array(path: &str, bench: &mut Bencher) { let json_data = black_box(json.as_bytes()); bench.iter(|| { let mut jiter = Jiter::new(json_data, false); - let first_peak = jiter.next_array().unwrap().unwrap(); - let i = jiter.known_float(first_peak).unwrap(); + let first_peek = jiter.next_array().unwrap().unwrap(); + let i = jiter.known_float(first_peek).unwrap(); black_box(i); - while let Some(peak) = jiter.array_step().unwrap() { - let i = jiter.known_float(peak).unwrap(); + while let Some(peek) = jiter.array_step().unwrap() { + let i = jiter.known_float(peek).unwrap(); black_box(i); } }) diff --git a/src/jiter.rs b/src/jiter.rs index 57844e01..4dd8b6d1 100644 --- a/src/jiter.rs +++ b/src/jiter.rs @@ -1,6 +1,6 @@ use crate::errors::{json_error, JiterError, JsonType, LinePosition, DEFAULT_RECURSION_LIMIT}; use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; -use crate::parse::{Parser, Peak}; +use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; use crate::value::{take_value, JsonValue}; @@ -55,17 +55,17 @@ impl<'j> Jiter<'j> { LinePosition::find(self.data, index) } - /// Peak at the next JSON value without consuming it. - pub fn peak(&mut self) -> JiterResult { - self.parser.peak().map_err(Into::into) + /// Peek at the next JSON value without consuming it. + pub fn peek(&mut self) -> JiterResult { + self.parser.peek().map_err(Into::into) } /// Assuming the next value is `null`, consume it. Error if it is not `null`, or is invalid JSON. pub fn next_null(&mut self) -> JiterResult<()> { - let peak = self.peak()?; - match peak { - Peak::Null => self.known_null(), - _ => Err(self.wrong_type(JsonType::Null, peak)), + let peek = self.peek()?; + match peek { + Peek::Null => self.known_null(), + _ => Err(self.wrong_type(JsonType::Null, peek)), } } @@ -80,22 +80,22 @@ impl<'j> Jiter<'j> { /// # Returns /// The boolean value. pub fn next_bool(&mut self) -> JiterResult { - let peak = self.peak()?; - self.known_bool(peak) + let peek = self.peek()?; + self.known_bool(peek) } /// Knowing the next value is `true` or `false`, parse it. - pub fn known_bool(&mut self, peak: Peak) -> JiterResult { - match peak { - Peak::True => { + pub fn known_bool(&mut self, peek: Peek) -> JiterResult { + match peek { + Peek::True => { self.parser.consume_true()?; Ok(true) } - Peak::False => { + Peek::False => { self.parser.consume_false()?; Ok(false) } - _ => Err(self.wrong_type(JsonType::Bool, peak)), + _ => Err(self.wrong_type(JsonType::Bool, peek)), } } @@ -104,17 +104,17 @@ impl<'j> Jiter<'j> { /// # Returns /// A [NumberAny] representing the number. pub fn next_number(&mut self) -> JiterResult { - let peak = self.peak()?; - self.known_number(peak) + let peek = self.peek()?; + self.known_number(peek) } /// Knowing the next value is a number, parse it. - pub fn known_number(&mut self, peak: Peak) -> JiterResult { + pub fn known_number(&mut self, peek: Peek) -> JiterResult { self.parser - .consume_number::(peak.into_inner(), self.allow_inf_nan) + .consume_number::(peek.into_inner(), self.allow_inf_nan) .map_err(|e| { - if !peak.is_num() { - self.wrong_type(JsonType::Int, peak) + if !peek.is_num() { + self.wrong_type(JsonType::Int, peek) } else { e.into() } @@ -123,17 +123,17 @@ impl<'j> Jiter<'j> { /// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON. pub fn next_int(&mut self) -> JiterResult { - let peak = self.peak()?; - self.known_int(peak) + let peek = self.peek()?; + self.known_int(peek) } /// Knowing the next value is an integer, parse it. - pub fn known_int(&mut self, peak: Peak) -> JiterResult { + pub fn known_int(&mut self, peek: Peek) -> JiterResult { self.parser - .consume_number::(peak.into_inner(), self.allow_inf_nan) + .consume_number::(peek.into_inner(), self.allow_inf_nan) .map_err(|e| { - if !peak.is_num() { - self.wrong_type(JsonType::Int, peak) + if !peek.is_num() { + self.wrong_type(JsonType::Int, peek) } else { e.into() } @@ -142,17 +142,17 @@ impl<'j> Jiter<'j> { /// Assuming the next value is a float, consume it. Error if it is not a float, or is invalid JSON. pub fn next_float(&mut self) -> JiterResult { - let peak = self.peak()?; - self.known_float(peak) + let peek = self.peek()?; + self.known_float(peek) } /// Knowing the next value is a float, parse it. - pub fn known_float(&mut self, peak: Peak) -> JiterResult { + pub fn known_float(&mut self, peek: Peek) -> JiterResult { self.parser - .consume_number::(peak.into_inner(), self.allow_inf_nan) + .consume_number::(peek.into_inner(), self.allow_inf_nan) .map_err(|e| { - if !peak.is_num() { - self.wrong_type(JsonType::Float, peak) + if !peek.is_num() { + self.wrong_type(JsonType::Float, peek) } else { e.into() } @@ -161,20 +161,20 @@ impl<'j> Jiter<'j> { /// Assuming the next value is a number, consume it and return bytes from the original JSON data. pub fn next_number_bytes(&mut self) -> JiterResult<&[u8]> { - let peak = self.peak()?; - self.known_number_bytes(peak) + let peek = self.peek()?; + self.known_number_bytes(peek) } /// Knowing the next value is a number, parse it and return bytes from the original JSON data. - fn known_number_bytes(&mut self, peak: Peak) -> JiterResult<&[u8]> { + fn known_number_bytes(&mut self, peek: Peek) -> JiterResult<&[u8]> { match self .parser - .consume_number::(peak.into_inner(), self.allow_inf_nan) + .consume_number::(peek.into_inner(), self.allow_inf_nan) { Ok(range) => Ok(&self.data[range]), Err(e) => { - if !peak.is_num() { - Err(self.wrong_type(JsonType::Float, peak)) + if !peek.is_num() { + Err(self.wrong_type(JsonType::Float, peek)) } else { Err(e.into()) } @@ -184,10 +184,10 @@ impl<'j> Jiter<'j> { /// Assuming the next value is a string, consume it. Error if it is not a string, or is invalid JSON. pub fn next_str(&mut self) -> JiterResult<&str> { - let peak = self.peak()?; - match peak { - Peak::String => self.known_str(), - _ => Err(self.wrong_type(JsonType::String, peak)), + let peek = self.peek()?; + match peek { + Peek::String => self.known_str(), + _ => Err(self.wrong_type(JsonType::String, peek)), } } @@ -201,10 +201,10 @@ impl<'j> Jiter<'j> { /// Assuming the next value is a string, consume it and return bytes from the original JSON data. pub fn next_bytes(&mut self) -> JiterResult<&[u8]> { - let peak = self.peak()?; - match peak { - Peak::String => self.known_bytes(), - _ => Err(self.wrong_type(JsonType::String, peak)), + let peek = self.peek()?; + match peek { + Peek::String => self.known_bytes(), + _ => Err(self.wrong_type(JsonType::String, peek)), } } @@ -216,17 +216,17 @@ impl<'j> Jiter<'j> { /// Parse the next JSON value and return it as a [JsonValue]. Error if it is invalid JSON. pub fn next_value(&mut self) -> JiterResult { - let peak = self.peak()?; - self.known_value(peak) + let peek = self.peek()?; + self.known_value(peek) } /// Parse the next JSON value and return it as a [JsonValue]. Error if it is invalid JSON. /// /// # Arguments - /// - `peak`: The [Peak] of the next JSON value. - pub fn known_value(&mut self, peak: Peak) -> JiterResult { + /// - `peek`: The [Peek] of the next JSON value. + pub fn known_value(&mut self, peek: Peek) -> JiterResult { take_value( - peak, + peek, &mut self.parser, &mut self.tape, DEFAULT_RECURSION_LIMIT, @@ -235,26 +235,26 @@ impl<'j> Jiter<'j> { .map_err(Into::into) } - /// Assuming the next value is an array, peak at the first value. + /// Assuming the next value is an array, peek at the first value. /// Error if it is not an array, or is invalid JSON. /// /// # Returns - /// The `Some(peak)` of the first value in the array is not empty, `None` if it is empty. - pub fn next_array(&mut self) -> JiterResult> { - let peak = self.peak()?; - match peak { - Peak::Array => self.known_array(), - _ => Err(self.wrong_type(JsonType::Array, peak)), + /// The `Some(peek)` of the first value in the array is not empty, `None` if it is empty. + pub fn next_array(&mut self) -> JiterResult> { + let peek = self.peek()?; + match peek { + Peek::Array => self.known_array(), + _ => Err(self.wrong_type(JsonType::Array, peek)), } } /// Assuming the next value is an array, peat at the first value. - pub fn known_array(&mut self) -> JiterResult> { + pub fn known_array(&mut self) -> JiterResult> { self.parser.array_first().map_err(Into::into) } - /// Peak at the next value in an array. - pub fn array_step(&mut self) -> JiterResult> { + /// Peek at the next value in an array. + pub fn array_step(&mut self) -> JiterResult> { self.parser.array_step().map_err(Into::into) } @@ -264,10 +264,10 @@ impl<'j> Jiter<'j> { /// # Returns /// The `Some(key)` of the first key in the object is not empty, `None` if it is empty. pub fn next_object(&mut self) -> JiterResult> { - let peak = self.peak()?; - match peak { - Peak::Object => self.known_object(), - _ => Err(self.wrong_type(JsonType::Object, peak)), + let peek = self.peek()?; + match peek { + Peek::Object => self.known_object(), + _ => Err(self.wrong_type(JsonType::Object, peek)), } } @@ -277,15 +277,15 @@ impl<'j> Jiter<'j> { Ok(op_str.map(|s| s.as_str())) } - /// Assuming the next value is an object, peak at the first key. + /// Assuming the next value is an object, peek at the first key. pub fn next_object_bytes(&mut self) -> JiterResult> { - let peak = self.peak()?; - match peak { - Peak::Object => { + let peek = self.peek()?; + match peek { + Peek::Object => { let op_range = self.parser.object_first::(&mut self.tape)?; Ok(op_range.map(|r| &self.data[r])) } - _ => Err(self.wrong_type(JsonType::Object, peak)), + _ => Err(self.wrong_type(JsonType::Object, peek)), } } @@ -306,14 +306,14 @@ impl<'j> Jiter<'j> { self.parser.finish().map_err(Into::into) } - fn wrong_type(&self, expected: JsonType, peak: Peak) -> JiterError { - match peak { - Peak::True | Peak::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index), - Peak::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index), - Peak::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index), - Peak::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index), - Peak::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index), - _ if peak.is_num() => self.wrong_num(peak.into_inner(), expected), + fn wrong_type(&self, expected: JsonType, peek: Peek) -> JiterError { + match peek { + Peek::True | Peek::False => JiterError::wrong_type(expected, JsonType::Bool, self.parser.index), + Peek::Null => JiterError::wrong_type(expected, JsonType::Null, self.parser.index), + Peek::String => JiterError::wrong_type(expected, JsonType::String, self.parser.index), + Peek::Array => JiterError::wrong_type(expected, JsonType::Array, self.parser.index), + Peek::Object => JiterError::wrong_type(expected, JsonType::Object, self.parser.index), + _ if peek.is_num() => self.wrong_num(peek.into_inner(), expected), _ => json_error!(ExpectedSomeValue, self.parser.index).into(), } } diff --git a/src/lib.rs b/src/lib.rs index 02af357c..374ac4c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ pub use errors::{JiterErrorType, JsonError, JsonErrorType, JsonResult, JsonType, pub use jiter::{Jiter, JiterResult}; pub use lazy_index_map::LazyIndexMap; pub use number_decoder::{NumberAny, NumberInt}; -pub use parse::Peak; +pub use parse::Peek; pub use value::{JsonArray, JsonObject, JsonValue}; #[cfg(feature = "python")] diff --git a/src/parse.rs b/src/parse.rs index 518bae62..c4f5b5a0 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -3,10 +3,10 @@ use crate::number_decoder::AbstractNumberDecoder; use crate::string_decoder::{AbstractStringDecoder, Tape}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Peak(u8); +pub struct Peek(u8); #[allow(non_upper_case_globals)] // while testing -impl Peak { +impl Peek { pub const Null: Self = Self(b'n'); pub const True: Self = Self(b't'); pub const False: Self = Self(b'f'); @@ -18,7 +18,7 @@ impl Peak { pub const Object: Self = Self(b'{'); } -impl Peak { +impl Peek { const fn new(next: u8) -> Self { Self(next) } @@ -55,34 +55,34 @@ impl<'j> Parser<'j> { LinePosition::find(self.data, self.index) } - pub fn peak(&mut self) -> JsonResult { + pub fn peek(&mut self) -> JsonResult { if let Some(next) = self.eat_whitespace() { - Ok(Peak::new(next)) + Ok(Peek::new(next)) } else { json_err!(EofWhileParsingValue, self.index) } } - pub fn array_first(&mut self) -> JsonResult> { + pub fn array_first(&mut self) -> JsonResult> { self.index += 1; if let Some(next) = self.eat_whitespace() { if next == b']' { self.index += 1; Ok(None) } else { - Ok(Some(Peak::new(next))) + Ok(Some(Peek::new(next))) } } else { json_err!(EofWhileParsingList, self.index) } } - pub fn array_step(&mut self) -> JsonResult> { + pub fn array_step(&mut self) -> JsonResult> { if let Some(next) = self.eat_whitespace() { match next { b',' => { self.index += 1; - let next = self.array_peak()?; + let next = self.array_peek()?; if next.is_none() { json_err!(TrailingComma, self.index) } else { @@ -216,11 +216,11 @@ impl<'j> Parser<'j> { Ok(()) } - fn array_peak(&mut self) -> JsonResult> { + fn array_peek(&mut self) -> JsonResult> { if let Some(next) = self.eat_whitespace() { match next { b']' => Ok(None), - _ => Ok(Some(Peak::new(next))), + _ => Ok(Some(Peek::new(next))), } } else { json_err!(EofWhileParsingValue, self.index) diff --git a/src/python.rs b/src/python.rs index a35c6484..726a0e87 100644 --- a/src/python.rs +++ b/src/python.rs @@ -11,7 +11,7 @@ use smallvec::SmallVec; use crate::errors::{json_err, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; use crate::number_decoder::{NumberAny, NumberInt}; -use crate::parse::{Parser, Peak}; +use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, Tape}; /// Parse a JSON value from a byte slice and return a Python object. @@ -34,11 +34,11 @@ pub fn python_parse(py: Python, json_data: &[u8], allow_inf_nan: bool, cache_str allow_inf_nan, }; - let peak = python_parser.parser.peak()?; + let peek = python_parser.parser.peek()?; let v = if cache_strings { - python_parser.py_take_value::(py, peak)? + python_parser.py_take_value::(py, peek)? } else { - python_parser.py_take_value::(py, peak)? + python_parser.py_take_value::(py, peek)? }; python_parser.parser.finish()?; Ok(v) @@ -57,31 +57,31 @@ struct PythonParser<'j> { } impl<'j> PythonParser<'j> { - fn py_take_value(&mut self, py: Python, peak: Peak) -> JsonResult { - match peak { - Peak::True => { + fn py_take_value(&mut self, py: Python, peek: Peek) -> JsonResult { + match peek { + Peek::True => { self.parser.consume_true()?; Ok(true.to_object(py)) } - Peak::False => { + Peek::False => { self.parser.consume_false()?; Ok(false.to_object(py)) } - Peak::Null => { + Peek::Null => { self.parser.consume_null()?; Ok(py.None()) } - Peak::String => { + Peek::String => { let s = self.parser.consume_string::(&mut self.tape)?; Ok(StringCache::get(py, s.as_str())) } - Peak::Array => { - let list = if let Some(peak_first) = self.parser.array_first()? { + Peek::Array => { + let list = if let Some(peek_first) = self.parser.array_first()? { let mut vec: SmallVec<[PyObject; 8]> = SmallVec::with_capacity(8); - let v = self._check_take_value::(py, peak_first)?; + let v = self._check_take_value::(py, peek_first)?; vec.push(v); - while let Some(peak) = self.parser.array_step()? { - let v = self._check_take_value::(py, peak)?; + while let Some(peek) = self.parser.array_step()? { + let v = self._check_take_value::(py, peek)?; vec.push(v); } PyList::new(py, vec) @@ -90,7 +90,7 @@ impl<'j> PythonParser<'j> { }; Ok(list.to_object(py)) } - Peak::Object => { + Peek::Object => { let dict = PyDict::new(py); let set_item = |key: PyObject, value: PyObject| { @@ -105,13 +105,13 @@ impl<'j> PythonParser<'j> { if let Some(first_key) = self.parser.object_first::(&mut self.tape)? { let first_key = StringCache::get(py, first_key.as_str()); - let peak = self.parser.peak()?; - let first_value = self._check_take_value::(py, peak)?; + let peek = self.parser.peek()?; + let first_value = self._check_take_value::(py, peek)?; set_item(first_key, first_value); while let Some(key) = self.parser.object_step::(&mut self.tape)? { let key = StringCache::get(py, key.as_str()); - let peak = self.parser.peak()?; - let value = self._check_take_value::(py, peak)?; + let peek = self.parser.peek()?; + let value = self._check_take_value::(py, peek)?; set_item(key, value); } } @@ -120,7 +120,7 @@ impl<'j> PythonParser<'j> { _ => { let n = self .parser - .consume_number::(peak.into_inner(), self.allow_inf_nan)?; + .consume_number::(peek.into_inner(), self.allow_inf_nan)?; match n { NumberAny::Int(NumberInt::Int(int)) => Ok(int.to_object(py)), NumberAny::Int(NumberInt::BigInt(big_int)) => Ok(big_int.to_object(py)), @@ -130,13 +130,13 @@ impl<'j> PythonParser<'j> { } } - fn _check_take_value(&mut self, py: Python, peak: Peak) -> JsonResult { + fn _check_take_value(&mut self, py: Python, peek: Peek) -> JsonResult { self.recursion_limit = match self.recursion_limit.checked_sub(1) { Some(limit) => limit, None => return json_err!(RecursionLimitExceeded, self.parser.index), }; - let r = self.py_take_value::(py, peak); + let r = self.py_take_value::(py, peek); self.recursion_limit += 1; r diff --git a/src/value.rs b/src/value.rs index a051d890..04b59a41 100644 --- a/src/value.rs +++ b/src/value.rs @@ -6,7 +6,7 @@ use smallvec::SmallVec; use crate::errors::{json_error, JsonError, JsonResult, DEFAULT_RECURSION_LIMIT}; use crate::lazy_index_map::LazyIndexMap; use crate::number_decoder::{NumberAny, NumberInt}; -use crate::parse::{Parser, Peak}; +use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, Tape}; /// Enum representing a JSON value. @@ -53,8 +53,8 @@ impl JsonValue { let mut parser = Parser::new(data); let mut tape = Tape::default(); - let peak = parser.peak()?; - let v = take_value(peak, &mut parser, &mut tape, DEFAULT_RECURSION_LIMIT, allow_inf_nan)?; + let peek = parser.peek()?; + let v = take_value(peek, &mut parser, &mut tape, DEFAULT_RECURSION_LIMIT, allow_inf_nan)?; parser.finish()?; Ok(v) } @@ -74,61 +74,61 @@ macro_rules! check_recursion { } pub(crate) fn take_value( - peak: Peak, + peek: Peek, parser: &mut Parser, tape: &mut Tape, mut recursion_limit: u8, allow_inf_nan: bool, ) -> JsonResult { - match peak { - Peak::True => { + match peek { + Peek::True => { parser.consume_true()?; Ok(JsonValue::Bool(true)) } - Peak::False => { + Peek::False => { parser.consume_false()?; Ok(JsonValue::Bool(false)) } - Peak::Null => { + Peek::Null => { parser.consume_null()?; Ok(JsonValue::Null) } - Peak::String => { + Peek::String => { let s = parser.consume_string::(tape)?; Ok(JsonValue::Str(s.into())) } - Peak::Array => { + Peek::Array => { // we could do something clever about guessing the size of the array let mut array: SmallVec<[JsonValue; 8]> = SmallVec::new(); - if let Some(peak_first) = parser.array_first()? { + if let Some(peek_first) = parser.array_first()? { check_recursion!(recursion_limit, parser.index, - let v = take_value(peak_first, parser, tape, recursion_limit, allow_inf_nan)?; + let v = take_value(peek_first, parser, tape, recursion_limit, allow_inf_nan)?; ); array.push(v); - while let Some(peak) = parser.array_step()? { + while let Some(peek) = parser.array_step()? { check_recursion!(recursion_limit, parser.index, - let v = take_value(peak, parser, tape, recursion_limit, allow_inf_nan)?; + let v = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?; ); array.push(v); } } Ok(JsonValue::Array(Arc::new(array))) } - Peak::Object => { + Peek::Object => { // same for objects let mut object: LazyIndexMap = LazyIndexMap::new(); if let Some(first_key) = parser.object_first::(tape)? { let first_key = first_key.into(); - let peak = parser.peak()?; + let peek = parser.peek()?; check_recursion!(recursion_limit, parser.index, - let first_value = take_value(peak, parser, tape, recursion_limit, allow_inf_nan)?; + let first_value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?; ); object.insert(first_key, first_value); while let Some(key) = parser.object_step::(tape)? { let key = key.into(); - let peak = parser.peak()?; + let peek = parser.peek()?; check_recursion!(recursion_limit, parser.index, - let value = take_value(peak, parser, tape, recursion_limit, allow_inf_nan)?; + let value = take_value(peek, parser, tape, recursion_limit, allow_inf_nan)?; ); object.insert(key, value); } @@ -137,13 +137,13 @@ pub(crate) fn take_value( Ok(JsonValue::Object(Arc::new(object))) } _ => { - let n = parser.consume_number::(peak.into_inner(), allow_inf_nan); + let n = parser.consume_number::(peek.into_inner(), allow_inf_nan); match n { Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)), Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)), Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)), Err(e) => { - if !peak.is_num() { + if !peek.is_num() { Err(json_error!(ExpectedSomeValue, parser.index)) } else { Err(e) diff --git a/tests/main.rs b/tests/main.rs index a4040ed8..5f430bae 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -8,47 +8,47 @@ use smallvec::smallvec; use jiter::{ Jiter, JiterErrorType, JiterResult, JsonErrorType, JsonType, JsonValue, LazyIndexMap, LinePosition, NumberAny, - NumberInt, Peak, + NumberInt, Peek, }; -fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { +fn json_vec(jiter: &mut Jiter, peek: Option) -> JiterResult> { let mut v = Vec::new(); - let peak = match peak { - Some(peak) => peak, - None => jiter.peak()?, + let peek = match peek { + Some(peek) => peek, + None => jiter.peek()?, }; let position = jiter.current_position().short(); - match peak { - Peak::True => { - jiter.known_bool(peak)?; + match peek { + Peek::True => { + jiter.known_bool(peek)?; v.push(format!("true @ {position}")); } - Peak::False => { - jiter.known_bool(peak)?; + Peek::False => { + jiter.known_bool(peek)?; v.push(format!("false @ {position}")); } - Peak::Null => { + Peek::Null => { jiter.known_null()?; v.push(format!("null @ {position}")); } - Peak::String => { + Peek::String => { let str = jiter.known_str()?; v.push(format!("String({str}) @ {position}")); } - Peak::Array => { + Peek::Array => { v.push(format!("[ @ {position}")); - if let Some(peak) = jiter.known_array()? { - let el_vec = json_vec(jiter, Some(peak))?; + if let Some(peek) = jiter.known_array()? { + let el_vec = json_vec(jiter, Some(peek))?; v.extend(el_vec); - while let Some(peak) = jiter.array_step()? { - let el_vec = json_vec(jiter, Some(peak))?; + while let Some(peek) = jiter.array_step()? { + let el_vec = json_vec(jiter, Some(peek))?; v.extend(el_vec); } } v.push("]".to_string()); } - Peak::Object => { + Peek::Object => { v.push(format!("{{ @ {position}")); if let Some(key) = jiter.known_object()? { v.push(format!("Key({key})")); @@ -63,16 +63,16 @@ fn json_vec(jiter: &mut Jiter, peak: Option) -> JiterResult> { v.push("}".to_string()); } _ => { - let s = display_number(peak, jiter)?; + let s = display_number(peek, jiter)?; v.push(s); } }; Ok(v) } -fn display_number(peak: Peak, jiter: &mut Jiter) -> JiterResult { +fn display_number(peek: Peek, jiter: &mut Jiter) -> JiterResult { let position = jiter.current_position().short(); - let number = jiter.known_number(peak)?; + let number = jiter.known_number(peek)?; let s = match number { NumberAny::Int(NumberInt::Int(int)) => { format!("Int({int}) @ {position}") @@ -255,8 +255,8 @@ fn json_parse_str() { let json = r#" "foobar" "#; let data = json.as_bytes(); let mut jiter = Jiter::new(data, false); - let peak = jiter.peak().unwrap(); - assert_eq!(peak, Peak::String); + let peek = jiter.peek().unwrap(); + assert_eq!(peek, Peek::String); assert_eq!(jiter.current_position(), LinePosition::new(1, 2)); let result_string = jiter.known_str().unwrap(); @@ -349,7 +349,7 @@ fn invalid_unicode_code() { fn nan_disallowed() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::NaN); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -363,7 +363,7 @@ fn nan_disallowed() { fn inf_disallowed() { let json = r#"[Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Infinity); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::Infinity); let e = jiter.next_number().unwrap_err(); assert_eq!( e.error_type, @@ -377,7 +377,7 @@ fn inf_disallowed() { fn inf_neg_disallowed() { let json = r#"[-Infinity]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::Minus); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::Minus); let e = jiter.next_number().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); assert_eq!(e.index, 2); @@ -388,7 +388,7 @@ fn inf_neg_disallowed() { fn nan_disallowed_wrong_type() { let json = r#"[NaN]"#; let mut jiter = Jiter::new(json.as_bytes(), false); - assert_eq!(jiter.next_array().unwrap().unwrap(), Peak::NaN); + assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::NaN); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -632,9 +632,9 @@ fn jiter_object() { assert_eq!(jiter.next_key().unwrap(), Some("spam")); assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::Minus)); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(-2)); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::String)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); assert_eq!(jiter.next_bytes().unwrap(), b"x"); assert!(jiter.array_step().unwrap().is_none()); assert_eq!(jiter.next_key().unwrap(), None); @@ -644,11 +644,11 @@ fn jiter_object() { #[test] fn jiter_inf() { let mut jiter = Jiter::new(b"[Infinity, -Infinity, NaN]", true); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::Infinity)); + assert_eq!(jiter.next_array().unwrap(), Some(Peek::Infinity)); assert_eq!(jiter.next_float().unwrap(), f64::INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Minus)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::Minus)); assert_eq!(jiter.next_float().unwrap(), f64::NEG_INFINITY); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::NaN)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::NaN)); assert_eq!(jiter.next_float().unwrap().to_string(), "NaN"); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -657,11 +657,11 @@ fn jiter_inf() { #[test] fn jiter_bool() { let mut jiter = Jiter::new(b"[true, false, null]", false); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::True)); + assert_eq!(jiter.next_array().unwrap(), Some(Peek::True)); assert_eq!(jiter.next_bool().unwrap(), true); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::False)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::False)); assert_eq!(jiter.next_bool().unwrap(), false); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::Null)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::Null)); jiter.next_null().unwrap(); assert_eq!(jiter.array_step().unwrap(), None); jiter.finish().unwrap(); @@ -896,10 +896,10 @@ fn readme_jiter() { assert_eq!(jiter.next_key().unwrap(), Some("age")); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(43)); assert_eq!(jiter.next_key().unwrap(), Some("phones")); - assert_eq!(jiter.next_array().unwrap(), Some(Peak::String)); + assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); // we know the next value is a string as we just asserted so assert_eq!(jiter.known_str().unwrap(), "+44 1234567"); - assert_eq!(jiter.array_step().unwrap(), Some(Peak::String)); + assert_eq!(jiter.array_step().unwrap(), Some(Peek::String)); // same again assert_eq!(jiter.known_str().unwrap(), "+44 2345678"); // next we'll get `None` from `array_step` as the array is finished From 530ce062ec209c99445231c9f107a3985143d22c Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 4 Dec 2023 13:31:54 +0300 Subject: [PATCH 6/6] test coverage --- src/jiter.rs | 39 ++++++++------------- tests/main.rs | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 24 deletions(-) diff --git a/src/jiter.rs b/src/jiter.rs index 4dd8b6d1..c89164a9 100644 --- a/src/jiter.rs +++ b/src/jiter.rs @@ -3,6 +3,7 @@ use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; use crate::value::{take_value, JsonValue}; +use crate::{JsonError, JsonErrorType}; pub type JiterResult = Result; @@ -112,13 +113,7 @@ impl<'j> Jiter<'j> { pub fn known_number(&mut self, peek: Peek) -> JiterResult { self.parser .consume_number::(peek.into_inner(), self.allow_inf_nan) - .map_err(|e| { - if !peek.is_num() { - self.wrong_type(JsonType::Int, peek) - } else { - e.into() - } - }) + .map_err(|e| self.maybe_number_error(e, JsonType::Int, peek)) } /// Assuming the next value is an integer, consume it. Error if it is not an integer, or is invalid JSON. @@ -132,10 +127,10 @@ impl<'j> Jiter<'j> { self.parser .consume_number::(peek.into_inner(), self.allow_inf_nan) .map_err(|e| { - if !peek.is_num() { - self.wrong_type(JsonType::Int, peek) + if e.error_type == JsonErrorType::FloatExpectingInt { + JiterError::wrong_type(JsonType::Int, JsonType::Float, self.parser.index) } else { - e.into() + self.maybe_number_error(e, JsonType::Int, peek) } }) } @@ -150,13 +145,7 @@ impl<'j> Jiter<'j> { pub fn known_float(&mut self, peek: Peek) -> JiterResult { self.parser .consume_number::(peek.into_inner(), self.allow_inf_nan) - .map_err(|e| { - if !peek.is_num() { - self.wrong_type(JsonType::Float, peek) - } else { - e.into() - } - }) + .map_err(|e| self.maybe_number_error(e, JsonType::Float, peek)) } /// Assuming the next value is a number, consume it and return bytes from the original JSON data. @@ -172,13 +161,7 @@ impl<'j> Jiter<'j> { .consume_number::(peek.into_inner(), self.allow_inf_nan) { Ok(range) => Ok(&self.data[range]), - Err(e) => { - if !peek.is_num() { - Err(self.wrong_type(JsonType::Float, peek)) - } else { - Err(e.into()) - } - } + Err(e) => Err(self.maybe_number_error(e, JsonType::Float, peek)), } } @@ -327,4 +310,12 @@ impl<'j> Jiter<'j> { }; JiterError::wrong_type(expected, actual, self.parser.index) } + + fn maybe_number_error(&self, e: JsonError, expected: JsonType, peek: Peek) -> JiterError { + if peek.is_num() { + e.into() + } else { + self.wrong_type(expected, peek) + } + } } diff --git a/tests/main.rs b/tests/main.rs index 5f430bae..88128d6a 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -934,3 +934,100 @@ fn jiter_clone() { jiter1.finish().unwrap(); jiter2.finish().unwrap(); } + +#[test] +fn jiter_invalid_value() { + let mut jiter = Jiter::new(b" bar", false); + let e = jiter.next_value().unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::ExpectedSomeValue) + ); + assert_eq!(e.index, 1); + assert_eq!(jiter.error_position(e.index), LinePosition::new(1, 2)); +} + +#[test] +fn jiter_wrong_types() { + macro_rules! expect_wrong_type_inner { + ($actual:path, $input:expr, $method: ident, $expected:path) => { + let mut jiter = Jiter::new($input, false); + let result = jiter.$method(); + if $actual == $expected || matches!(($actual, $expected), (JsonType::Int, JsonType::Float)) { + // Type matches, or int input to float + assert!(result.is_ok()); + } else { + let e = result.unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::WrongType { + expected: $expected, + actual: $actual, + } + ); + } + }; + } + + macro_rules! expect_wrong_type { + ($method:ident, $expected:path) => { + expect_wrong_type_inner!(JsonType::Array, b"[]", $method, $expected); + expect_wrong_type_inner!(JsonType::Bool, b"true", $method, $expected); + expect_wrong_type_inner!(JsonType::Int, b"123", $method, $expected); + expect_wrong_type_inner!(JsonType::Float, b"123.123", $method, $expected); + expect_wrong_type_inner!(JsonType::Null, b"null", $method, $expected); + expect_wrong_type_inner!(JsonType::Object, b"{}", $method, $expected); + expect_wrong_type_inner!(JsonType::String, b"\"hello\"", $method, $expected); + }; + } + + expect_wrong_type!(next_array, JsonType::Array); + expect_wrong_type!(next_bool, JsonType::Bool); + expect_wrong_type!(next_bytes, JsonType::String); + expect_wrong_type!(next_null, JsonType::Null); + expect_wrong_type!(next_object, JsonType::Object); + expect_wrong_type!(next_object_bytes, JsonType::Object); + expect_wrong_type!(next_str, JsonType::String); + expect_wrong_type!(next_int, JsonType::Int); + expect_wrong_type!(next_float, JsonType::Float); +} + +#[test] +fn jiter_invalid_numbers() { + let mut jiter = Jiter::new(b" -a", false); + let peek = jiter.peek().unwrap(); + let e = jiter.known_int(peek).unwrap_err(); + assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); + let e = jiter.known_float(peek).unwrap_err(); + assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); + let e = jiter.known_number(peek).unwrap_err(); + assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); + let e = jiter.next_number_bytes().unwrap_err(); + assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); +} + +#[test] +fn jiter_invalid_numbers_expected_some_value() { + let mut jiter = Jiter::new(b" bar", false); + let peek = jiter.peek().unwrap(); + let e = jiter.known_int(peek).unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::ExpectedSomeValue) + ); + let e = jiter.known_float(peek).unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::ExpectedSomeValue) + ); + let e = jiter.known_number(peek).unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::ExpectedSomeValue) + ); + let e = jiter.next_number_bytes().unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::ExpectedSomeValue) + ); +}