Skip to content

Commit

Permalink
feat: Make num-bigint optional (#130)
Browse files Browse the repository at this point in the history
  • Loading branch information
fasterthanlime authored Sep 12, 2024
1 parent 754395c commit b09b969
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 64 deletions.
2 changes: 1 addition & 1 deletion crates/jiter-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repository = {workspace = true}

[dependencies]
pyo3 = { workspace = true, features = ["num-bigint"] }
jiter = { path = "../jiter", features = ["python"] }
jiter = { path = "../jiter", features = ["python", "num-bigint"] }

[features]
# must be enabled when building with `cargo build`, maturin enables this automatically
Expand Down
34 changes: 20 additions & 14 deletions crates/jiter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,37 @@
name = "jiter"
description = "Fast Iterable JSON parser"
readme = "../../README.md"
version = {workspace = true}
edition = {workspace = true}
authors = {workspace = true}
license = {workspace = true}
keywords = {workspace = true}
categories = {workspace = true}
homepage = {workspace = true}
repository = {workspace = true}
version = { workspace = true }
edition = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
categories = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }

[dependencies]
num-bigint = "0.4.4"
num-bigint = { version = "0.4.4", optional = true }
num-traits = "0.2.16"
ahash = "0.8.0"
smallvec = "1.11.0"
pyo3 = { workspace = true, optional = true, features = ["num-bigint"] }
lexical-parse-float = { version = "0.8.5", features = ["format"] }
pyo3 = { workspace = true, optional = true }
lexical-parse-float = { version = "0.8.5", features = ["format"] }
bitvec = "1.0.1"

[features]
default = ["num-bigint"]
python = ["dep:pyo3", "dep:pyo3-build-config"]
num-bigint = ["dep:num-bigint", "pyo3/num-bigint"]

[dev-dependencies]
bencher = "0.1.5"
paste = "1.0.7"
serde_json = {version = "1.0.87", features = ["preserve_order", "arbitrary_precision", "float_roundtrip"]}
serde_json = { version = "1.0.87", features = [
"preserve_order",
"arbitrary_precision",
"float_roundtrip",
] }
serde = "1.0.147"
pyo3 = { workspace = true, features = ["auto-initialize"] }
codspeed-bencher-compat = "2.7.1"
Expand Down Expand Up @@ -71,5 +77,5 @@ doc_markdown = "allow"
implicit_clone = "allow"
iter_without_into_iter = "allow"
return_self_not_must_use = "allow"
inline_always = "allow" # TODO remove?
match_same_arms = "allow" # TODO remove?
inline_always = "allow" # TODO remove?
match_same_arms = "allow" # TODO remove?
120 changes: 71 additions & 49 deletions crates/jiter/src/number_decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
#[cfg(feature = "num-bigint")]
use num_traits::cast::ToPrimitive;

use std::ops::Range;

use lexical_parse_float::{format as lexical_format, FromLexicalWithOptions, Options as ParseFloatOptions};
Expand All @@ -16,13 +19,15 @@ pub trait AbstractNumberDecoder {
/// An integer value produced by the number decoder.
///
/// Values that fit in an `i64` are stored in `Int`. The `BigInt` variant only
/// exists when the `num-bigint` feature is enabled.
#[derive(Debug, Clone, PartialEq)]
pub enum NumberInt {
/// Integer that fits in an `i64`.
Int(i64),
/// Arbitrary-precision integer; compiled in only with the `num-bigint` feature.
#[cfg(feature = "num-bigint")]
BigInt(BigInt),
}

impl From<NumberInt> for f64 {
fn from(num: NumberInt) -> Self {
match num {
NumberInt::Int(int) => int as f64,
#[cfg(feature = "num-bigint")]
NumberInt::BigInt(big_int) => big_int.to_f64().unwrap_or(f64::NAN),
}
}
Expand Down Expand Up @@ -118,6 +123,7 @@ impl pyo3::ToPyObject for NumberAny {
fn to_object(&self, py: pyo3::Python<'_>) -> pyo3::PyObject {
match self {
Self::Int(NumberInt::Int(int)) => int.to_object(py),
#[cfg(feature = "num-bigint")]
Self::Int(NumberInt::BigInt(big_int)) => big_int.to_object(py),
Self::Float(float) => float.to_object(py),
}
Expand Down Expand Up @@ -220,8 +226,8 @@ impl IntParse {
index += 1;
let (chunk, new_index) = IntChunk::parse_small(data, index, first_value);

let mut big_value: BigInt = match chunk {
IntChunk::Ongoing(value) => value.into(),
let ongoing: u64 = match chunk {
IntChunk::Ongoing(value) => value,
IntChunk::Done(value) => {
let mut value_i64 = value as i64;
if !positive {
Expand All @@ -231,62 +237,76 @@ impl IntParse {
}
IntChunk::Float => return Ok((Self::Float, new_index)),
};
index = new_index;

// number is too big for i64, we need ot use a big int
loop {
let (chunk, new_index) = IntChunk::parse_big(data, index);
if (new_index - start) > 4300 {
return json_err!(NumberOutOfRange, start + 4301);
}
match chunk {
IntChunk::Ongoing(value) => {
big_value *= ONGOING_CHUNK_MULTIPLIER;
big_value += value;
index = new_index;
// number is too big for i64, we need to use a BigInt,
// or error out if num-bigint is not enabled

#[cfg(not(feature = "num-bigint"))]
{
// silence unused variable warning
let _ = (ongoing, start);
return json_err!(NumberOutOfRange, index);
}

#[cfg(feature = "num-bigint")]
{
#[cfg(target_arch = "aarch64")]
// in aarch64 we use a 128 bit registers - 16 bytes
const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
#[cfg(not(target_arch = "aarch64"))]
// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
const ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);

const POW_10: [u64; 18] = [
10u64.pow(0),
10u64.pow(1),
10u64.pow(2),
10u64.pow(3),
10u64.pow(4),
10u64.pow(5),
10u64.pow(6),
10u64.pow(7),
10u64.pow(8),
10u64.pow(9),
10u64.pow(10),
10u64.pow(11),
10u64.pow(12),
10u64.pow(13),
10u64.pow(14),
10u64.pow(15),
10u64.pow(16),
10u64.pow(17),
];

let mut big_value: BigInt = ongoing.into();
index = new_index;

loop {
let (chunk, new_index) = IntChunk::parse_big(data, index);
if (new_index - start) > 4300 {
return json_err!(NumberOutOfRange, start + 4301);
}
IntChunk::Done(value) => {
big_value *= POW_10[new_index - index];
big_value += value;
if !positive {
big_value = -big_value;
match chunk {
IntChunk::Ongoing(value) => {
big_value *= ONGOING_CHUNK_MULTIPLIER;
big_value += value;
index = new_index;
}
IntChunk::Done(value) => {
big_value *= POW_10[new_index - index];
big_value += value;
if !positive {
big_value = -big_value;
}
return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
}
return Ok((Self::Int(NumberInt::BigInt(big_value)), new_index));
IntChunk::Float => return Ok((Self::Float, new_index)),
}
IntChunk::Float => return Ok((Self::Float, new_index)),
}
}
}
}

/// Powers of ten from `10^0` through `10^17`, indexed by exponent.
///
/// Indexed with `new_index - index` when the final (`Done`) chunk is folded into
/// the accumulated big integer, so the accumulator is scaled by the width of
/// that last chunk before the chunk value is added.
static POW_10: [u64; 18] = [
10u64.pow(0),
10u64.pow(1),
10u64.pow(2),
10u64.pow(3),
10u64.pow(4),
10u64.pow(5),
10u64.pow(6),
10u64.pow(7),
10u64.pow(8),
10u64.pow(9),
10u64.pow(10),
10u64.pow(11),
10u64.pow(12),
10u64.pow(13),
10u64.pow(14),
10u64.pow(15),
10u64.pow(16),
10u64.pow(17),
];

// Factor the accumulated big integer is multiplied by before each `Ongoing`
// chunk value is added. Exactly one of the two definitions below is compiled,
// selected by target architecture.
#[cfg(target_arch = "aarch64")]
// in aarch64 we use a 128 bit registers - 16 bytes
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(16);
#[cfg(not(target_arch = "aarch64"))]
// decode_int_chunk_fallback - we parse 18 bytes when the number is ongoing
static ONGOING_CHUNK_MULTIPLIER: u64 = 10u64.pow(18);

pub(crate) enum IntChunk {
Ongoing(u64),
Done(u64),
Expand Down Expand Up @@ -362,6 +382,8 @@ pub(crate) static INT_CHAR_MAP: [bool; 256] = {

/// A number described by an index range and an integer flag instead of a decoded value.
///
/// NOTE(review): presumably `range` indexes into the input bytes being parsed and
/// `is_int` distinguishes integers from floats; confirm against the decoder
/// implementation, which is not visible here.
pub struct NumberRange {
/// Start/end indices of the number.
pub range: Range<usize>,
// in some cfg configurations, this field is never read.
#[allow(dead_code)]
pub is_int: bool,
}

Expand Down
5 changes: 5 additions & 0 deletions crates/jiter/src/value.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::borrow::Cow;
use std::sync::Arc;

#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
use smallvec::SmallVec;

Expand All @@ -16,6 +17,7 @@ pub enum JsonValue<'s> {
Null,
Bool(bool),
Int(i64),
#[cfg(feature = "num-bigint")]
BigInt(BigInt),
Float(f64),
Str(Cow<'s, str>),
Expand All @@ -34,6 +36,7 @@ impl pyo3::ToPyObject for JsonValue<'_> {
Self::Null => py.None().to_object(py),
Self::Bool(b) => b.to_object(py),
Self::Int(i) => i.to_object(py),
#[cfg(feature = "num-bigint")]
Self::BigInt(b) => b.to_object(py),
Self::Float(f) => f.to_object(py),
Self::Str(s) => s.to_object(py),
Expand Down Expand Up @@ -78,6 +81,7 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> {
JsonValue::Null => JsonValue::Null,
JsonValue::Bool(b) => JsonValue::Bool(b),
JsonValue::Int(i) => JsonValue::Int(i),
#[cfg(feature = "num-bigint")]
JsonValue::BigInt(b) => JsonValue::BigInt(b),
JsonValue::Float(f) => JsonValue::Float(f),
JsonValue::Str(s) => JsonValue::Str(s.into_owned().into()),
Expand Down Expand Up @@ -200,6 +204,7 @@ fn take_value<'j, 's>(
let n = parser.consume_number::<NumberAny>(peek.into_inner(), allow_inf_nan);
match n {
Ok(NumberAny::Int(NumberInt::Int(int))) => Ok(JsonValue::Int(int)),
#[cfg(feature = "num-bigint")]
Ok(NumberAny::Int(NumberInt::BigInt(big_int))) => Ok(JsonValue::BigInt(big_int)),
Ok(NumberAny::Float(float)) => Ok(JsonValue::Float(float)),
Err(e) => {
Expand Down

0 comments on commit b09b969

Please sign in to comment.