Skip to content

Commit

Permalink
support bigint
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed Sep 17, 2024
1 parent a297310 commit e3a95a8
Show file tree
Hide file tree
Showing 12 changed files with 107 additions and 88 deletions.
8 changes: 5 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ debug = true
[workspace.dependencies]
jiter = { path = "crates/jiter", version = "0.5.0" }
batson = { path = "crates/batson", version = "0.5.0" }
pyo3 = { version = "0.22.0" }
pyo3-build-config = { version = "0.22.0" }
bencher = "0.1.5"
paste = "1.0.7"
codspeed-bencher-compat = "2.7.1"
num-bigint = "0.4.4"
num-traits = "0.2.16"
paste = "1.0.7"
pyo3 = { version = "0.22.0" }
pyo3-build-config = { version = "0.22.0" }
smallvec = "2.0.0-alpha.7"
serde = "1.0.210"
serde_json = "1.0.128"
2 changes: 1 addition & 1 deletion crates/batson/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repository = {workspace = true}
[dependencies]
bytemuck = { version = "1.17.1", features = ["aarch64_simd", "derive", "align_offset"] }
jiter = { workspace = true }
num = "0.4.3"
num-bigint = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
simdutf8 = { version = "0.1.4", features = ["aarch64_neon"] }
Expand Down
12 changes: 6 additions & 6 deletions crates/batson/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl<'b> HetArray<'b> {
}
}

pub fn to_json(&self, d: &mut Decoder<'b>) -> DecodeResult<JsonArray<'b>> {
pub fn to_value(&self, d: &mut Decoder<'b>) -> DecodeResult<JsonArray<'b>> {
(0..self.len())
.map(|_| d.take_value())
.collect::<DecodeResult<SmallVec<_, 8>>>()
Expand Down Expand Up @@ -98,7 +98,7 @@ pub(crate) fn header_array_to_json<'b>(d: &mut Decoder<'b>, length: Length) -> D
let length = length.decode(d)?;
d.take_slice(length)?
.iter()
.map(|b| Header::decode(*b, d).map(|h| h.as_value(d)))
.map(|b| Header::decode(*b, d).map(|h| h.header_as_value(d)))
.collect::<DecodeResult<_>>()
.map(Arc::new)
}
Expand Down Expand Up @@ -296,7 +296,7 @@ impl PackedArray {
}
}
}
JsonValue::BigInt(b) => todo!("BigInt {b:?}"),
JsonValue::BigInt(_) => return None,
JsonValue::Float(f) => {
u8_only = None;
i64_only = None;
Expand Down Expand Up @@ -373,7 +373,7 @@ mod test {
};

assert_eq!(offsets, &[0, 1, 3]);
let decode_array = het_array.to_json(&mut decoder).unwrap();
let decode_array = het_array.to_value(&mut decoder).unwrap();
assert_arrays_eq!(decode_array, array);
}

Expand All @@ -391,7 +391,7 @@ mod test {

let het_array = HetArray::decode_header(&mut decoder, 0.into()).unwrap();
assert_eq!(het_array.len(), 0);
let decode_array = het_array.to_json(&mut decoder).unwrap();
let decode_array = het_array.to_value(&mut decoder).unwrap();
assert_arrays_eq!(decode_array, array);
}

Expand Down Expand Up @@ -500,7 +500,7 @@ mod test {
let mut d = decoder.clone();
assert!(!het_array.get(&mut d, 200));

let decode_array = het_array.to_json(&mut decoder).unwrap();
let decode_array = het_array.to_value(&mut decoder).unwrap();
assert_arrays_eq!(decode_array, array);
}

Expand Down
55 changes: 25 additions & 30 deletions crates/batson/src/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use jiter::JsonValue;
use num_bigint::{BigInt, Sign};
use std::fmt;
use std::mem::{align_of, size_of};

use jiter::JsonValue;

use crate::array::{
header_array_to_json, header_array_write_to_json, i64_array_slice, i64_array_to_json, u8_array_slice,
u8_array_to_json, HetArray,
Expand Down Expand Up @@ -56,16 +56,16 @@ impl<'b> Decoder<'b> {
Header::Null => Ok(JsonValue::Null),
Header::Bool(b) => Ok(JsonValue::Bool(b)),
Header::Int(n) => n.decode_i64(self).map(JsonValue::Int),
Header::IntBig(i) => todo!("decoding for bigint {i:?}"),
Header::IntBig(s, l) => self.take_big_int(s, l).map(JsonValue::BigInt),
Header::Float(n) => n.decode_f64(self).map(JsonValue::Float),
Header::Str(l) => self.decode_str(l).map(|s| JsonValue::Str(s.into())),
Header::Str(l) => self.take_str_len(l).map(|s| JsonValue::Str(s.into())),
Header::Object(length) => {
let obj = Object::decode_header(self, length)?;
obj.to_json(self).map(JsonValue::Object)
obj.to_value(self).map(JsonValue::Object)
}
Header::HetArray(length) => {
let het = HetArray::decode_header(self, length)?;
het.to_json(self).map(JsonValue::Array)
het.to_value(self).map(JsonValue::Array)
}
Header::U8Array(length) => u8_array_to_json(self, length).map(JsonValue::Array),
Header::HeaderArray(length) => header_array_to_json(self, length).map(JsonValue::Array),
Expand All @@ -81,13 +81,16 @@ impl<'b> Decoder<'b> {
let i = n.decode_i64(self)?;
writer.write_value(i)?;
}
Header::IntBig(i) => todo!("decoding for bigint {i:?}"),
Header::IntBig(s, l) => {
let int = self.take_big_int(s, l)?;
writer.write_value(int)?;
}
Header::Float(n) => {
let f = n.decode_f64(self)?;
writer.write_value(f)?;
}
Header::Str(l) => {
let s = self.decode_str(l)?;
let s = self.take_str_len(l)?;
writer.write_value(s)?;
}
Header::Object(length) => {
Expand Down Expand Up @@ -130,32 +133,23 @@ impl<'b> Decoder<'b> {
Ok(t)
}

pub fn decode_str(&mut self, length: Length) -> DecodeResult<&'b str> {
fn take_str_len(&mut self, length: Length) -> DecodeResult<&'b str> {
let len = length.decode(self)?;
if len == 0 {
Ok("")
} else {
self.take_str(len)
}
self.take_str(len)
}

pub fn decode_bytes(&mut self, length: Length) -> DecodeResult<&'b [u8]> {
let len = length.decode(self)?;
if len == 0 {
Ok(b"")
/// Takes `length` bytes from the current position and validates them as UTF-8.
///
/// A zero `length` yields the empty string without reading any input. The decoder
/// index is advanced only after the bytes have been validated.
///
/// # Errors
///
/// Returns the decoder's EOF error when fewer than `length` bytes remain, including
/// the case where `index + length` does not fit in a `usize`. Returns a UTF-8 decode
/// error tagged with the current index when the bytes are not valid UTF-8.
pub fn take_str(&mut self, length: usize) -> DecodeResult<&'b str> {
    if length == 0 {
        return Ok("");
    }
    // `length` may come straight from encoded input, so an unchecked `+` could
    // overflow (a panic in debug builds); treat overflow as running past the end.
    let end = self.index.checked_add(length).ok_or_else(|| self.eof())?;
    let slice = self.bytes.get(self.index..end).ok_or_else(|| self.eof())?;
    let s = simdutf8::basic::from_utf8(slice).map_err(|e| DecodeError::from_utf8_error(self.index, e))?;
    self.index = end;
    Ok(s)
}

pub fn take_str(&mut self, length: usize) -> DecodeResult<&'b str> {
let end = self.index + length;
let slice = self.bytes.get(self.index..end).ok_or_else(|| self.eof())?;
let s = simdutf8::basic::from_utf8(slice).map_err(|e| DecodeError::from_utf8_error(self.index, e))?;
self.index = end;
Ok(s)
}

pub fn take_u8(&mut self) -> DecodeResult<u8> {
self.next().ok_or_else(|| self.eof())
}
Expand Down Expand Up @@ -187,9 +181,10 @@ impl<'b> Decoder<'b> {
Ok(i64::from_le_bytes(slice.try_into().unwrap()))
}

pub fn take_f32(&mut self) -> DecodeResult<f32> {
let slice = self.take_slice(4)?;
Ok(f32::from_le_bytes(slice.try_into().unwrap()))
/// Reads an arbitrary-precision integer from the current decoder position.
///
/// `length` is decoded first to obtain the number of magnitude bytes; that many
/// bytes are then taken and interpreted as a little-endian magnitude with `sign`.
///
/// # Errors
///
/// Propagates any error raised while decoding the length or taking the bytes.
pub fn take_big_int(&mut self, sign: Sign, length: Length) -> DecodeResult<BigInt> {
    let byte_count = length.decode(self)?;
    self.take_slice(byte_count)
        .map(|magnitude| BigInt::from_bytes_le(sign, magnitude))
}

pub fn take_f64(&mut self) -> DecodeResult<f64> {
Expand Down
22 changes: 13 additions & 9 deletions crates/batson/src/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::mem::align_of;

use jiter::{JsonArray, JsonObject, JsonValue};
use num_bigint::{BigInt, Sign};
use std::mem::align_of;

use crate::array::encode_array;
use crate::errors::{EncodeError, EncodeResult};
Expand Down Expand Up @@ -35,7 +35,7 @@ impl Encoder {
JsonValue::Null => self.encode_null(),
JsonValue::Bool(b) => self.encode_bool(*b),
JsonValue::Int(int) => self.encode_i64(*int),
JsonValue::BigInt(_) => todo!("encoding BigInt"),
JsonValue::BigInt(big_int) => self.encode_big_int(big_int)?,
JsonValue::Float(f) => self.encode_f64(*f),
JsonValue::Str(s) => self.encode_str(s.as_ref())?,
JsonValue::Array(array) => self.encode_array(array)?,
Expand Down Expand Up @@ -99,15 +99,19 @@ impl Encoder {
}
}

pub fn encode_str(&mut self, s: &str) -> EncodeResult<()> {
self.encode_length(Category::Str, s.len())?;
self.extend(s.as_bytes());
/// Encodes an arbitrary-precision integer as a length-prefixed little-endian magnitude.
///
/// The sign is carried by the header category: `Category::BigIntNeg` for negative
/// values, `Category::BigIntPos` for everything else (zero included).
///
/// # Errors
///
/// Propagates any error returned by `encode_length`.
pub fn encode_big_int(&mut self, int: &BigInt) -> EncodeResult<()> {
    let (sign, magnitude) = int.to_bytes_le();
    let category = if matches!(sign, Sign::Minus) {
        Category::BigIntNeg
    } else {
        Category::BigIntPos
    };
    self.encode_length(category, magnitude.len())?;
    self.extend(&magnitude);
    Ok(())
}

pub fn encode_bytes(&mut self, b: &[u8]) -> EncodeResult<()> {
self.encode_length(Category::U8Array, b.len())?;
self.extend(b);
/// Encodes a string as a `Category::Str` length header followed by its UTF-8 bytes.
///
/// # Errors
///
/// Propagates any error returned by `encode_length`.
pub fn encode_str(&mut self, s: &str) -> EncodeResult<()> {
    let utf8 = s.as_bytes();
    self.encode_length(Category::Str, utf8.len())?;
    self.extend(utf8);
    Ok(())
}

Expand Down
44 changes: 24 additions & 20 deletions crates/batson/src/header.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
use std::sync::Arc;

use jiter::{JsonValue, LazyIndexMap};
use num_bigint::Sign;
use smallvec::smallvec;

use crate::decoder::Decoder;
use crate::errors::{DecodeErrorType, DecodeResult};
use crate::json_writer::JsonWriter;
use crate::ToJsonResult;
use jiter::{JsonValue, LazyIndexMap};
use smallvec::smallvec;
use std::sync::Arc;

#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum Header {
Null,
Bool(bool),
Int(NumberHint),
IntBig(Length),
IntBig(Sign, Length),
Float(NumberHint),
Str(Length),
Object(Length),
Expand All @@ -31,7 +34,8 @@ impl Header {
match cat {
Category::Primitive => Primitive::from_u8(right, d).map(Primitive::header_value),
Category::Int => NumberHint::from_u8(right, d).map(Self::Int),
Category::BigInt => Length::from_u8(right, d).map(Self::IntBig),
Category::BigIntPos => Length::from_u8(right, d).map(|l| Self::IntBig(Sign::Plus, l)),
Category::BigIntNeg => Length::from_u8(right, d).map(|l| Self::IntBig(Sign::Minus, l)),
Category::Float => NumberHint::from_u8(right, d).map(Self::Float),
Category::Str => Length::from_u8(right, d).map(Self::Str),
Category::Object => Length::from_u8(right, d).map(Self::Object),
Expand All @@ -43,12 +47,12 @@ impl Header {
}

/// TODO `'static` should be okay as return lifetime, I don't know why it's not
pub fn as_value<'b>(self, _: &Decoder<'b>) -> JsonValue<'b> {
pub fn header_as_value<'b>(self, _: &Decoder<'b>) -> JsonValue<'b> {
match self {
Header::Null => JsonValue::Null,
Header::Bool(b) => JsonValue::Bool(b),
Header::Int(n) => JsonValue::Int(n.decode_i64_header()),
Header::IntBig(_) => todo!(),
Header::IntBig(..) => unreachable!("Big ints are not supported as header only values"),
Header::Float(n) => JsonValue::Float(n.decode_f64_header()),
Header::Str(_) => JsonValue::Str("".into()),
Header::Object(_) => JsonValue::Object(Arc::new(LazyIndexMap::default())),
Expand All @@ -61,7 +65,7 @@ impl Header {
Header::Null => writer.write_null(),
Header::Bool(b) => writer.write_value(b)?,
Header::Int(n) => writer.write_value(n.decode_i64_header())?,
Header::IntBig(_) => todo!(),
Header::IntBig(..) => return Err("Big ints are not supported as header only values".into()),
Header::Float(n) => writer.write_value(n.decode_f64_header())?,
// TODO check the
Header::Str(len) => {
Expand Down Expand Up @@ -111,16 +115,17 @@ macro_rules! impl_from_u8 {
pub(crate) enum Category {
Primitive = 0,
Int = 1,
BigInt = 2,
Float = 3,
Str = 4,
Object = 5,
HeaderArray = 6,
U8Array = 7,
I64Array = 8,
HetArray = 9,
BigIntPos = 2,
BigIntNeg = 3,
Float = 4,
Str = 5,
Object = 6,
HeaderArray = 7,
U8Array = 8,
I64Array = 9,
HetArray = 10,
}
impl_from_u8!(Category, 9);
impl_from_u8!(Category, 10);

impl Category {
pub fn encode_with(self, right: u8) -> u8 {
Expand Down Expand Up @@ -208,12 +213,11 @@ impl NumberHint {

pub fn decode_f64(self, d: &mut Decoder) -> DecodeResult<f64> {
match self {
// f8 doesn't exist
NumberHint::Size8 => Err(d.error(DecodeErrorType::HeaderInvalid {
// f8 doesn't exist, and currently we don't use f32 anywhere
NumberHint::Size8 | NumberHint::Size32 => Err(d.error(DecodeErrorType::HeaderInvalid {
value: self as u8,
ty: "f64",
})),
NumberHint::Size32 => d.take_f32().map(f64::from),
NumberHint::Size64 => d.take_f64(),
// TODO check this has same performance as inline match
_ => Ok(self.decode_f64_header()),
Expand Down
8 changes: 8 additions & 0 deletions crates/batson/src/json_writer.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use num_bigint::BigInt;
use serde::ser::Serializer as _;
use serde_json::ser::Serializer;

Expand Down Expand Up @@ -116,3 +117,10 @@ impl WriteJson for f64 {
ser.serialize_f64(*self).map_err(Into::into)
}
}

impl WriteJson for BigInt {
    /// Appends the integer's base-10 representation to the writer's buffer as-is,
    /// with no quoting or escaping applied.
    fn write_json(&self, writer: &mut JsonWriter) -> ToJsonResult<()> {
        let decimal = self.to_str_radix(10);
        writer.vec.extend_from_slice(decimal.as_bytes());
        Ok(())
    }
}
1 change: 0 additions & 1 deletion crates/batson/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#![allow(dead_code)]
mod array;
mod decoder;
mod encoder;
Expand Down
Loading

0 comments on commit e3a95a8

Please sign in to comment.