Skip to content

Commit

Permalink
lossless float support (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored May 21, 2024
1 parent 1fbedbf commit e78c3fa
Show file tree
Hide file tree
Showing 12 changed files with 382 additions and 122 deletions.
28 changes: 27 additions & 1 deletion crates/jiter-python/jiter.pyi
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import decimal
from typing import Any, Literal

def from_json(
json_data: bytes,
/,
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = "all",
allow_partial: bool = False,
catch_duplicate_keys: bool = False,
lossless_floats: bool = False,
) -> Any:
"""
Parse input bytes into a JSON object.
Expand All @@ -22,6 +24,7 @@ def from_json(
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
Returns:
Python object built from the JSON input.
Expand All @@ -39,3 +42,26 @@ def cache_usage() -> int:
Returns:
Size of the string cache in bytes.
"""


class LosslessFloat:
    """
    Represents a float parsed from JSON by holding on to the raw bytes of the
    number, so that no precision is lost until the caller chooses a target type.
    """

    def __init__(self, json_float: bytes) -> None:
        """
        Construct a LosslessFloat object from a JSON bytes slice.

        Raises:
            ValueError: if the bytes are not a valid JSON number, e.g. they
                contain trailing characters.
        """

    def as_decimal(self) -> decimal.Decimal:
        """Construct a Python Decimal from the JSON bytes slice"""

    def __float__(self) -> float:
        """Construct a Python float from the JSON bytes slice"""

    def __bytes__(self) -> bytes:
        """Return the JSON bytes slice as bytes"""

    def __str__(self) -> str:
        """Return the JSON bytes slice as a string"""

    def __repr__(self) -> str:
        """Return a representation of the form `LosslessFloat(<json bytes as text>)`"""
21 changes: 13 additions & 8 deletions crates/jiter-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ use std::sync::OnceLock;

use pyo3::prelude::*;

use jiter::{map_json_error, python_parse, StringCacheMode};
use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};

#[allow(clippy::fn_params_excessive_bools)]
#[pyfunction(
signature = (
json_data,
Expand All @@ -12,7 +13,8 @@ use jiter::{map_json_error, python_parse, StringCacheMode};
allow_inf_nan=true,
cache_strings=StringCacheMode::All,
allow_partial=false,
catch_duplicate_keys=false
catch_duplicate_keys=false,
lossless_floats=false,
)
)]
pub fn from_json<'py>(
Expand All @@ -22,16 +24,18 @@ pub fn from_json<'py>(
cache_strings: StringCacheMode,
allow_partial: bool,
catch_duplicate_keys: bool,
lossless_floats: bool,
) -> PyResult<Bound<'py, PyAny>> {
python_parse(
py,
json_data,
let parse_builder = PythonParseBuilder {
allow_inf_nan,
cache_strings,
cache_mode: cache_strings,
allow_partial,
catch_duplicate_keys,
)
.map_err(|e| map_json_error(json_data, &e))
lossless_floats,
};
parse_builder
.python_parse(py, json_data)
.map_err(|e| map_json_error(json_data, &e))
}

pub fn get_jiter_version() -> &'static str {
Expand Down Expand Up @@ -65,5 +69,6 @@ fn jiter_python(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(from_json, m)?)?;
m.add_function(wrap_pyfunction!(cache_clear, m)?)?;
m.add_function(wrap_pyfunction!(cache_usage, m)?)?;
m.add_class::<LosslessFloat>()?;
Ok(())
}
49 changes: 49 additions & 0 deletions crates/jiter-python/tests/test_jiter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from decimal import Decimal

import jiter
import pytest
from math import inf
Expand Down Expand Up @@ -144,3 +146,50 @@ def test_unicode_cache():
jiter.cache_clear()
parsed = jiter.from_json(json)
assert parsed == {"💩": "£"}


def test_json_float():
    """A LosslessFloat built directly from bytes converts to str, repr, float, Decimal and bytes."""
    lossless = jiter.LosslessFloat(b'123.45')

    # textual views
    assert str(lossless) == '123.45'
    assert repr(lossless) == 'LosslessFloat(123.45)'

    # numeric views
    assert float(lossless) == 123.45
    assert lossless.as_decimal() == Decimal('123.45')

    # raw bytes are returned unchanged
    assert bytes(lossless) == b'123.45'


def test_json_float_scientific():
    """Exponent notation is kept verbatim in the string form and converts to float and Decimal."""
    lossless = jiter.LosslessFloat(b'123e4')

    assert str(lossless) == '123e4'
    assert float(lossless) == 123e4
    assert lossless.as_decimal() == Decimal('123e4')


def test_json_float_invalid():
    """Constructing a LosslessFloat from bytes with trailing junk raises ValueError."""
    expected_message = 'trailing characters at line 1 column 6'
    with pytest.raises(ValueError, match=expected_message):
        jiter.LosslessFloat(b'123.4x')


def test_lossless_floats():
    """`from_json` returns a plain float by default and a LosslessFloat when `lossless_floats=True`."""
    # default mode: ordinary Python float
    plain = jiter.from_json(b'12.3')
    assert isinstance(plain, float)
    assert plain == 12.3

    # lossless mode on a short literal
    short = jiter.from_json(b'12.3', lossless_floats=True)
    assert isinstance(short, jiter.LosslessFloat)
    assert str(short) == '12.3'
    assert float(short) == 12.3
    assert short.as_decimal() == Decimal('12.3')

    # lossless mode on a literal with more digits than a float can hold
    precise = jiter.from_json(b'123.456789123456789e45', lossless_floats=True)
    assert isinstance(precise, jiter.LosslessFloat)
    assert 123e45 < float(precise) < 124e45
    assert precise.as_decimal() == Decimal('1.23456789123456789E+47')
    assert bytes(precise) == b'123.456789123456789e45'
    assert str(precise) == '123.456789123456789e45'
    assert repr(precise) == 'LosslessFloat(123.456789123456789e45)'


def test_lossless_floats_int():
    """Integers are still parsed as `int` when `lossless_floats=True`."""
    parsed = jiter.from_json(b'123', lossless_floats=True)

    assert isinstance(parsed, int)
    assert parsed == 123
3 changes: 2 additions & 1 deletion crates/jiter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ num-bigint = "0.4.4"
num-traits = "0.2.16"
ahash = "0.8.0"
smallvec = "1.11.0"
pyo3 = { version = "0.21.0", default-features=false, features = ["num-bigint"], optional = true }
pyo3 = { version = "0.21.0", optional = true }
lexical-parse-float = { version = "0.8.5", features = ["format"] }

[features]
Expand Down Expand Up @@ -69,5 +69,6 @@ match_bool = "allow"
doc_markdown = "allow"
implicit_clone = "allow"
iter_without_into_iter = "allow"
return_self_not_must_use = "allow"
inline_always = "allow" # TODO remove?
match_same_arms = "allow" # TODO remove?
38 changes: 18 additions & 20 deletions crates/jiter/benches/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,18 @@ use std::io::Read;

use pyo3::Python;

use jiter::{cache_clear, python_parse, StringCacheMode};
use jiter::{cache_clear, PythonParseBuilder, StringCacheMode};

fn python_parse_numeric(bench: &mut Bencher) {
Python::with_gil(|py| {
cache_clear(py);
bench.iter(|| {
python_parse(
py,
br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#,
false,
StringCacheMode::All,
false,
false,
)
.unwrap()
PythonParseBuilder::default()
.python_parse(
py,
br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#,
)
.unwrap()
});
})
}
Expand All @@ -28,15 +25,9 @@ fn python_parse_other(bench: &mut Bencher) {
Python::with_gil(|py| {
cache_clear(py);
bench.iter(|| {
python_parse(
py,
br#"["string", true, false, null]"#,
false,
StringCacheMode::All,
false,
false,
)
.unwrap()
PythonParseBuilder::default()
.python_parse(py, br#"["string", true, false, null]"#)
.unwrap()
});
})
}
Expand All @@ -49,7 +40,14 @@ fn _python_parse_file(path: &str, bench: &mut Bencher, cache_mode: StringCacheMo

Python::with_gil(|py| {
cache_clear(py);
bench.iter(|| python_parse(py, json_data, false, cache_mode, false, false).unwrap());
bench.iter(|| {
PythonParseBuilder {
cache_mode,
..Default::default()
}
.python_parse(py, json_data)
.unwrap()
});
})
}

Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ impl<'j> Jiter<'j> {
.parser
.consume_number::<NumberRange>(peek.into_inner(), self.allow_inf_nan)
{
Ok(range) => Ok(&self.data[range]),
Ok(numbe_range) => Ok(&self.data[numbe_range.range]),
Err(e) => Err(self.maybe_number_error(e, JsonType::Float, peek)),
}
}
Expand Down
6 changes: 5 additions & 1 deletion crates/jiter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ mod lazy_index_map;
mod number_decoder;
mod parse;
#[cfg(feature = "python")]
mod py_lossless_float;
#[cfg(feature = "python")]
mod py_string_cache;
#[cfg(feature = "python")]
mod python;
Expand All @@ -21,7 +23,9 @@ pub use number_decoder::{NumberAny, NumberInt};
pub use parse::Peek;
pub use value::{JsonArray, JsonObject, JsonValue};

#[cfg(feature = "python")]
pub use py_lossless_float::LosslessFloat;
#[cfg(feature = "python")]
pub use py_string_cache::{cache_clear, cache_usage, cached_py_string, pystring_fast_new, StringCacheMode};
#[cfg(feature = "python")]
pub use python::{map_json_error, python_parse};
pub use python::{map_json_error, PythonParseBuilder};
Loading

0 comments on commit e78c3fa

Please sign in to comment.