Skip to content

Commit

Permalink
Partial JSON parsing support trailing strings (#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored May 22, 2024
1 parent 240f180 commit c7f8fdd
Show file tree
Hide file tree
Showing 20 changed files with 410 additions and 150 deletions.
48 changes: 45 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
- run: cargo careful t -F python
if: matrix.rust-version == 'nightly'

- uses: codecov/codecov-action@v3
- uses: codecov/codecov-action@v4
with:
env_vars: RUNS_ON,RUST_VERSION
token: ${{ secrets.CODECOV_TOKEN }}
Expand Down Expand Up @@ -103,11 +103,53 @@ jobs:

- run: cargo test --doc

- uses: codecov/codecov-action@v3
- uses: codecov/codecov-action@v4
with:
env_vars: RUNS_ON,RUST_VERSION
token: ${{ secrets.CODECOV_TOKEN }}

test-python:
name: test jiter-python

runs-on: ubuntu-latest

env:
RUNS_ON: ubuntu-latest

steps:
- uses: actions/checkout@v3

- name: set up python
uses: actions/setup-python@v4
with:
python-version: '3.12'

- uses: dtolnay/rust-toolchain@stable

- id: cache-rust
uses: Swatinem/rust-cache@v2

- run: cargo install rustfilt coverage-prepare
if: steps.cache-rust.outputs.cache-hit != 'true'

- run: rustup component add llvm-tools-preview

- run: make python-install

- run: pip install -e crates/jiter-python
env:
RUSTFLAGS: '-C instrument-coverage'

- run: pytest crates/jiter-python/tests
env:
RUST_BACKTRACE: 1

- run: coverage-prepare lcov $(python -c 'import jiter.jiter;print(jiter.jiter.__file__)')

- uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}

bench:
runs-on: ubuntu-latest
steps:
Expand Down Expand Up @@ -504,7 +546,7 @@ jobs:
# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
check:
if: always()
needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint]
needs: [test-linux, test-macos, test-python, bench, fuzz, fuzz-skip, lint]
runs-on: ubuntu-latest
steps:
- name: Decide whether the needed jobs succeeded or failed
Expand Down
44 changes: 44 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
.DEFAULT_GOAL := all

.PHONY: format
format:
@cargo fmt --version
cargo fmt

.PHONY: lint
lint:
@cargo clippy --version
cargo clippy -- -D warnings
cargo doc

.PHONY: test
test:
cargo test

.PHONY: python-install
python-install:
pip install maturin
pip install -r crates/jiter-python/tests/requirements.txt

.PHONY: python-dev
python-dev:
maturin develop -m crates/jiter-python/Cargo.toml

.PHONY: python-test
python-test: python-dev
pytest crates/jiter-python/tests

.PHONY: bench
bench:
cargo bench -p jiter -F python

.PHONY: fuzz
fuzz:
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_to_serde --release

.PHONY: fuzz-skip
fuzz-skip:
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_skip --release

# Default goal: format, lint, then run the Rust tests and the Python tests.
# The Python test target defined in this Makefile is `python-test`.
.PHONY: all
all: format lint test python-test
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ Fast iterable JSON parser.
Documentation is available at [docs.rs/jiter](https://docs.rs/jiter).

jiter has three interfaces:
* [`JsonValue`] an enum representing JSON data
* [`Jiter`] an iterator over JSON data
* [`python_parse`] which parses a JSON string into a Python object
* `JsonValue` an enum representing JSON data
* `Jiter` an iterator over JSON data
* `PythonParse` which parses a JSON string into a Python object

## JsonValue Example

See [the `JsonValue` docs][JsonValue] for more details.
See [the `JsonValue` docs](https://docs.rs/jiter/latest/jiter/enum.JsonValue.html) for more details.

```rust
use jiter::JsonValue;
Expand Down Expand Up @@ -54,7 +54,7 @@ Object(

## Jiter Example

To use [Jiter], you need to know what schema you're expecting:
To use [Jiter](https://docs.rs/jiter/latest/jiter/struct.Jiter.html), you need to know what schema you're expecting:

```rust
use jiter::{Jiter, NumberInt, Peek};
Expand All @@ -69,7 +69,7 @@ fn main() {
"+44 2345678"
]
}"#;
let mut jiter = Jiter::new(json_data.as_bytes(), true);
let mut jiter = Jiter::new(json_data.as_bytes());
assert_eq!(jiter.next_object().unwrap(), Some("name"));
assert_eq!(jiter.next_str().unwrap(), "John Doe");
assert_eq!(jiter.next_key().unwrap(), Some("age"));
Expand Down
4 changes: 2 additions & 2 deletions crates/fuzz/fuzz_targets/compare_skip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ fuzz_target!(|json: String| {
let json_data = json.as_bytes();
match JsonValue::parse(json_data, false) {
Ok(_) => {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
jiter.next_skip().unwrap();
jiter.finish().unwrap();
}
Err(json_error) => {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let jiter_error = match jiter.next_skip() {
Ok(_) => jiter.finish().unwrap_err(),
Err(e) => e,
Expand Down
13 changes: 9 additions & 4 deletions crates/jiter-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ def from_json(
/,
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
allow_partial: bool = False,
cache_mode: Literal[True, False, "all", "keys", "none"] = "all",
partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False,
catch_duplicate_keys: bool = False,
lossless_floats: bool = False,
) -> Any:
"""
Parse input bytes into a JSON object.
Expand All @@ -26,12 +27,16 @@ def from_json(
json_data: The JSON data to parse
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
Defaults to True.
cache_strings: cache Python strings to improve performance at the cost of some memory usage
cache_mode: cache Python strings to improve performance at the cost of some memory usage
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
partial_mode: How to handle incomplete strings:
- False / 'off' - raise an exception if the input is incomplete
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
Returns:
Python object built from the JSON input.
Expand Down
11 changes: 7 additions & 4 deletions crates/jiter-python/jiter.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ def from_json(
/,
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = "all",
allow_partial: bool = False,
cache_mode: Literal[True, False, "all", "keys", "none"] = "all",
partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False,
catch_duplicate_keys: bool = False,
lossless_floats: bool = False,
) -> Any:
Expand All @@ -18,11 +18,14 @@ def from_json(
json_data: The JSON data to parse
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
Defaults to True.
cache_strings: cache Python strings to improve performance at the cost of some memory usage
cache_mode: cache Python strings to improve performance at the cost of some memory usage
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
partial_mode: How to handle incomplete strings:
- False / 'off' - raise an exception if the input is incomplete
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
Expand Down
16 changes: 8 additions & 8 deletions crates/jiter-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::sync::OnceLock;

use pyo3::prelude::*;

use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParse, StringCacheMode};

#[allow(clippy::fn_params_excessive_bools)]
#[pyfunction(
Expand All @@ -11,8 +11,8 @@ use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
/,
*,
allow_inf_nan=true,
cache_strings=StringCacheMode::All,
allow_partial=false,
cache_mode=StringCacheMode::All,
partial_mode=PartialMode::Off,
catch_duplicate_keys=false,
lossless_floats=false,
)
Expand All @@ -21,15 +21,15 @@ pub fn from_json<'py>(
py: Python<'py>,
json_data: &[u8],
allow_inf_nan: bool,
cache_strings: StringCacheMode,
allow_partial: bool,
cache_mode: StringCacheMode,
partial_mode: PartialMode,
catch_duplicate_keys: bool,
lossless_floats: bool,
) -> PyResult<Bound<'py, PyAny>> {
let parse_builder = PythonParseBuilder {
let parse_builder = PythonParse {
allow_inf_nan,
cache_mode: cache_strings,
allow_partial,
cache_mode,
partial_mode,
catch_duplicate_keys,
lossless_floats,
};
Expand Down
Loading

0 comments on commit c7f8fdd

Please sign in to comment.