From 1fbedbfbcb86a767b9dd621d080f430528aebc33 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Mon, 20 May 2024 11:25:48 -0400 Subject: [PATCH] tweak python package (#96) --- .github/workflows/ci.yml | 3 +- crates/jiter-python/Cargo.toml | 10 +++++ crates/jiter-python/README.md | 48 ++++++++++++++-------- crates/jiter-python/jiter.pyi | 37 +++++++++++++++-- crates/jiter-python/pyproject.toml | 29 ++++++++++--- crates/jiter-python/src/lib.rs | 12 +++--- crates/jiter-python/tests/requirements.txt | 1 + 7 files changed, 107 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57c23fc..03dfbc7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -492,11 +492,10 @@ jobs: - name: run tests run: | - cd crates/jiter-python python3 -m pip install -U pip -r tests/requirements.txt python3 -m pip install jiter --no-index --no-deps --find-links dist --force-reinstall python3 -m pytest - + working-directory: crates/jiter-python # https://github.com/marketplace/actions/alls-green#why used for branch protection checks check: diff --git a/crates/jiter-python/Cargo.toml b/crates/jiter-python/Cargo.toml index 8fa369d..2a24d0c 100644 --- a/crates/jiter-python/Cargo.toml +++ b/crates/jiter-python/Cargo.toml @@ -15,3 +15,13 @@ extension-module = ["pyo3/extension-module"] [lib] name = "jiter_python" crate-type = ["cdylib", "rlib"] + +[lints.clippy] +dbg_macro = "deny" +print_stdout = "deny" +print_stderr = "deny" +# in general we lint against the pedantic group, but we will whitelist +# certain lints which we don't want to enforce (for now) +pedantic = { level = "deny", priority = -1 } +missing_errors_doc = "allow" +must_use_candidate = "allow" diff --git a/crates/jiter-python/README.md b/crates/jiter-python/README.md index 15dd267..d125b5c 100644 --- a/crates/jiter-python/README.md +++ b/crates/jiter-python/README.md @@ -1,36 +1,52 @@ # jiter 
+[![CI](https://github.com/pydantic/jiter/workflows/CI/badge.svg?event=push)](https://github.com/pydantic/jiter/actions?query=event%3Apush+branch%3Amain+workflow%3ACI) +[![pypi](https://img.shields.io/pypi/v/jiter.svg)](https://pypi.python.org/pypi/jiter) +[![versions](https://img.shields.io/pypi/pyversions/jiter.svg)](https://github.com/pydantic/jiter) +[![license](https://img.shields.io/github/license/pydantic/jiter.svg)](https://github.com/pydantic/jiter/blob/main/LICENSE) + This is a standalone version of the JSON parser used in `pydantic-core`. The recommendation is to only use this package directly if you do not use `pydantic`. The API is extremely minimal: ```python def from_json( - data: bytes, + json_data: bytes, + /, *, allow_inf_nan: bool = True, - cache_strings: Literal[True, False, 'all', 'keys', 'none'] = True, + cache_strings: Literal[True, False, "all", "keys", "none"] = True, allow_partial: bool = False, catch_duplicate_keys: bool = False, ) -> Any: """ - Parse input bytes into a JSON string. - - allow_inf_nan: if True, to allow Infinity and NaN as values in the JSON - cache_strings: cache Python strings to improve performance at the cost of some memory usage - - True / 'all' - cache all strings - - 'keys' - cache only object keys - - 'none' - cache nothing - allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays - catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times + Parse input bytes into a JSON object. + + Arguments: + json_data: The JSON data to parse + allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields. + Defaults to True. 
+ cache_strings: cache Python strings to improve performance at the cost of some memory usage + - True / 'all' - cache all strings + - 'keys' - cache only object keys + - False / 'none' - cache nothing + allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays + catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times + + Returns: + Python object built from the JSON input. """ - ... def cache_clear() -> None: - """Clear the string cache""" - ... + """ + Reset the string cache. + """ def cache_usage() -> int: - """Get number of strings in the cache""" - ... + """ + Get the size of the string cache. + + Returns: + Size of the string cache in bytes. + """ ``` diff --git a/crates/jiter-python/jiter.pyi b/crates/jiter-python/jiter.pyi index c3a781c..d15033f 100644 --- a/crates/jiter-python/jiter.pyi +++ b/crates/jiter-python/jiter.pyi @@ -1,12 +1,41 @@ from typing import Any, Literal def from_json( - data: bytes, + json_data: bytes, + /, *, allow_inf_nan: bool = True, cache_strings: Literal[True, False, "all", "keys", "none"] = True, allow_partial: bool = False, catch_duplicate_keys: bool = False, -) -> Any: ... -def cache_clear() -> None: ... -def cache_usage() -> int: ... +) -> Any: + """ + Parse input bytes into a JSON object. + + Arguments: + json_data: The JSON data to parse + allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields. + Defaults to True. + cache_strings: cache Python strings to improve performance at the cost of some memory usage + - True / 'all' - cache all strings + - 'keys' - cache only object keys + - False / 'none' - cache nothing + allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays + catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times + + Returns: + Python object built from the JSON input. 
+ """ + +def cache_clear() -> None: + """ + Reset the string cache. + """ + +def cache_usage() -> int: + """ + Get the size of the string cache. + + Returns: + Size of the string cache in bytes. + """ diff --git a/crates/jiter-python/pyproject.toml b/crates/jiter-python/pyproject.toml index e9f985e..a45c48a 100644 --- a/crates/jiter-python/pyproject.toml +++ b/crates/jiter-python/pyproject.toml @@ -4,16 +4,35 @@ build-backend = "maturin" [project] name = "jiter" +description = "Fast iterable JSON parser." requires-python = ">=3.8" authors = [ {name = "Samuel Colvin", email = "s@muelcolvin.com"} ] -dynamic = [ - "description", - "license", - "readme", - "version" +license = "MIT" +readme = "README.md" +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: MIT License", + "Operating System :: Unix", + "Operating System :: POSIX :: Linux", + "Environment :: Console", + "Environment :: MacOS X", + "Topic :: File Formats :: JSON", + "Framework :: Pydantic :: 2", ] +dynamic = ["version"] [tool.maturin] module-name = "jiter" diff --git a/crates/jiter-python/src/lib.rs b/crates/jiter-python/src/lib.rs index 2aac8c4..a30082d 100644 --- a/crates/jiter-python/src/lib.rs +++ b/crates/jiter-python/src/lib.rs @@ -6,7 +6,8 @@ use jiter::{map_json_error, python_parse, StringCacheMode}; #[pyfunction( signature = ( - data, + json_data, + /, *, allow_inf_nan=true, cache_strings=StringCacheMode::All, @@ -16,22 +17,21 @@ use jiter::{map_json_error, python_parse, StringCacheMode}; )] pub fn 
from_json<'py>( py: Python<'py>, - data: &[u8], + json_data: &[u8], allow_inf_nan: bool, cache_strings: StringCacheMode, allow_partial: bool, catch_duplicate_keys: bool, ) -> PyResult> { - let json_bytes = data; python_parse( py, - json_bytes, + json_data, allow_inf_nan, cache_strings, allow_partial, catch_duplicate_keys, ) - .map_err(|e| map_json_error(json_bytes, &e)) + .map_err(|e| map_json_error(json_data, &e)) } pub fn get_jiter_version() -> &'static str { @@ -50,7 +50,7 @@ pub fn get_jiter_version() -> &'static str { #[pyfunction] pub fn cache_clear(py: Python<'_>) { - jiter::cache_clear(py) + jiter::cache_clear(py); } #[pyfunction] diff --git a/crates/jiter-python/tests/requirements.txt b/crates/jiter-python/tests/requirements.txt index 43b7e76..3a8f5fb 100644 --- a/crates/jiter-python/tests/requirements.txt +++ b/crates/jiter-python/tests/requirements.txt @@ -1,2 +1,3 @@ pytest +pytest-pretty dirty_equals