Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add argument none_value for None representation in loading and dumping #53

Merged
merged 16 commits into from
Jun 24, 2024
Merged
285 changes: 126 additions & 159 deletions Cargo.lock

Large diffs are not rendered by default.

65 changes: 54 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ A better TOML library for python implemented in rust.
library, it passes all the [standard TOML tests](https://github.com/BurntSushi/toml-test) as well as having 100%
coverage on python code. Other TOML libraries for python I tried all failed to parse some valid TOML.
* Performance: see [github.com/pwwang/toml-bench](https://github.com/pwwang/toml-bench) -
rtoml is much faster than pure Python TOML libraries.
rtoml is the fastest Python TOML library at the time of writing.
* `None`-value handling: rtoml has flexible support for `None` values, instead of simply ignoring them.

## Install

Expand All @@ -33,38 +34,50 @@ installed before you can install rtoml.

#### load
```python
def load(toml: Union[str, Path, TextIO]) -> Dict[str, Any]: ...
def load(toml: Union[str, Path, TextIO], *, none_value: Optional[str] = None) -> Dict[str, Any]: ...
```

Parse TOML via a string or file and return a python dictionary. The `toml` argument may be a `str`,
`Path` or file object from `open()`.
Parse TOML via a string or file and return a python dictionary.

* `toml`: a `str`, `Path` or file object from `open()`.
* `none_value`: controls which string value in `toml` is loaded as `None` in Python. By default, `none_value` is `None`, which means nothing is loaded as `None`.

#### loads
```python
def loads(toml: str) -> Dict[str, Any]: ...
def loads(toml: str, *, none_value: Optional[str] = None) -> Dict[str, Any]: ...
```

Parse a TOML string and return a python dictionary. (provided to match the interface of `json` and similar libraries)

* `toml`: a `str` containing TOML.
* `none_value`: controls which string value in `toml` is loaded as `None` in Python. By default, `none_value` is `None`, which means nothing is loaded as `None`.

#### dumps
```python
def dumps(obj: Any, *, pretty: bool = False) -> str: ...
def dumps(obj: Any, *, pretty: bool = False, none_value: Optional[str] = "null") -> str: ...
```

Serialize a python object to TOML.

If `pretty` is true, output has a more "pretty" format.
* `obj`: a python object to be serialized.
* `pretty`: if `True` the output has a more "pretty" format.
* `none_value`: controls how `None` values in `obj` are serialized: each `None` is written as this string (`"null"` by default). `none_value=None` means `None` values are ignored.

#### dump
```python
def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False) -> int: ...
def dump(
obj: Any, file: Union[Path, TextIO], *, pretty: bool = False, none_value: Optional[str] = "null"
) -> int: ...
```

Serialize a python object to TOML and write it to a file. `file` may be a `Path` or file object from `open()`.
Serialize a python object to TOML and write it to a file.

If `pretty` is true, output has a more "pretty" format.
* `obj`: a python object to be serialized.
* `file`: a `Path` or file object from `open()`.
* `pretty`: if `True` the output has a more "pretty" format.
* `none_value`: controls how `None` values in `obj` are serialized: each `None` is written as this string (`"null"` by default). `none_value=None` means `None` values are ignored.

### Example
### Examples

```py
from datetime import datetime, timezone, timedelta
Expand Down Expand Up @@ -116,3 +129,33 @@ server = "192.168.1.1"
ports = [8001, 8001, 8002]
"""
```

An example of `None`-value handling:

```python
obj = {
'a': None,
'b': 1,
'c': [1, 2, None, 3],
}

# Ignore None values
assert rtoml.dumps(obj, none_value=None) == """\
b = 1
c = [1, 2, 3]
"""

# Serialize None values as '@None'
assert rtoml.dumps(obj, none_value='@None') == """\
a = "@None"
b = 1
c = [1, 2, "@None", 3]
"""

# Deserialize '@None' back to None
assert rtoml.load("""\
a = "@None"
b = 1
c = [1, 2, "@None", 3]
""", none_value='@None') == obj
```
3 changes: 2 additions & 1 deletion example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timezone, timedelta
from datetime import datetime, timedelta, timezone

import rtoml

obj = {
Expand Down
46 changes: 32 additions & 14 deletions rtoml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from io import TextIOBase
from pathlib import Path
from typing import Any, Dict, TextIO, Union
from typing import Any, Dict, Optional, TextIO, Union

from . import _rtoml

Expand All @@ -13,49 +13,67 @@
TomlSerializationError = _rtoml.TomlSerializationError


def load(toml: Union[str, Path, TextIO]) -> Dict[str, Any]:
def load(toml: Union[str, Path, TextIO], *, none_value: Optional[str] = None) -> Dict[str, Any]:
"""
Parse TOML via a string or file and return a python dict. The `toml` argument may be a `str`,
`Path` or file object from `open()`.
Parse TOML via a string or file and return a python dict.

Args:
toml: a `str`, `Path` or file object from `open()`.
none_value: controlling which value in `toml` is loaded as `None` in python.
By default, `none_value` is `None`, which means nothing is loaded as `None`.
"""
if isinstance(toml, Path):
toml = toml.read_text(encoding='UTF-8')
elif isinstance(toml, (TextIOBase, TextIO)):
toml = toml.read()

return loads(toml)
return loads(toml, none_value=none_value)


def loads(toml: str) -> Dict[str, Any]:
def loads(toml: str, *, none_value: Optional[str] = None) -> Dict[str, Any]:
"""
Parse a TOML string and return a python dict. (provided to match the interface of `json` and similar libraries)

Args:
toml: a `str` containing TOML.
none_value: controlling which value in `toml` is loaded as `None` in python.
By default, `none_value` is `None`, which means nothing is loaded as `None`.
"""
if not isinstance(toml, str):
raise TypeError(f'invalid toml input, must be str not {type(toml)}')
return _rtoml.deserialize(toml)
return _rtoml.deserialize(toml, none_value=none_value)


def dumps(obj: Any, *, pretty: bool = False) -> str:
def dumps(obj: Any, *, pretty: bool = False, none_value: Optional[str] = 'null') -> str:
"""
Serialize a python object to TOML.

If `pretty` is true, output has a more "pretty" format.
Args:
obj: a python object to be serialized.
pretty: if true, output has a more "pretty" format.
none_value: controlling how `None` values in `obj` are serialized.
`none_value=None` means `None` values are ignored.
"""
if pretty:
serialize = _rtoml.serialize_pretty
else:
serialize = _rtoml.serialize

return serialize(obj)
return serialize(obj, none_value=none_value)


def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False) -> int:
def dump(obj: Any, file: Union[Path, TextIO], *, pretty: bool = False, none_value: Optional[str] = 'null') -> int:
"""
Serialize a python object to TOML and write it to a file. `file` may be a `Path` or file object from `open()`.
Serialize a python object to TOML and write it to a file.

If `pretty` is true, output has a more "pretty" format.
Args:
obj: a python object to be serialized.
file: a `Path` or file object from `open()`.
pretty: if `True` the output has a more "pretty" format.
none_value: controlling how `None` values in `obj` are serialized.
`none_value=None` means `None` values are ignored.
"""
s = dumps(obj, pretty=pretty)
s = dumps(obj, pretty=pretty, none_value=none_value)
if isinstance(file, Path):
return file.write_text(s, encoding='UTF-8')
else:
Expand Down
6 changes: 3 additions & 3 deletions rtoml/_rtoml.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ from typing import Any

__version__: str

def deserialize(toml: str) -> Any: ...
def serialize(obj: Any) -> str: ...
def serialize_pretty(obj: Any) -> str: ...
def deserialize(toml: str, none_value: str | None = None) -> Any: ...
def serialize(obj: Any, none_value: str | None = 'null') -> str: ...
def serialize_pretty(obj: Any, none_value: str | None = 'null') -> str: ...

class TomlParsingError(ValueError): ...
class TomlSerializationError(ValueError): ...
16 changes: 10 additions & 6 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ pub type NoHashSet<T> = HashSet<T, BuildNoHashHasher<T>>;

pub struct PyDeserializer<'py> {
py: Python<'py>,
none_value: Option<&'py str>,
}

impl<'py> PyDeserializer<'py> {
pub fn new(py: Python<'py>) -> Self {
Self { py }
pub fn new(py: Python<'py>, none_value: Option<&'py str>) -> Self {
Self { py, none_value }
}
}

Expand Down Expand Up @@ -78,7 +79,10 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
where
E: de::Error,
{
Ok(value.into_py(self.py))
match self.none_value {
Some(none_value) if value == none_value => Ok(self.py.None()),
_ => Ok(value.into_py(self.py)),
}
}

fn visit_unit<E>(self) -> Result<Self::Value, E> {
Expand All @@ -91,7 +95,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
{
let mut elements = Vec::new();

while let Some(elem) = seq.next_element_seed(PyDeserializer::new(self.py))? {
while let Some(elem) = seq.next_element_seed(PyDeserializer::new(self.py, self.none_value))? {
elements.push(elem);
}

Expand All @@ -102,7 +106,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
where
A: MapAccess<'de>,
{
match map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py))? {
match map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py, self.none_value))? {
Some((first_key, first_value)) if first_key == DATETIME_MAPPING_KEY => {
let py_string = first_value.extract::<&str>(self.py).map_err(de::Error::custom)?;
let dt: TomlDatetime = TomlDatetime::from_str(py_string).map_err(de::Error::custom)?;
Expand All @@ -119,7 +123,7 @@ impl<'de, 'py> Visitor<'de> for PyDeserializer<'py> {
dict.set_item(first_key, first_value).map_err(de::Error::custom)?;

while let Some((key, value)) =
map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py))?
map_access.next_entry_seed(PhantomData::<String>, PyDeserializer::new(self.py, self.none_value))?
{
if key_set.insert(hash_builder.hash_one(&key)) {
dict.set_item(key, value).map_err(de::Error::custom)?;
Expand Down
12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,25 @@ create_exception!(_rtoml, TomlParsingError, PyValueError);
create_exception!(_rtoml, TomlSerializationError, PyValueError);

#[pyfunction]
fn deserialize(py: Python, toml_data: String) -> PyResult<PyObject> {
fn deserialize(py: Python, toml_data: String, none_value: Option<&str>) -> PyResult<PyObject> {
let mut deserializer = Deserializer::new(&toml_data);
let seed = de::PyDeserializer::new(py);
let seed = de::PyDeserializer::new(py, none_value);
seed.deserialize(&mut deserializer)
.map_err(|e| TomlParsingError::new_err(e.to_string()))
}

#[pyfunction]
fn serialize(py: Python, obj: &PyAny) -> PyResult<String> {
let s = SerializePyObject::new(py, obj);
fn serialize(py: Python, obj: &PyAny, none_value: Option<&str>) -> PyResult<String> {
let s = SerializePyObject::new(py, obj, none_value);
match to_toml_string(&s) {
Ok(s) => Ok(s),
Err(e) => Err(TomlSerializationError::new_err(e.to_string())),
}
}

#[pyfunction]
fn serialize_pretty(py: Python, obj: &PyAny) -> PyResult<String> {
let s = SerializePyObject::new(py, obj);
fn serialize_pretty(py: Python, obj: &PyAny, none_value: Option<&str>) -> PyResult<String> {
let s = SerializePyObject::new(py, obj, none_value);
match to_toml_string_pretty(&s) {
Ok(s) => Ok(s),
Err(e) => Err(TomlSerializationError::new_err(e.to_string())),
Expand Down
4 changes: 1 addition & 3 deletions src/py_type.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use pyo3::once_cell::GILOnceCell;
use pyo3::prelude::*;
use pyo3::types::{PyByteArray, PyBytes, PyDate, PyDateTime, PyDelta, PyDict, PyList, PyString, PyTime, PyTuple};
use pyo3::types::{PyByteArray, PyBytes, PyDate, PyDateTime, PyDict, PyList, PyString, PyTime, PyTuple};

#[derive(Clone)]
#[cfg_attr(debug_assertions, derive(Debug))]
Expand All @@ -23,7 +23,6 @@ pub struct PyTypeLookup {
pub datetime: usize,
pub date: usize,
pub time: usize,
pub timedelta: usize,
}

static TYPE_LOOKUP: GILOnceCell<PyTypeLookup> = GILOnceCell::new();
Expand Down Expand Up @@ -51,7 +50,6 @@ impl PyTypeLookup {
.get_type_ptr() as usize,
date: PyDate::new(py, 2000, 1, 1).unwrap().get_type_ptr() as usize,
time: PyTime::new(py, 0, 0, 0, 0, None).unwrap().get_type_ptr() as usize,
timedelta: PyDelta::new(py, 0, 0, 0, false).unwrap().get_type_ptr() as usize,
}
}

Expand Down
Loading
Loading