Skip to content

Commit

Permalink
fix u style unicode strings in python (#110)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored May 31, 2024
1 parent 366d16a commit fc5a710
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 0 deletions.
15 changes: 15 additions & 0 deletions crates/jiter-python/tests/test_jiter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from decimal import Decimal

import jiter
Expand Down Expand Up @@ -239,3 +240,17 @@ def test_lossless_floats_int():
v = jiter.from_json(b'123', lossless_floats=True)
assert isinstance(v, int)
assert v == 123


def test_unicode_roundtrip():
    """Non-ASCII text serialised by ``json.dumps`` must parse back unchanged.

    ``json.dumps`` is called with its default settings, under which the
    standard library writes non-ASCII characters as ``\\uXXXX`` escapes, so
    the payload handed to ``jiter.from_json`` contains escape sequences
    rather than raw UTF-8 for the Chinese characters.
    """
    expected = ['中文']
    payload = json.dumps(expected).encode()
    # jiter must agree with the original value ...
    assert jiter.from_json(payload) == expected
    # ... and so must the standard-library parser, as a sanity check on the payload.
    assert json.loads(payload) == expected


def test_unicode_roundtrip_ensure_ascii():
    """Round-trip a dict whose value is non-ASCII, serialised with ``ensure_ascii=False``.

    With ``ensure_ascii=False`` the standard library writes the characters
    themselves instead of ``\\uXXXX`` escapes, so after ``.encode()`` the
    payload carries them as raw encoded bytes. ``jiter.from_json`` is invoked
    with ``cache_mode=False`` here.
    """
    expected = {'name': '中文'}
    payload = json.dumps(expected, ensure_ascii=False).encode()
    # jiter must reproduce the original mapping ...
    assert jiter.from_json(payload, cache_mode=False) == expected
    # ... and the standard-library parser confirms the payload itself is sound.
    assert json.loads(payload) == expected
1 change: 1 addition & 0 deletions crates/jiter/src/string_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ fn decode_to_tape<'t, 'j>(
b't' => tape.push(b'\t'),
b'u' => {
let (c, new_index) = parse_escape(data, index)?;
ascii_only = false;
index = new_index;
tape.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
}
Expand Down
13 changes: 13 additions & 0 deletions crates/jiter/tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1594,3 +1594,16 @@ fn jiter_partial_string() {
JiterErrorType::JsonError(JsonErrorType::EofWhileParsingList)
);
}

#[test]
fn test_unicode_roundtrip() {
    // The JSON document `"\u4e2d\u6587"`: the string "中文" written purely as
    // `\u` escapes, so every input byte is ASCII.
    let escaped = br#""\u4e2d\u6587""#;

    // Anything other than a string value is a test failure.
    let JsonValue::Str(decoded) = JsonValue::parse(escaped, false).unwrap() else {
        panic!("expected string");
    };

    assert_eq!(decoded, "中文");
    // The decoded text differs from the raw input bytes, and the parser is
    // expected to hand it back as an owned string rather than a borrow.
    assert!(matches!(decoded, Cow::Owned(_)));
}

0 comments on commit fc5a710

Please sign in to comment.