Skip to content

Commit

Permalink
Universal newline support
Browse files Browse the repository at this point in the history
  • Loading branch information
nineteendo committed Jul 31, 2024
1 parent ba1c32f commit 756f778
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 34 deletions.
2 changes: 0 additions & 2 deletions src/jsonyx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ def loads(

if not isinstance(s, str):
s = s.decode(detect_encoding(s), self._errors)
# Normalize newlines
s = s.replace("\r\n", "\n").replace("\r", "\n")
elif s.startswith("\ufeff"):
msg: str = "Unexpected UTF-8 BOM"
raise JSONSyntaxError(msg, filename, s, 0)
Expand Down
54 changes: 37 additions & 17 deletions src/jsonyx/_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
_match_chunk: Callable[[str, int], Match[str] | None] = re.compile(
r'[^"\\\x00-\x1f]+', _FLAGS,
).match
_match_line_end: Callable[[str, int], Match[str] | None] = re.compile(
r"[^\n\r]+", _FLAGS,
).match
_match_number: Callable[[str, int], Match[str] | None] = re.compile(
r"(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?", _FLAGS,
).match
Expand All @@ -40,9 +43,13 @@


def _get_err_context(doc: str, start: int, end: int) -> tuple[int, str, int]:
line_start: int = doc.rfind("\n", 0, start) + 1
if (line_end := doc.find("\n", start)) == -1:
line_end = len(doc)
line_start: int = max(
doc.rfind("\n", 0, start), doc.rfind("\r", 0, start),
) + 1
if (match := _match_line_end(doc, start)):
line_end: int = match.end()
else:
line_end = start

end = min(max(start + 1, line_end), end)
max_chars: int = get_terminal_size().columns - 4 # leading spaces
Expand Down Expand Up @@ -91,16 +98,30 @@ def __init__( # pylint: disable=R0913
self, msg: str, filename: str, doc: str, start: int, end: int = 0,
) -> None:
"""Create new JSON syntax error."""
lineno: int = doc.count("\n", 0, start) + 1
colno: int = start - doc.rfind("\n", 0, start)
lineno: int = (
doc.count("\n", 0, start)
+ doc.count("\r", 0, start)
- doc.count("\r\n", 0, start)
+ 1
)
colno: int = start - max(
doc.rfind("\n", 0, start), doc.rfind("\r", 0, start),
)
if end <= 0: # offset
if (line_end := doc.find("\n", start)) == -1:
line_end = len(doc)

end = min(line_end, start - end)
if (match := _match_line_end(doc, start)):
end = min(match.end(), start - end)
else:
end = start

end_lineno: int = doc.count("\n", 0, end) + 1
end_colno: int = end - doc.rfind("\n", 0, end)
end_lineno: int = (
doc.count("\n", 0, end)
+ doc.count("\r", 0, end)
- doc.count("\r\n", 0, end)
+ 1
)
end_colno: int = end - max(
doc.rfind("\n", 0, end), doc.rfind("\r", 0, end),
)
offset, text, end_offset = _get_err_context(doc, start, end)
super().__init__(
msg, (filename, lineno, offset, text, end_lineno, end_offset),
Expand Down Expand Up @@ -156,10 +177,9 @@ def skip_comments(filename: str, s: str, end: int) -> int:

comment_idx: int = end
if (comment_prefix := s[end:end + 2]) == "//":
if (end := find("\n", end + 2)) != -1:
end += 1
else:
end = len(s)
end += 2
if (match := _match_line_end(s, end)):
end = match.end()
elif comment_prefix == "/*":
if (end := find("*/", end + 2)) == -1:
if allow_comments:
Expand Down Expand Up @@ -213,7 +233,7 @@ def scan_string( # noqa: C901, PLR0912, PLR0915
return "".join(chunks), end + 1

if terminator != "\\":
if terminator == "\n":
if terminator in {"\n", "\r"}:
msg = "Unterminated string"
raise _errmsg(msg, filename, s, str_idx, end)

Expand All @@ -232,7 +252,7 @@ def scan_string( # noqa: C901, PLR0912, PLR0915
try:
char = _UNESCAPE[esc]
except KeyError:
if esc == "\n":
if esc in {"\n", "\r"}:
msg = "Expecting escaped character"
raise _errmsg(msg, filename, s, end) from None

Expand Down
6 changes: 3 additions & 3 deletions src/jsonyx/_speedups.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ _skip_comments(PyScannerObject *s, PyObject *pyfilename, PyObject *pystr, Py_ssi
PyUnicode_READ(kind, str, idx + 1) == '/')
{
idx += 2;
while (idx < len && PyUnicode_READ(kind,str, idx) != '\n') {
while (idx < len && (PyUnicode_READ(kind,str, idx) != '\n' && PyUnicode_READ(kind,str, idx) != '\r')) {
idx++;
}
}
Expand Down Expand Up @@ -417,7 +417,7 @@ scanstring_unicode(PyObject *pyfilename, PyObject *pystr, Py_ssize_t end, int al
break;
}
if (d <= 0x1f) {
if (d == '\n') {
if (d == '\n' || d == '\r') {
raise_errmsg("Unterminated string", pyfilename, pystr, begin, next);
}
else {
Expand Down Expand Up @@ -474,7 +474,7 @@ scanstring_unicode(PyObject *pyfilename, PyObject *pystr, Py_ssize_t end, int al
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
default:
if (c == '\n') {
if (c == '\n' || c == '\r') {
raise_errmsg("Expecting escaped character", pyfilename, pystr, end - 1, 0);
}
else {
Expand Down
16 changes: 12 additions & 4 deletions src/jsonyx/test/test_loads.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,13 @@ def test_surrogate_escapes(json: ModuleType, s: str, expected: Any) -> None:
@pytest.mark.parametrize(("s", "msg", "colno", "end_colno"), [
('"foo', "Unterminated string", 1, 5),
('"foo\n', "Unterminated string", 1, 5),
('"foo\r', "Unterminated string", 1, 5),
('"foo\r\n', "Unterminated string", 1, 5),
('"\b"', "Unescaped control character", 2, 3),
('"\\', "Expecting escaped character", 3, 0),
('"\\\n', "Expecting escaped character", 3, 0),
('"\\\r', "Expecting escaped character", 3, 0),
('"\\\r\n', "Expecting escaped character", 3, 0),
(r'"\a"', "Invalid backslash escape", 2, 4),
(r'"\u"', "Expecting 4 hex digits", 4, 5),
(r'"\u0xff"', "Expecting 4 hex digits", 4, 8),
Expand Down Expand Up @@ -501,14 +505,18 @@ def test_whitespace(json: ModuleType, s: str) -> None:

@pytest.mark.parametrize("s", [
# One comment
"0//line comment", "0/*block comment*/",
"0//", "0//line comment", "0/*block comment*/",
# Multiple comments
"0//comment 1\n//comment 2\n//comment 3",
"0//comment 1\r//comment 2\r//comment 3",
"0//comment 1\r\n//comment 2\r\n//comment 3",
"0/*comment 1*//*comment 2*//*comment 3*/",
# Whitespace
"0 //comment 1\n //comment 2\n //comment 3\n ",
"0 //comment 1\r //comment 2\r //comment 3\r ",
"0 //comment 1\r\n //comment 2\r\n //comment 3\r\n ",
"0 /*comment 1*/ /*comment 2*/ /*comment 3*/ ",
])
def test_comments(json: ModuleType, s: str) -> None:
Expand All @@ -524,9 +532,9 @@ def test_invalid_comment(json: ModuleType) -> None:
_check_syntax_err(exc_info, "Unterminated comment", 2, 24)


@pytest.mark.parametrize(
"s", ["0//line comment", "0/*block comment*/", "0/*unterminated comment"],
)
@pytest.mark.parametrize("s", [
"0//", "0//line comment", "0/*block comment*/", "0/*unterminated comment",
])
def test_comments_not_allowed(json: ModuleType, s: str) -> None:
"""Test comments if not allowed."""
with pytest.raises(json.JSONSyntaxError) as exc_info:
Expand Down
44 changes: 36 additions & 8 deletions src/jsonyx/test/test_syntax_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,44 @@
@pytest.mark.parametrize(
("doc", "start", "end", "lineno", "end_lineno", "colno", "end_colno"), [
# Offset
("line ", 5, -1, 1, 1, 6, 6), # line 1, column 6
("line ", 5, -1, 1, 1, 6, 6), # ln 1, col 6
# ^
("line \nline 2", 5, -1, 1, 1, 6, 6), # line 1, column 6
("line \nline 2", 5, -1, 1, 1, 6, 6), # ln 1, col 6
# ^
("line ?", 5, -1, 1, 1, 6, 7), # line 1, column 6-7
("line \rline 2", 5, -1, 1, 1, 6, 6), # ln 1, col 6
# ^
("line ?\nline 2", 5, -1, 1, 1, 6, 7), # line 1, column 6-7
("line \r\nline 2", 5, -1, 1, 1, 6, 6), # ln 1, col 6
# ^
("line ?", 5, -1, 1, 1, 6, 7), # ln 1, col 6-7
# ^
("line ?\nline 2", 5, -1, 1, 1, 6, 7), # ln 1, col 6-7
# ^
("line ?\rline 2", 5, -1, 1, 1, 6, 7), # ln 1, col 6-7
# ^
("line ?\r\nline 2", 5, -1, 1, 1, 6, 7), # ln 1, col 6-7
# ^
# Range
("line 1", 0, 1, 1, 1, 1, 2), # line 1, column 1-2
("line 1", 0, 1, 1, 1, 1, 2), # ln 1, col 1-2
# ^
("line 1\nline 2", 12, 13, 2, 2, 6, 7), # line 2, column 6-7
("line 1\nline 2", 12, 13, 2, 2, 6, 7), # ln 2, col 6-7
# ^
("line 1\rline 2", 12, 13, 2, 2, 6, 7), # ln 2, col 6-7
# ^
("line 1\nline 2\nline 3", 12, 19, 2, 3, 6, 6), # line 2-3, column 6
("line 1\r\nline 2", 13, 14, 2, 2, 6, 7), # ln 2, col 6-7
# ^
("line 1\nline 2\nline 3", 12, 19, 2, 3, 6, 6), # ln 2-3, col 6
# ^^^^^^^^
("line 1\nline 2\nline 3", 12, 20, 2, 3, 6, 7), # line 2-3, column 6-7
("line 1\rline 2\rline 3", 12, 19, 2, 3, 6, 6), # ln 2-3, col 6
# ^^^^^^^^
("line 1\r\nline 2\r\nline 3", 13, 21, 2, 3, 6, 6), # ln 2-3, col 6
# ^^^^^^^^^^
("line 1\nline 2\nline 3", 12, 20, 2, 3, 6, 7), # ln 2-3, col 6-7
# ^^^^^^^^^
("line 1\rline 2\rline 3", 12, 20, 2, 3, 6, 7), # ln 2-3, col 6-7
# ^^^^^^^^^
("line 1\r\nline 2\r\nline 3", 13, 22, 2, 3, 6, 7), # ln 2-3, col 6-7
# ^^^^^^^^^^^
],
)
# pylint: disable-next=R0913
Expand All @@ -52,8 +72,16 @@ def test_start_and_end_position( # noqa: PLR0913, PLR0917
# ^^^^^^^ ^^^^^^^
(12, "current\nnext", 0, 7, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
(12, "current\rnext", 0, 7, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
(12, "current\r\nnext", 0, 7, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
(16, "previous\ncurrent", 9, 16, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
(16, "previous\rcurrent", 9, 16, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
(16, "previous\r\ncurrent", 10, 17, 1, "current", 8),
# ^^^^^^^ ^^^^^^^
# No newline
(17, "start-middle-end", 0, 5, 1, "start-middle-end", 6),
Expand Down

0 comments on commit 756f778

Please sign in to comment.