Skip to content

Commit

Permalink
Prevent from_n3 from unescaping \xhh
Browse files Browse the repository at this point in the history
This is a pragmatic stopgap for a problem that should really be solved by
changing `from_n3` to unescape strings the same way the actual N3/Turtle parser does.

There are still many issues with this function; some of them are now covered
by tests that are marked as expected failures.
  • Loading branch information
aucampia committed Jun 26, 2021
1 parent a32f48b commit 50c3112
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
5 changes: 4 additions & 1 deletion rdflib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def to_term(s, default=None):
raise Exception(msg)


def from_n3(s, default=None, backend=None, nsm=None):
def from_n3(s: str, default=None, backend=None, nsm=None):
r'''
Creates the Identifier corresponding to the given n3 string.
Expand Down Expand Up @@ -193,6 +193,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
language = rest[1:] # strip leading at sign

value = value.replace(r"\"", '"')
# unicode-escape interprets \xhh as an escape sequence,
# but n3 does not define it as such.
value = value.replace(r"\x", r"\\x")
# Hack: this should correctly handle strings with either native unicode
# characters, or \u1234 unicode escapes.
value = value.encode("raw-unicode-escape").decode("unicode-escape")
Expand Down
42 changes: 42 additions & 0 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import unittest
import time
from unittest.case import expectedFailure
from rdflib.graph import Graph
from rdflib.graph import QuotedGraph
from rdflib.graph import ConjunctiveGraph
Expand Down Expand Up @@ -301,6 +302,47 @@ def test_util_from_n3_expectgraph(self):
res = util.from_n3(s, default=None, backend="Memory")
self.assertTrue(isinstance(res, Graph))

def test_util_from_n3_escapes(self) -> None:
    """Check that ``from_n3`` decodes each listed escape sequence.

    Every pair holds the escaped text that is placed between double quotes
    to form the N3 literal, and the string value expected after parsing.
    """
    pairs = [
        ("\\t", "\t"),
        ("\\b", "\b"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\f", "\f"),
        ('\\"', '"'),
        ("\\'", "'"),
        ("\\\\", "\\"),
        ("\\u00F6", "ö"),
        ("\\U000000F6", "ö"),
    ]
    for escaped, raw in pairs:
        # One sub-test per pair so a single failure does not hide the others.
        with self.subTest(f"{escaped} => {raw}"):
            literal_str = str(util.from_n3(f'"{escaped}"'))
            # ``raw`` is already a str, so compare against it directly.
            self.assertEqual(literal_str, raw)

def test_util_from_n3_not_escapes(self) -> None:
    """Check that ``from_n3`` leaves the listed strings unchanged.

    Covers a native non-ASCII character and a ``\\xhh`` sequence; the
    parsed literal must compare equal to the text that was quoted.
    """
    strings = [
        "jörn",
        "j\\xf6rn",
    ]
    for string in strings:
        # One sub-test per input so a single failure does not hide the others.
        with self.subTest(string):
            literal_str = str(util.from_n3(f'"{string}"'))
            self.assertEqual(literal_str, string)

@expectedFailure
def test_util_from_n3_not_escapes_xf(self) -> None:
    """Check strings that ``from_n3`` should leave unchanged but does not yet.

    The test is decorated with ``expectedFailure``, so it records the
    desired pass-through behaviour for these backslash sequences without
    failing the suite.
    """
    # Plain string literals: none of these contain placeholders, so the
    # ``f`` prefix would be misleading.
    strings = [
        "j\\366rn",
        "\\",
        "\\0",
        "\\I",
    ]
    for string in strings:
        # One sub-test per input so each case is reported separately.
        with self.subTest(string):
            literal_str = str(util.from_n3(f'"{string}"'))
            self.assertEqual(literal_str, string)


class TestUtilCheckers(unittest.TestCase):
def setUp(self):
Expand Down

0 comments on commit 50c3112

Please sign in to comment.