def test_util_from_n3_escapes(self) -> None:
    """Check that ``util.from_n3`` decodes escape sequences in a quoted literal.

    Each pair is ``(escaped, raw)``. The escaped spelling is wrapped in
    double quotes and parsed with ``util.from_n3``; the ``str()`` of the
    result must equal ``raw``. The cases cover the single-character
    escapes (tab, backspace, newline, carriage return, form feed, both
    quote characters, backslash) and the ``\\uXXXX`` / ``\\UXXXXXXXX`` forms.
    """
    pairs = [
        ("\\t", "\t"),
        ("\\b", "\b"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\f", "\f"),
        ('\\"', '"'),
        ("\\'", "'"),
        ("\\\\", "\\"),
        ("\\u00F6", "ö"),
        ("\\U000000F6", "ö"),
    ]
    for escaped, raw in pairs:
        # One sub-test per pair so a single failing escape does not hide
        # the results of the others.
        with self.subTest(f"{escaped} => {raw}"):
            literal_str = str(util.from_n3(f'"{escaped}"'))
            self.assertEqual(literal_str, raw)


def test_util_from_n3_not_escapes(self) -> None:
    """Check that non-escape text passes through ``util.from_n3`` unchanged.

    The inputs are a native non-ASCII character and a backslash followed
    by ``xf6``; N3 does not define ``\\xhh`` as an escape sequence, so the
    latter must not be decoded. Each input is wrapped in double quotes,
    parsed, and the ``str()`` of the result must equal the input.
    """
    strings = [
        "jörn",
        "j\\xf6rn",
    ]
    for string in strings:
        with self.subTest(string):
            literal_str = str(util.from_n3(f'"{string}"'))
            self.assertEqual(literal_str, string)
@expectedFailure + def test_util_from_n3_not_escapes_xf(self) -> None: + strings = [ + f"j\\366rn", + f"\\", + f"\\0", + f"\\I", + ] + for string in strings: + with self.subTest(f"{string}"): + literal_str = str(util.from_n3(f'"{string}"')) + self.assertEqual(literal_str, f"{string}") + class TestUtilCheckers(unittest.TestCase): def setUp(self):