Prevent from_n3 from unescaping \xhh

This is a fairly pragmatic fix for a problem that should really be solved by changing `from_n3` to behave the same way as the actual N3/Turtle parser. There are still many issues with this function; I added tests for some of them.
RDFLib · Jun 26, 2021 · 50c3112 · 50c3112
1 parent a32f48b
commit 50c3112
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 1 deletion.
diff --git a/rdflib/util.py b/rdflib/util.py
@@ -137,7 +137,7 @@ def to_term(s, default=None):
         raise Exception(msg)
 
 
-def from_n3(s, default=None, backend=None, nsm=None):
+def from_n3(s: str, default=None, backend=None, nsm=None):
     r'''
     Creates the Identifier corresponding to the given n3 string.
 
@@ -193,6 +193,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
                 language = rest[1:]  # strip leading at sign
 
         value = value.replace(r"\"", '"')
+        # unicode-escape interprets \xhh as an escape sequence,
+        # but n3 does not define it as such.
+        value = value.replace(r"\x", r"\\x")
         # Hack: this should correctly handle strings with either native unicode
         # characters, or \u1234 unicode escapes.
         value = value.encode("raw-unicode-escape").decode("unicode-escape")

diff --git a/test/test_util.py b/test/test_util.py
@@ -2,6 +2,7 @@
 
 import unittest
 import time
+from unittest.case import expectedFailure
 from rdflib.graph import Graph
 from rdflib.graph import QuotedGraph
 from rdflib.graph import ConjunctiveGraph
@@ -301,6 +302,47 @@ def test_util_from_n3_expectgraph(self):
         res = util.from_n3(s, default=None, backend="Memory")
         self.assertTrue(isinstance(res, Graph))
 
+    def test_util_from_n3_escapes(self) -> None:  # escapes that from_n3 must decode
+        pairs = [  # (escaped text placed inside the n3 literal, expected decoded text)
+            ("\\t", "\t"),
+            ("\\b", "\b"),
+            ("\\n", "\n"),
+            ("\\r", "\r"),
+            ("\\f", "\f"),
+            ('\\"', '"'),
+            ("\\'", "'"),
+            ("\\\\", "\\"),
+            ("\\u00F6", "ö"),
+            ("\\U000000F6", "ö"),
+        ]
+        for escaped, raw in pairs:
+            with self.subTest(f"{escaped} => {raw}"):
+                literal_str = str(util.from_n3(f'"{escaped}"'))  # wrap in quotes to form a literal
+                self.assertEqual(literal_str, raw)
+
+    def test_util_from_n3_not_escapes(self) -> None:  # text that must come back unchanged
+        strings = [
+            "jörn",  # native non-ASCII character, no escape involved
+            "j\\xf6rn",  # backslash + "xf6": n3 does not define \xhh as an escape
+        ]
+        for string in strings:
+            with self.subTest(string):
+                literal_str = str(util.from_n3(f'"{string}"'))  # wrap in quotes to form a literal
+                self.assertEqual(literal_str, string)
+
+    @expectedFailure  # known gap: from_n3 does not yet return all of these inputs unchanged
+    def test_util_from_n3_not_escapes_xf(self) -> None:
+        strings = [
+            "j\\366rn",
+            "\\",
+            "\\0",
+            "\\I",
+        ]
+        for string in strings:
+            with self.subTest(string):
+                literal_str = str(util.from_n3(f'"{string}"'))  # wrap in quotes to form a literal
+                self.assertEqual(literal_str, string)
+
 
 class TestUtilCheckers(unittest.TestCase):
     def setUp(self):