From 6d146179b76c36599e0fa591400b80acb3109ed1 Mon Sep 17 00:00:00 2001
From: Iwan Aucamp <aucampia@gmail.com>
Date: Mon, 28 Aug 2023 20:14:59 +0000
Subject: [PATCH] fix: SPARQL `LOAD ... INTO GRAPH` handling

`LOAD ... INTO GRAPH` stopped working correctly after the handling of
the `publicID` parameter of `Graph.parse` was changed in RDFLib 7.0.0
(<https://github.com/RDFLib/rdflib/pull/2406>).

This is because `LOAD` evaluation relied on `publicID` to select the
graph name, so after <https://github.com/RDFLib/rdflib/pull/2406> data
was loaded into the default graph even when a named graph was
specified.

This change adds tests for `LOAD ... INTO GRAPH` and fixes the load
evaluation.

A further consequence of this change is that relative IRIs in data
loaded with `LOAD ... INTO GRAPH` are now resolved against the source
document URI instead of the base URI of the graph being loaded into,
which is more correct.
---
 rdflib/plugins/sparql/sparql.py | 21 ++++++++---
 rdflib/plugins/sparql/update.py |  2 +-
 test/test_sparql/test_update.py | 67 +++++++++++++++++++++++++++++++++
 test/utils/__init__.py          | 35 ++++++++++++++++-
 4 files changed, 118 insertions(+), 7 deletions(-)
 create mode 100644 test/test_sparql/test_update.py

diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py
index 7bfe282841..0946557185 100644
--- a/rdflib/plugins/sparql/sparql.py
+++ b/rdflib/plugins/sparql/sparql.py
@@ -311,14 +311,23 @@ def dataset(self) -> ConjunctiveGraph:
             )
         return self._dataset
 
-    def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
+    def load(
+        self,
+        source: URIRef,
+        default: bool = False,
+        into: Optional[Identifier] = None,
+        **kwargs: Any,
+    ) -> None:
         """
         Load data from the source into the query context's.
 
         :param source: The source to load from.
-        :param default: If `True`, triples from the source will be added to the
-            default graph, otherwise it will be loaded into a graph with
-            ``source`` URI as its name.
+        :param default: If `True`, triples from the source will be added
+            to the default graph, otherwise it will be loaded into a
+            graph with ``source`` URI as its name.
+        :param into: The name of the graph to load the data into. If
+            `None`, the source URI will be used as the name of the
+            graph.
         :param kwargs: Keyword arguments to pass to
             :meth:`rdflib.graph.Graph.parse`.
         """
@@ -353,7 +362,9 @@ def _load(graph, source):
             if default:
                 _load(self.graph, source)
             else:
-                _load(self.dataset.get_context(source), source)
+                if into is None:
+                    into = source
+                _load(self.dataset.get_context(into), source)
 
     def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
         # in SPARQL BNodes are just labels
diff --git a/rdflib/plugins/sparql/update.py b/rdflib/plugins/sparql/update.py
index 5ce86f3931..777eaa8773 100644
--- a/rdflib/plugins/sparql/update.py
+++ b/rdflib/plugins/sparql/update.py
@@ -51,7 +51,7 @@ def evalLoad(ctx: QueryContext, u: CompValue) -> None:
         assert isinstance(u.iri, URIRef)
 
     if u.graphiri:
-        ctx.load(u.iri, default=False, publicID=u.graphiri)
+        ctx.load(u.iri, default=False, into=u.graphiri)
     else:
         ctx.load(u.iri, default=True)
 
diff --git a/test/test_sparql/test_update.py b/test/test_sparql/test_update.py
new file mode 100644
index 0000000000..86931778d6
--- /dev/null
+++ b/test/test_sparql/test_update.py
@@ -0,0 +1,67 @@
+import logging
+from test.data import TEST_DATA_DIR
+from test.utils import GraphHelper
+from test.utils.graph import load_sources
+from test.utils.namespace import EGDO
+from typing import Callable
+
+import pytest
+
+from rdflib.graph import ConjunctiveGraph, Dataset, Graph
+
+
+@pytest.mark.parametrize("graph_factory", [Graph, ConjunctiveGraph, Dataset])
+def test_load_into_default(graph_factory: Callable[[], Graph]) -> None:
+    """
+    Evaluation of ``LOAD <source>`` into default graph works correctly.
+    """
+    source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl"
+
+    expected_graph = graph_factory()
+    load_sources(source_path, graph=expected_graph)
+
+    actual_graph = graph_factory()
+    actual_graph.update(f"LOAD <{source_path.as_uri()}>")
+
+    if logging.getLogger().isEnabledFor(logging.DEBUG):
+        debug_format = (
+            "trig" if isinstance(expected_graph, ConjunctiveGraph) else "turtle"
+        )
+        logging.debug(
+            "expected_graph = \n%s", expected_graph.serialize(format=debug_format)
+        )
+        logging.debug(
+            "actual_graph = \n%s", actual_graph.serialize(format=debug_format)
+        )
+
+    if isinstance(expected_graph, ConjunctiveGraph):
+        assert isinstance(actual_graph, ConjunctiveGraph)
+        GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)
+    else:
+        GraphHelper.assert_triple_sets_equals(expected_graph, actual_graph)
+
+
+@pytest.mark.parametrize("graph_factory", [ConjunctiveGraph, Dataset])
+def test_load_into_named(graph_factory: Callable[[], ConjunctiveGraph]) -> None:
+    """
+    Evaluation of ``LOAD <source> INTO GRAPH <name>`` works correctly.
+    """
+    source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl"
+
+    expected_graph = graph_factory()
+    load_sources(source_path, graph=expected_graph.get_context(EGDO.graph))
+
+    actual_graph = graph_factory()
+
+    actual_graph.update(f"LOAD <{source_path.as_uri()}> INTO GRAPH <{EGDO.graph}>")
+
+    if logging.getLogger().isEnabledFor(logging.DEBUG):
+        debug_format = "trig"
+        logging.debug(
+            "expected_graph = \n%s", expected_graph.serialize(format=debug_format)
+        )
+        logging.debug(
+            "actual_graph = \n%s", actual_graph.serialize(format=debug_format)
+        )
+
+    GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)
diff --git a/test/utils/__init__.py b/test/utils/__init__.py
index dc27251a3b..5c988d96ff 100644
--- a/test/utils/__init__.py
+++ b/test/utils/__init__.py
@@ -19,6 +19,7 @@
     Iterable,
     List,
     Optional,
+    Sequence,
     Set,
     Tuple,
     Type,
@@ -35,7 +36,7 @@
 from rdflib import BNode, ConjunctiveGraph, Graph
 from rdflib.graph import Dataset
 from rdflib.plugin import Plugin
-from rdflib.term import Identifier, Literal, Node, URIRef
+from rdflib.term import IdentifiedNode, Identifier, Literal, Node, URIRef
 
 PluginT = TypeVar("PluginT")
 
@@ -257,6 +258,23 @@ def assert_quad_sets_equals(
         else:
             assert lhs_set != rhs_set
 
+    @classmethod
+    def assert_collection_graphs_equal(
+        cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph
+    ) -> None:
+        """
+        Assert that all graphs in the provided collections are equal,
+        comparing named graphs with identically named graphs.
+        """
+        cls.assert_triple_sets_equals(lhs.default_context, rhs.default_context)
+        graph_names = cls.non_default_graph_names(lhs) | cls.non_default_graph_names(
+            rhs
+        )
+        for identifier in graph_names:
+            cls.assert_triple_sets_equals(
+                lhs.get_context(identifier), rhs.get_context(identifier)
+            )
+
     @classmethod
     def assert_sets_equals(
         cls,
@@ -381,6 +399,21 @@ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None:
             if object.datatype in datatypes:
                 object._datatype = None
 
+    @classmethod
+    def non_default_graph_names(
+        cls, container: ConjunctiveGraph
+    ) -> Set[IdentifiedNode]:
+        return set(context.identifier for context in container.contexts()) - {
+            container.default_context.identifier
+        }
+
+    @classmethod
+    def non_default_graphs(cls, container: ConjunctiveGraph) -> Sequence[Graph]:
+        result = []
+        for name in cls.non_default_graph_names(container):
+            result.append(container.get_context(name))
+        return result
+
 
 def eq_(lhs, rhs, msg=None):
     """