From 6d146179b76c36599e0fa591400b80acb3109ed1 Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Mon, 28 Aug 2023 20:14:59 +0000 Subject: [PATCH] fix: SPARQL `LOAD ... INTO GRAPH` handling `LOAD ... INTO GRAPH` stopped working correctly after the change to the handling of the `publicID` `Graph.parse` parameter in RDFLib 7.0.0 (). This is because `LOAD` evaluation relied on `publicID` to select the graph name. So after that change, data would be loaded into the default graph even if a named graph is specified. This change adds tests for `LOAD ... INTO GRAPH` and fixes the load evaluation. A consequence of this change is also that relative IRI lookup for graphs loaded with `LOAD ... INTO GRAPH` is now relative to the source document URI instead of the base URI of the graph being loaded into, which is more correct. --- rdflib/plugins/sparql/sparql.py | 21 ++++++++--- rdflib/plugins/sparql/update.py | 2 +- test/test_sparql/test_update.py | 67 +++++++++++++++++++++++++++++++++ test/utils/__init__.py | 35 ++++++++++++++++- 4 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 test/test_sparql/test_update.py diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py index 7bfe282841..0946557185 100644 --- a/rdflib/plugins/sparql/sparql.py +++ b/rdflib/plugins/sparql/sparql.py @@ -311,14 +311,23 @@ def dataset(self) -> ConjunctiveGraph: ) return self._dataset - def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None: + def load( + self, + source: URIRef, + default: bool = False, + into: Optional[Identifier] = None, + **kwargs: Any, + ) -> None: """ Load data from the source into the query context's. :param source: The source to load from. - :param default: If `True`, triples from the source will be added to the - default graph, otherwise it will be loaded into a graph with - ``source`` URI as its name. 
+ :param default: If `True`, triples from the source will be added + to the default graph, otherwise it will be loaded into a + graph with ``source`` URI as its name. + :param into: The name of the graph to load the data into. If + `None`, the source URI will be used as the name of the + graph. :param kwargs: Keyword arguments to pass to :meth:`rdflib.graph.Graph.parse`. """ @@ -353,7 +362,9 @@ def _load(graph, source): if default: _load(self.graph, source) else: - _load(self.dataset.get_context(source), source) + if into is None: + into = source + _load(self.dataset.get_context(into), source) def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]: # in SPARQL BNodes are just labels diff --git a/rdflib/plugins/sparql/update.py b/rdflib/plugins/sparql/update.py index 5ce86f3931..777eaa8773 100644 --- a/rdflib/plugins/sparql/update.py +++ b/rdflib/plugins/sparql/update.py @@ -51,7 +51,7 @@ def evalLoad(ctx: QueryContext, u: CompValue) -> None: assert isinstance(u.iri, URIRef) if u.graphiri: - ctx.load(u.iri, default=False, publicID=u.graphiri) + ctx.load(u.iri, default=False, into=u.graphiri) else: ctx.load(u.iri, default=True) diff --git a/test/test_sparql/test_update.py b/test/test_sparql/test_update.py new file mode 100644 index 0000000000..86931778d6 --- /dev/null +++ b/test/test_sparql/test_update.py @@ -0,0 +1,67 @@ +import logging +from test.data import TEST_DATA_DIR +from test.utils import GraphHelper +from test.utils.graph import load_sources +from test.utils.namespace import EGDO +from typing import Callable + +import pytest + +from rdflib.graph import ConjunctiveGraph, Dataset, Graph + + +@pytest.mark.parametrize("graph_factory", [Graph, ConjunctiveGraph, Dataset]) +def test_load_into_default(graph_factory: Callable[[], Graph]) -> None: + """ + Evaluation of ``LOAD <source>`` into default graph works correctly. 
+ """ + source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl" + + expected_graph = graph_factory() + load_sources(source_path, graph=expected_graph) + + actual_graph = graph_factory() + actual_graph.update(f"LOAD <{source_path.as_uri()}>") + + if logging.getLogger().isEnabledFor(logging.DEBUG): + debug_format = ( + "trig" if isinstance(expected_graph, ConjunctiveGraph) else "turtle" + ) + logging.debug( + "expected_graph = \n%s", expected_graph.serialize(format=debug_format) + ) + logging.debug( + "actual_graph = \n%s", actual_graph.serialize(format=debug_format) + ) + + if isinstance(expected_graph, ConjunctiveGraph): + assert isinstance(actual_graph, ConjunctiveGraph) + GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph) + else: + GraphHelper.assert_triple_sets_equals(expected_graph, actual_graph) + + +@pytest.mark.parametrize("graph_factory", [ConjunctiveGraph, Dataset]) +def test_load_into_named(graph_factory: Callable[[], ConjunctiveGraph]) -> None: + """ + Evaluation of ``LOAD <source> INTO GRAPH <name>`` works correctly. 
+ """ + source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl" + + expected_graph = graph_factory() + load_sources(source_path, graph=expected_graph.get_context(EGDO.graph)) + + actual_graph = graph_factory() + + actual_graph.update(f"LOAD <{source_path.as_uri()}> INTO GRAPH <{EGDO.graph}>") + + if logging.getLogger().isEnabledFor(logging.DEBUG): + debug_format = "trig" + logging.debug( + "expected_graph = \n%s", expected_graph.serialize(format=debug_format) + ) + logging.debug( + "actual_graph = \n%s", actual_graph.serialize(format=debug_format) + ) + + GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph) diff --git a/test/utils/__init__.py b/test/utils/__init__.py index dc27251a3b..5c988d96ff 100644 --- a/test/utils/__init__.py +++ b/test/utils/__init__.py @@ -19,6 +19,7 @@ Iterable, List, Optional, + Sequence, Set, Tuple, Type, @@ -35,7 +36,7 @@ from rdflib import BNode, ConjunctiveGraph, Graph from rdflib.graph import Dataset from rdflib.plugin import Plugin -from rdflib.term import Identifier, Literal, Node, URIRef +from rdflib.term import IdentifiedNode, Identifier, Literal, Node, URIRef PluginT = TypeVar("PluginT") @@ -257,6 +258,23 @@ def assert_quad_sets_equals( else: assert lhs_set != rhs_set + @classmethod + def assert_collection_graphs_equal( + cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph + ) -> None: + """ + Assert that all graphs in the provided collections are equal, + comparing named graphs with identically named graphs. 
+ """ + cls.assert_triple_sets_equals(lhs.default_context, rhs.default_context) + graph_names = cls.non_default_graph_names(lhs) | cls.non_default_graph_names( + rhs + ) + for identifier in graph_names: + cls.assert_triple_sets_equals( + lhs.get_context(identifier), rhs.get_context(identifier) + ) + @classmethod def assert_sets_equals( cls, @@ -381,6 +399,21 @@ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None: if object.datatype in datatypes: object._datatype = None + @classmethod + def non_default_graph_names( + cls, container: ConjunctiveGraph + ) -> Set[IdentifiedNode]: + return set(context.identifier for context in container.contexts()) - { + container.default_context.identifier + } + + @classmethod + def non_default_graphs(cls, container: ConjunctiveGraph) -> Sequence[Graph]: + result = [] + for name in cls.non_default_graph_names(container): + result.append(container.get_context(name)) + return result + def eq_(lhs, rhs, msg=None): """