Skip to content

Commit

Permalink
fix: SPARQL LOAD ... INTO GRAPH handling
Browse files Browse the repository at this point in the history
`LOAD ... INTO GRAPH` stopped working correctly after the change to
handling of the `publicID` `Graph.parse` parameter in RDFLib 7.0.0
(<RDFLib#2406>).

This is because `LOAD` evaluation relied on `publicID` to select the
graph name. So after <RDFLib#2406> data
would be loaded into the default graph even if a named graph is
specified.

This change adds tests for `LOAD ... INTO GRAPH` and fixes the load
evaluation.

A consequence of this change is also that relative IRI lookup for graphs
loaded with `LOAD ... INTO GRAPH` is now relative to the source document
URI instead of the base URI of the graph being loaded into, which is
more correct.
  • Loading branch information
aucampia committed Aug 28, 2023
1 parent 079f388 commit 6d14617
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 7 deletions.
21 changes: 16 additions & 5 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,14 +311,23 @@ def dataset(self) -> ConjunctiveGraph:
)
return self._dataset

def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
def load(
self,
source: URIRef,
default: bool = False,
into: Optional[Identifier] = None,
**kwargs: Any,
) -> None:
"""
Load data from the source into the query context's dataset.
:param source: The source to load from.
:param default: If `True`, triples from the source will be added to the
default graph, otherwise it will be loaded into a graph with
``source`` URI as its name.
:param default: If `True`, triples from the source will be added
to the default graph, otherwise it will be loaded into a
graph with ``source`` URI as its name.
:param into: The name of the graph to load the data into. If
`None`, the source URI will be used as the name of the
graph.
:param kwargs: Keyword arguments to pass to
:meth:`rdflib.graph.Graph.parse`.
"""
Expand Down Expand Up @@ -353,7 +362,9 @@ def _load(graph, source):
if default:
_load(self.graph, source)
else:
_load(self.dataset.get_context(source), source)
if into is None:
into = source
_load(self.dataset.get_context(into), source)

def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
# in SPARQL BNodes are just labels
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/sparql/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def evalLoad(ctx: QueryContext, u: CompValue) -> None:
assert isinstance(u.iri, URIRef)

if u.graphiri:
ctx.load(u.iri, default=False, publicID=u.graphiri)
ctx.load(u.iri, default=False, into=u.graphiri)
else:
ctx.load(u.iri, default=True)

Expand Down
67 changes: 67 additions & 0 deletions test/test_sparql/test_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import logging
from test.data import TEST_DATA_DIR
from test.utils import GraphHelper
from test.utils.graph import load_sources
from test.utils.namespace import EGDO
from typing import Callable

import pytest

from rdflib.graph import ConjunctiveGraph, Dataset, Graph


@pytest.mark.parametrize("graph_factory", [Graph, ConjunctiveGraph, Dataset])
def test_load_into_default(graph_factory: Callable[[], Graph]) -> None:
    """
    A bare ``LOAD <source>`` update adds the source's triples to the
    default graph.

    The expected graph is built by loading the same file with
    ``load_sources``; the actual graph is produced by the SPARQL update.
    Both are created with the same ``graph_factory``.
    """
    source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl"
    update_request = f"LOAD <{source_path.as_uri()}>"

    expected_graph = graph_factory()
    load_sources(source_path, graph=expected_graph)

    actual_graph = graph_factory()
    actual_graph.update(update_request)

    # Serialization is only worth paying for when debug output is visible.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        if isinstance(expected_graph, ConjunctiveGraph):
            debug_format = "trig"
        else:
            debug_format = "turtle"
        expected_dump = expected_graph.serialize(format=debug_format)
        logging.debug("expected_graph = \n%s", expected_dump)
        actual_dump = actual_graph.serialize(format=debug_format)
        logging.debug("actual_graph = \n%s", actual_dump)

    # Plain graphs are compared as triple sets; graph collections are
    # compared graph by graph.
    if not isinstance(expected_graph, ConjunctiveGraph):
        GraphHelper.assert_triple_sets_equals(expected_graph, actual_graph)
        return

    assert isinstance(actual_graph, ConjunctiveGraph)
    GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)


@pytest.mark.parametrize("graph_factory", [ConjunctiveGraph, Dataset])
def test_load_into_named(graph_factory: Callable[[], ConjunctiveGraph]) -> None:
    """
    ``LOAD <source> INTO GRAPH <name>`` adds the source's triples to the
    graph called ``<name>``.

    The expected collection is built by loading the file straight into the
    ``EGDO.graph`` context; the actual collection is produced by the SPARQL
    update targeting that same graph name.
    """
    source_path = TEST_DATA_DIR / "variants" / "simple_triple.ttl"
    update_request = f"LOAD <{source_path.as_uri()}> INTO GRAPH <{EGDO.graph}>"

    expected_graph = graph_factory()
    expected_target = expected_graph.get_context(EGDO.graph)
    load_sources(source_path, graph=expected_target)

    actual_graph = graph_factory()
    actual_graph.update(update_request)

    # Serialization is only worth paying for when debug output is visible.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        debug_format = "trig"
        expected_dump = expected_graph.serialize(format=debug_format)
        logging.debug("expected_graph = \n%s", expected_dump)
        actual_dump = actual_graph.serialize(format=debug_format)
        logging.debug("actual_graph = \n%s", actual_dump)

    GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)
35 changes: 34 additions & 1 deletion test/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Iterable,
List,
Optional,
Sequence,
Set,
Tuple,
Type,
Expand All @@ -35,7 +36,7 @@
from rdflib import BNode, ConjunctiveGraph, Graph
from rdflib.graph import Dataset
from rdflib.plugin import Plugin
from rdflib.term import Identifier, Literal, Node, URIRef
from rdflib.term import IdentifiedNode, Identifier, Literal, Node, URIRef

PluginT = TypeVar("PluginT")

Expand Down Expand Up @@ -257,6 +258,23 @@ def assert_quad_sets_equals(
else:
assert lhs_set != rhs_set

@classmethod
def assert_collection_graphs_equal(
    cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph
) -> None:
    """
    Assert that the two provided graph collections are equal, graph by graph.

    The default contexts are compared first. Then every non-default graph
    name found in either collection is looked up in both collections and
    the two identically named graphs are compared as triple sets.
    """
    cls.assert_triple_sets_equals(lhs.default_context, rhs.default_context)

    # Use the union of names from both sides so each name seen in either
    # collection is compared.
    names_to_check = cls.non_default_graph_names(lhs).union(
        cls.non_default_graph_names(rhs)
    )
    for graph_name in names_to_check:
        lhs_graph = lhs.get_context(graph_name)
        rhs_graph = rhs.get_context(graph_name)
        cls.assert_triple_sets_equals(lhs_graph, rhs_graph)

@classmethod
def assert_sets_equals(
cls,
Expand Down Expand Up @@ -381,6 +399,21 @@ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None:
if object.datatype in datatypes:
object._datatype = None

@classmethod
def non_default_graph_names(
    cls, container: ConjunctiveGraph
) -> Set[IdentifiedNode]:
    """
    Return the identifiers of all contexts in ``container`` except the
    identifier of its default context.
    """
    names = {context.identifier for context in container.contexts()}
    names.discard(container.default_context.identifier)
    return names

@classmethod
def non_default_graphs(cls, container: ConjunctiveGraph) -> Sequence[Graph]:
    """
    Return the graphs of ``container`` whose names are reported by
    ``non_default_graph_names``, i.e. every context except the default one.
    """
    return [
        container.get_context(graph_name)
        for graph_name in cls.non_default_graph_names(container)
    ]


def eq_(lhs, rhs, msg=None):
"""
Expand Down

0 comments on commit 6d14617

Please sign in to comment.