From db7313aa2612c415553549ec2efa813d27408d5f Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Tue, 23 May 2023 20:19:47 +0000 Subject: [PATCH] BREAKING CHANGE: Don't use `publicID` as the name for the default graph. When parsing data into a `ConjunctiveGraph` or `Dataset`, the triples in the default graphs in the sources were loaded into a graph named `publicID`. This behaviour has been changed, and now the triples from the default graph in source RDF documents will be loaded into `ConjunctiveGraph.default_context` or `Dataset.default_context`. The `publicID` parameter to `ConjunctiveGraph.parse` and `Dataset.parse` constructors will now only be used as the base URI for relative URI resolution. --- README.md | 2 +- docs/conf.py | 1 + docs/index.rst | 1 + docs/upgrade6to7.rst | 45 ++++++ pyproject.toml | 2 +- rdflib/graph.py | 71 +++++--- rdflib/plugins/sparql/sparql.py | 13 +- test/data/variants/more_quads-asserts.json | 2 +- test/data/variants/more_quads.jsonld | 107 ++++++------ test/data/variants/more_quads.nq | 8 +- test/data/variants/more_quads.trig | 5 + test/data/variants/simple_triple.n3 | 1 + test/data/variants/simple_triple.trig | 2 + .../test_conjunctive_graph.py | 2 +- .../test_dataset_default_graph.py | 152 ++++++++++++++++++ test/test_graph/test_variants.py | 13 +- test/test_issues/test_issue535.py | 2 +- test/test_store/test_store_berkeleydb.py | 37 ++++- test/test_trig.py | 9 -- test/test_w3c_spec/test_sparql10_w3c.py | 7 +- test/test_w3c_spec/test_sparql11_w3c.py | 7 +- test/test_w3c_spec/test_sparql_rdflib.py | 7 +- test/utils/__init__.py | 4 + test/utils/iri.py | 30 ++++ test/utils/sparql_checker.py | 55 ++----- test/utils/test/test_testutils.py | 30 ++-- 26 files changed, 448 insertions(+), 167 deletions(-) create mode 100644 docs/upgrade6to7.rst create mode 100644 test/data/variants/simple_triple.n3 create mode 100644 test/data/variants/simple_triple.trig create mode 100644 test/test_dataset/test_dataset_default_graph.py diff --git a/README.md 
b/README.md index 6157ef8ef..b2a92d16a 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Help with maintenance of all of the RDFLib family of packages is always welcome ## Versions & Releases -* `6.4.0a0` current `main` branch +* `7.0.0a0` current `main` branch * `6.x.y` current release and support Python 3.7+ only. Many improvements over 5.0.0 * see [Releases](https://github.com/RDFLib/rdflib/releases) * `5.x.y` supports Python 2.7 and 3.4+ and is [mostly backwards compatible with 4.2.2](https://rdflib.readthedocs.io/en/stable/upgrade4to5.html). diff --git a/docs/conf.py b/docs/conf.py index 1e2b7ef46..163947dce 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -55,6 +55,7 @@ # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html autodoc_default_options = {"special-members": True} +autodoc_inherit_docstrings = True # https://github.com/tox-dev/sphinx-autodoc-typehints always_document_param_types = True diff --git a/docs/index.rst b/docs/index.rst index e36962ea0..352ce00ef 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -66,6 +66,7 @@ If you are familiar with RDF and are looking for details on how RDFLib handles i namespaces_and_bindings persistence merging + upgrade6to7 upgrade5to6 upgrade4to5 security_considerations diff --git a/docs/upgrade6to7.rst b/docs/upgrade6to7.rst new file mode 100644 index 000000000..da3e4852f --- /dev/null +++ b/docs/upgrade6to7.rst @@ -0,0 +1,45 @@ +.. _upgrade6to7: Upgrading from RDFLib version 6 to 7 + +============================================ +Upgrading 6 to 7 +============================================ + +New behaviour for ``publicID`` in ``parse`` methods. +---------------------------------------------------- + +Before version 7, the ``publicID`` argument to the +:meth:`~rdflib.graph.ConjunctiveGraph.parse` and +:meth:`~rdflib.graph.Dataset.parse` methods was used as the name for the default +graph, and triples from the default graph in a source were loaded into the graph +named ``publicID``. 
+ +In version 7, the ``publicID`` argument is only used as the base URI for relative +URI resolution as defined in `IETF RFC 3986 +`_. + +To accommodate this change, ensure that use of the ``publicID`` argument is +consistent with the new behaviour. + +If you want to load triples from a format that does not support named graphs +into a named graph, use the following code: + +.. code-block:: python + + from rdflib import ConjunctiveGraph + + cg = ConjunctiveGraph() + cg.get_context("example:graph_name").parse("http://example.com/source.trig", format="trig") + +If you want to move triples from the default graph into a named graph, use the +following code: + +.. code-block:: python + + from rdflib import ConjunctiveGraph + + cg = ConjunctiveGraph() + cg.parse("http://example.com/source.trig", format="trig") + destination_graph = cg.get_context("example:graph_name") + for triple in cg.default_context.triples((None, None, None)): + destination_graph.add(triple) + cg.default_context.remove(triple) diff --git a/pyproject.toml b/pyproject.toml index 5dee7655b..4d37e57c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "rdflib" -version = "6.4.0a0" +version = "7.0.0a0" description = """RDFLib is a Python library for working with RDF, \ a simple yet powerful language for representing information.""" authors = ["Daniel 'eikeon' Krech "] diff --git a/rdflib/graph.py b/rdflib/graph.py index 6e2e50aff..4d8645b2f 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -1400,26 +1400,26 @@ def parse( :doc:`Security Considerations ` documentation. - :Parameters: - - - ``source``: An InputSource, file-like object, or string. In the case - of a string the string is the location of the source. - - ``location``: A string indicating the relative or absolute URL of - the source. Graph's absolutize method is used if a relative location + :param source: An `InputSource`, file-like object, `Path` like object, + or string. 
In the case of a string the string is the location of the + source. + :param location: A string indicating the relative or absolute URL of the + source. `Graph`'s absolutize method is used if a relative location is specified. - - ``file``: A file-like object. - - ``data``: A string containing the data to be parsed. - - ``format``: Used if format can not be determined from source, e.g. + :param file: A file-like object. + :param data: A string containing the data to be parsed. + :param format: Used if format can not be determined from source, e.g. file extension or Media Type. Defaults to text/turtle. Format support can be extended with plugins, but "xml", "n3" (use for turtle), "nt" & "trix" are built in. - - ``publicID``: the logical URI to use as the document base. If None + :param publicID: the logical URI to use as the document base. If None specified the document location is used (at least in the case where - there is a document location). - - :Returns: - - - self, the graph instance. + there is a document location). This is used as the base URI when + resolving relative URIs in the source document, as defined in `IETF + RFC 3986 + <https://datatracker.ietf.org/doc/html/rfc3986#section-5>`_, + given the source document does not define a base URI. + :return: ``self``, i.e. the :class:`~rdflib.graph.Graph` instance. Examples: @@ -2206,15 +2206,18 @@ def parse( **args: Any, ) -> "Graph": """ - Parse source adding the resulting triples to its own context - (sub graph of this graph). + Parse source adding the resulting triples to its own context (sub graph + of this graph). See :meth:`rdflib.graph.Graph.parse` for documentation on arguments. + If the source is in a format that does not support named graphs its triples + will be added to the default graph (i.e. `ConjunctiveGraph.default_context`). + :Returns: - The graph into which the source was parsed. In the case of n3 - it returns the root context. + The graph into which the source was parsed. In the case of n3 it returns + the root context. .. 
caution:: @@ -2228,6 +2231,14 @@ def parse( For information on available security measures, see the RDFLib :doc:`Security Considerations ` documentation. + + *Changed in 7.0*: The ``publicID`` argument is no longer used as the + identifier (i.e. name) of the default graph as was the case before + version 7.0. In the case of sources that do not support named graphs, + the ``publicID`` parameter will also not be used as the name for the + graph that the data is loaded into, and instead the triples from sources + that do not support named graphs will be loaded into the default graph + (i.e. `ConjunctiveGraph.default_context`). """ source = create_input_source( @@ -2246,12 +2257,8 @@ def parse( # create_input_source will ensure that publicId is not None, though it # would be good if this guarantee was made more explicit i.e. by type # hint on InputSource (TODO/FIXME). - g_id: str = publicID and publicID or source.getPublicId() - if not isinstance(g_id, Node): - g_id = URIRef(g_id) - context = Graph(store=self.store, identifier=g_id) - context.remove((None, None, None)) # hmm ? + context = self.default_context context.parse(source, publicID=publicID, format=format, **args) # TODO: FIXME: This should not return context, but self. return context @@ -2459,6 +2466,14 @@ def parse( **args: Any, ) -> "Graph": """ + Parse an RDF source adding the resulting triples to the Graph. + + See :meth:`rdflib.graph.Graph.parse` for documentation on arguments. + + The source is specified using one of source, location, file or data. + + If the source is in a format that does not support named graphs its triples + will be added to the default graph (i.e. `Dataset.default_context`). .. caution:: @@ -2472,6 +2487,14 @@ def parse( For information on available security measures, see the RDFLib :doc:`Security Considerations ` documentation. + + *Changed in 7.0*: The ``publicID`` argument is no longer used as the + identifier (i.e. 
name) of the default graph as was the case before + version 7.0. In the case of sources that do not support named graphs, + the ``publicID`` parameter will also not be used as the name for the + graph that the data is loaded into, and instead the triples from sources + that do not support named graphs will be loaded into the default graph + (i.e. `Dataset.default_context`). """ c = ConjunctiveGraph.parse( diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py index 64230a645..7bfe28284 100644 --- a/rdflib/plugins/sparql/sparql.py +++ b/rdflib/plugins/sparql/sparql.py @@ -312,6 +312,17 @@ def dataset(self) -> ConjunctiveGraph: return self._dataset def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None: + """ + Load data from the source into the query context's dataset. + + :param source: The source to load from. + :param default: If `True`, triples from the source will be added to the + default graph, otherwise they will be loaded into a graph with + ``source`` URI as its name. + :param kwargs: Keyword arguments to pass to + :meth:`rdflib.graph.Graph.parse`. 
+ """ + def _load(graph, source): try: return graph.parse(source, format="turtle", **kwargs) @@ -342,7 +353,7 @@ def _load(graph, source): if default: _load(self.graph, source) else: - _load(self.dataset, source) + _load(self.dataset.get_context(source), source) def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]: # in SPARQL BNodes are just labels diff --git a/test/data/variants/more_quads-asserts.json b/test/data/variants/more_quads-asserts.json index 83ae1a8af..bd0224160 100644 --- a/test/data/variants/more_quads-asserts.json +++ b/test/data/variants/more_quads-asserts.json @@ -1,4 +1,4 @@ { - "quad_count": 6, + "quad_count": 8, "exact_match": true } diff --git a/test/data/variants/more_quads.jsonld b/test/data/variants/more_quads.jsonld index 08d6c9360..305497123 100644 --- a/test/data/variants/more_quads.jsonld +++ b/test/data/variants/more_quads.jsonld @@ -1,56 +1,65 @@ { - "@graph": [ - { - "@graph": [ + "@graph": [ { - "@id": "example:s20", - "example:p20": { - "@id": "example:o20" - } + "@id": "example:s10", + "example:p10": { + "@id": "example:o10" + } }, { - "@id": "example:s21", - "example:p21": { - "@id": "example:o21" - } + "@id": "example:s01", + "example:p01": { + "@id": "example:o01" + } + }, + { + "@id": "example:s00", + "example:p00": { + "@id": "example:o02" + } + }, + { + "@id": "example:s11", + "example:p11": { + "@id": "example:o11" + } + }, + { + "@id": "example:g3", + "@graph": [ + { + "@id": "example:s31", + "example:p31": { + "@id": "example:o31" + } + }, + { + "@id": "example:s30", + "example:p30": { + "@id": "example:o30" + } + } + ] + }, + { + "@id": "example:g2", + "@graph": [ + { + "@id": "example:s21", + "example:p21": { + "@id": "example:o21" + } + }, + { + "@id": "example:s20", + "example:p20": { + "@id": "example:o20" + } + } + ] } - ], - "@id": "example:g2" - }, - { - "@id": "example:s00", - "p00": "example:o02" - }, - { - "@id": "example:s01", - "p01": "example:o01" - }, - { - "@id": "example:s10", - 
"p10": "example:o10" - }, - { - "@id": "example:s11", - "p11": "example:o11" + ], + "@context": { + "example": "http://example.org/" } - ], - "@context": { - "p10": { - "@id": "http://example.org/p10", - "@type": "@id" - }, - "p01": { - "@id": "http://example.org/p01", - "@type": "@id" - }, - "p00": { - "@id": "http://example.org/p00", - "@type": "@id" - }, - "p11": { - "@id": "http://example.org/p11", - "@type": "@id" - }, - "example": "http://example.org/" - } } diff --git a/test/data/variants/more_quads.nq b/test/data/variants/more_quads.nq index 64b6ccf33..49ed7b49e 100644 --- a/test/data/variants/more_quads.nq +++ b/test/data/variants/more_quads.nq @@ -1,6 +1,8 @@ - . - . . + . + . . - . . + . + . + . diff --git a/test/data/variants/more_quads.trig b/test/data/variants/more_quads.trig index ddbf7020e..13d534d68 100644 --- a/test/data/variants/more_quads.trig +++ b/test/data/variants/more_quads.trig @@ -13,3 +13,8 @@ example:g2 { example:s20 example:p20 example:o20 . example:s21 example:p21 example:o21 . } + +example:g3 { + example:s30 example:p30 example:o30 . + example:s31 example:p31 example:o31 . +} diff --git a/test/data/variants/simple_triple.n3 b/test/data/variants/simple_triple.n3 new file mode 100644 index 000000000..0529c7857 --- /dev/null +++ b/test/data/variants/simple_triple.n3 @@ -0,0 +1 @@ + . diff --git a/test/data/variants/simple_triple.trig b/test/data/variants/simple_triple.trig new file mode 100644 index 000000000..e5ec98502 --- /dev/null +++ b/test/data/variants/simple_triple.trig @@ -0,0 +1,2 @@ + + . diff --git a/test/test_conjunctivegraph/test_conjunctive_graph.py b/test/test_conjunctivegraph/test_conjunctive_graph.py index 54393ac34..bbaedcdee 100644 --- a/test/test_conjunctivegraph/test_conjunctive_graph.py +++ b/test/test_conjunctivegraph/test_conjunctive_graph.py @@ -22,7 +22,7 @@ def test_bnode_publicid(): b = BNode() data = " ." 
print("Parsing %r into %r" % (data, b)) - g.parse(data=data, format="turtle", publicID=b) + g.get_context(b).parse(data=data, format="turtle", publicID=b) triples = list(g.get_context(b).triples((None, None, None))) if not triples: diff --git a/test/test_dataset/test_dataset_default_graph.py b/test/test_dataset/test_dataset_default_graph.py new file mode 100644 index 000000000..fb219770c --- /dev/null +++ b/test/test_dataset/test_dataset_default_graph.py @@ -0,0 +1,152 @@ +import itertools +import logging +from test.data import TEST_DATA_DIR +from typing import Iterable, Type, Union + +import pytest +from _pytest.mark.structures import ParameterSet + +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, ConjunctiveGraph, Dataset +from rdflib.term import BNode, URIRef + + +def make_load_default_and_named() -> Iterable[ParameterSet]: + for container_type, file_extension in itertools.product( + (Dataset, ConjunctiveGraph), ("trig", "nq", "jsonld") + ): + yield pytest.param( + container_type, + file_extension, + id=f"{container_type.__name__}-{file_extension}", + ) + + +EXTENSION_FORMATS = { + "trig": "trig", + "nq": "nquads", + "jsonld": "json-ld", + "nt": "ntriples", + "ttl": "turtle", + "hext": "hext", + "n3": "n3", +} + + +@pytest.mark.parametrize( + ["container_type", "file_extension"], make_load_default_and_named() +) +def test_load_default_and_named( + container_type: Union[Type[Dataset], Type[ConjunctiveGraph]], file_extension: str +) -> None: + logging.debug("container_type = %s", container_type) + container = container_type() + + if container_type is Dataset: + # An empty dataset has 1 default graph and no named graphs, so 1 graph in + # total. 
+ assert 1 == sum(1 for _ in container.contexts()) + assert DATASET_DEFAULT_GRAPH_ID == next( + (context.identifier for context in container.contexts()), None + ) + assert container.default_context == next(container.contexts(), None) + else: + assert isinstance(container.default_context.identifier, BNode) + + # Load an RDF document with triples in three graphs into the container. + format = EXTENSION_FORMATS[file_extension] + source = TEST_DATA_DIR / "variants" / f"more_quads.{file_extension}" + container.parse(source=source, format=format) + + context_identifiers = set(context.identifier for context in container.contexts()) + + logging.info("context_identifiers = %s", context_identifiers) + logging.info( + "container.default_context.triples(...) = %s", + set(container.default_context.triples((None, None, None))), + ) + + all_contexts = set(container.contexts()) + logging.info( + "all_contexts = %s", set(context.identifier for context in all_contexts) + ) + + non_default_contexts = set(container.contexts()) - {container.default_context} + # There should now be two graphs in the container that are not the default graph. + logging.info( + "non_default_graphs = %s", + set(context.identifier for context in non_default_contexts), + ) + assert 2 == len(non_default_contexts) + + # The identifiers of the non-default graphs should be the ones from the document. + assert { + URIRef("http://example.org/g2"), + URIRef("http://example.org/g3"), + } == set(context.identifier for context in non_default_contexts) + + # The default graph should have 4 triples. 
+ assert 4 == len(container.default_context) + + +def make_load_default_only_cases() -> Iterable[ParameterSet]: + for container_type, file_extension in itertools.product( + (Dataset, ConjunctiveGraph), ("trig", "ttl", "nq", "nt", "jsonld", "hext", "n3") + ): + yield pytest.param( + container_type, + file_extension, + id=f"{container_type.__name__}-{file_extension}", + ) + + +@pytest.mark.parametrize( + ["container_type", "file_extension"], make_load_default_only_cases() +) +def test_load_default_only( + container_type: Union[Type[Dataset], Type[ConjunctiveGraph]], file_extension: str +) -> None: + logging.debug("container_type = %s", container_type) + container = container_type() + + if container_type is Dataset: + # An empty dataset has 1 default graph and no named graphs, so 1 graph in + # total. + assert 1 == sum(1 for _ in container.contexts()) + assert DATASET_DEFAULT_GRAPH_ID == next( + (context.identifier for context in container.contexts()), None + ) + assert container.default_context == next(container.contexts(), None) + else: + assert isinstance(container.default_context.identifier, BNode) + + # Load an RDF document with only triples in the default graph into the container. + format = EXTENSION_FORMATS[file_extension] + source = TEST_DATA_DIR / "variants" / f"simple_triple.{file_extension}" + container.parse(source=source, format=format) + + context_identifiers = set(context.identifier for context in container.contexts()) + + logging.info("context_identifiers = %s", context_identifiers) + logging.info( + "container.default_context.triples(...) = %s", + set(container.default_context.triples((None, None, None))), + ) + + all_contexts = set(container.contexts()) + logging.info( + "all_contexts = %s", set(context.identifier for context in all_contexts) + ) + + non_default_contexts = set(container.contexts()) - {container.default_context} + # There should now be no graphs in the container that are not the default graph. 
+ logging.info( + "non_default_graphs = %s", + set(context.identifier for context in non_default_contexts), + ) + assert 0 == len(non_default_contexts) + + # The identifiers of the non-default graphs should be an empty set. + assert set() == set(context.identifier for context in non_default_contexts) + + # The default graph should have 1 triple. + assert 1 == len(container.default_context) diff --git a/test/test_graph/test_variants.py b/test/test_graph/test_variants.py index 3cf931c44..09b2a156d 100644 --- a/test/test_graph/test_variants.py +++ b/test/test_graph/test_variants.py @@ -27,7 +27,7 @@ import rdflib.compare import rdflib.util -from rdflib.graph import ConjunctiveGraph +from rdflib.graph import Dataset from rdflib.namespace import XSD from rdflib.term import URIRef from rdflib.util import guess_format @@ -52,9 +52,7 @@ class GraphAsserts: exact_match: bool = False has_subject_iris: Optional[List[str]] = None - def check( - self, first_graph: Optional[ConjunctiveGraph], graph: ConjunctiveGraph - ) -> None: + def check(self, first_graph: Optional[Dataset], graph: Dataset) -> None: """ if `first_graph` is `None` then this is the first check before any other graphs have been processed. 
@@ -223,7 +221,7 @@ def test_variants(graph_variant: GraphVariants) -> None: logging.debug("graph_variant = %s", graph_variant) public_id = URIRef(f"example:{graph_variant.key}") assert len(graph_variant.variants) > 0 - first_graph: Optional[ConjunctiveGraph] = None + first_graph: Optional[Dataset] = None first_path: Optional[Path] = None logging.debug("graph_variant.asserts = %s", graph_variant.asserts) @@ -231,7 +229,7 @@ def test_variants(graph_variant: GraphVariants) -> None: logging.debug("variant_path = %s", variant_path) format = guess_format(variant_path.name, fmap=SUFFIX_FORMAT_MAP) assert format is not None, f"could not determine format for {variant_path.name}" - graph = ConjunctiveGraph() + graph = Dataset() graph.parse(variant_path, format=format, publicID=public_id) # Stripping data types as different parsers (e.g. hext) have different # opinions of when a bare string is of datatype XSD.string or not. @@ -243,8 +241,9 @@ def test_variants(graph_variant: GraphVariants) -> None: first_path = variant_path else: assert first_path is not None - GraphHelper.assert_isomorphic( + GraphHelper.assert_cgraph_isomorphic( first_graph, graph, + False, f"checking {variant_path.relative_to(VARIANTS_DIR)} against {first_path.relative_to(VARIANTS_DIR)}", ) diff --git a/test/test_issues/test_issue535.py b/test/test_issues/test_issue535.py index de38404d7..dbb7113ae 100644 --- a/test/test_issues/test_issue535.py +++ b/test/test_issues/test_issue535.py @@ -16,4 +16,4 @@ def test_nquads_default_graph(): assert len(ds) == 3, len(g) assert len(list(ds.contexts())) == 2, len(list(ds.contexts())) - assert len(ds.get_context(publicID)) == 2, len(ds.get_context(publicID)) + assert len(ds.default_context) == 2, len(ds.get_context(publicID)) diff --git a/test/test_store/test_store_berkeleydb.py b/test/test_store/test_store_berkeleydb.py index 0223fbad0..a0edecc54 100644 --- a/test/test_store/test_store_berkeleydb.py +++ b/test/test_store/test_store_berkeleydb.py @@ -1,18 +1,23 @@ 
+import logging import tempfile +from typing import Iterable, Optional, Tuple import pytest from rdflib import ConjunctiveGraph, URIRef from rdflib.plugins.stores.berkeleydb import has_bsddb +from rdflib.query import ResultRow from rdflib.store import VALID_STORE +logger = logging.getLogger(__name__) + pytestmark = pytest.mark.skipif( not has_bsddb, reason="skipping berkeleydb tests, modile not available" ) @pytest.fixture -def get_graph(): +def get_graph() -> Iterable[Tuple[str, ConjunctiveGraph]]: path = tempfile.NamedTemporaryFile().name g = ConjunctiveGraph("BerkeleyDB") rt = g.open(path, create=True) @@ -35,7 +40,7 @@ def get_graph(): g.destroy(path) -def test_write(get_graph): +def test_write(get_graph: Tuple[str, ConjunctiveGraph]): path, g = get_graph assert ( len(g) == 3 @@ -60,7 +65,7 @@ def test_write(get_graph): ), "There must still be four triples in the graph after the third data chunk parse" -def test_read(get_graph): +def test_read(get_graph: Tuple[str, ConjunctiveGraph]): path, g = get_graph sx = None for s in g.subjects( @@ -71,7 +76,7 @@ def test_read(get_graph): assert sx == URIRef("https://example.org/d") -def test_sparql_query(get_graph): +def test_sparql_query(get_graph: Tuple[str, ConjunctiveGraph]): path, g = get_graph q = """ PREFIX : @@ -83,11 +88,12 @@ def test_sparql_query(get_graph): c = 0 for row in g.query(q): + assert isinstance(row, ResultRow) c = int(row.c) assert c == 2, "SPARQL COUNT must return 2" -def test_sparql_insert(get_graph): +def test_sparql_insert(get_graph: Tuple[str, ConjunctiveGraph]): path, g = get_graph q = """ PREFIX : @@ -100,8 +106,15 @@ def test_sparql_insert(get_graph): assert len(g) == 4, "After extra triple insert, length must be 4" -def test_multigraph(get_graph): +def test_multigraph(get_graph: Tuple[str, ConjunctiveGraph]): path, g = get_graph + + if logger.isEnabledFor(logging.DEBUG): + logging.debug( + "graph before = \n%s", + g.serialize(format="trig"), + ) + q = """ PREFIX : @@ -116,6 +129,12 @@ def 
test_multigraph(get_graph): g.update(q) + if logger.isEnabledFor(logging.DEBUG): + logging.debug( + "graph after = \n%s", + g.serialize(format="trig"), + ) + q = """ SELECT (COUNT(?g) AS ?c) WHERE { @@ -129,11 +148,13 @@ def test_multigraph(get_graph): """ c = 0 for row in g.query(q): + assert isinstance(row, ResultRow) c = int(row.c) - assert c == 3, "SPARQL COUNT must return 3 (default, :m & :n)" + assert c == 2, "SPARQL COUNT must return 2 (default, :m & :n)" -def test_open_shut(get_graph): +def test_open_shut(get_graph: Tuple[str, ConjunctiveGraph]): + g: Optional[ConjunctiveGraph] path, g = get_graph assert len(g) == 3, "Initially we must have 3 triples from setUp" g.close() diff --git a/test/test_trig.py b/test/test_trig.py index 49572e445..de5c2108f 100644 --- a/test/test_trig.py +++ b/test/test_trig.py @@ -1,7 +1,5 @@ import re -import pytest - import rdflib TRIPLE = ( @@ -125,13 +123,6 @@ def test_graph_parsing(): assert len(list(g.contexts())) == 2 -@pytest.mark.xfail( - raises=AssertionError, - reason=""" - This is failing because conjuncitve graph assigns things in the default graph to - a graph with a bnode as name. On every parse iteration a new BNode is generated - resulting in the default graph content appearing multipile times in the output.""", -) def test_round_trips(): data = """ . diff --git a/test/test_w3c_spec/test_sparql10_w3c.py b/test/test_w3c_spec/test_sparql10_w3c.py index 73d06d89b..70df2d066 100644 --- a/test/test_w3c_spec/test_sparql10_w3c.py +++ b/test/test_w3c_spec/test_sparql10_w3c.py @@ -1,6 +1,7 @@ """ Runs the SPARQL 1.0 test suite from. 
""" +from contextlib import ExitStack from test.data import TEST_DATA_DIR from test.utils import ensure_suffix from test.utils.dawg_manifest import MarksDictType, params_from_sources @@ -118,5 +119,7 @@ def configure_rdflib() -> Generator[None, None, None]: report_prefix="rdflib_w3c_sparql10", ), ) -def test_entry_sparql10(monkeypatch: MonkeyPatch, manifest_entry: SPARQLEntry) -> None: - check_entry(monkeypatch, manifest_entry) +def test_entry_sparql10( + monkeypatch: MonkeyPatch, exit_stack: ExitStack, manifest_entry: SPARQLEntry +) -> None: + check_entry(monkeypatch, exit_stack, manifest_entry) diff --git a/test/test_w3c_spec/test_sparql11_w3c.py b/test/test_w3c_spec/test_sparql11_w3c.py index 6bfcb31f1..2afcf910a 100644 --- a/test/test_w3c_spec/test_sparql11_w3c.py +++ b/test/test_w3c_spec/test_sparql11_w3c.py @@ -1,6 +1,7 @@ """ Runs the SPARQL 1.1 test suite from. """ +from contextlib import ExitStack from test.data import TEST_DATA_DIR from test.utils import ensure_suffix from test.utils.dawg_manifest import MarksDictType, params_from_sources @@ -259,5 +260,7 @@ def configure_rdflib() -> Generator[None, None, None]: report_prefix="rdflib_w3c_sparql11", ), ) -def test_entry_sparql11(monkeypatch: MonkeyPatch, manifest_entry: SPARQLEntry) -> None: - check_entry(monkeypatch, manifest_entry) +def test_entry_sparql11( + monkeypatch: MonkeyPatch, exit_stack: ExitStack, manifest_entry: SPARQLEntry +) -> None: + check_entry(monkeypatch, exit_stack, manifest_entry) diff --git a/test/test_w3c_spec/test_sparql_rdflib.py b/test/test_w3c_spec/test_sparql_rdflib.py index 2a278461a..73809109a 100644 --- a/test/test_w3c_spec/test_sparql_rdflib.py +++ b/test/test_w3c_spec/test_sparql_rdflib.py @@ -1,6 +1,7 @@ """ Runs the RDFLib SPARQL test suite. 
""" +from contextlib import ExitStack from test.data import TEST_DATA_DIR from test.utils import ensure_suffix from test.utils.dawg_manifest import MarksDictType, params_from_sources @@ -61,5 +62,7 @@ def configure_rdflib() -> Generator[None, None, None]: report_prefix="rdflib_sparql", ), ) -def test_entry_rdflib(monkeypatch: MonkeyPatch, manifest_entry: SPARQLEntry) -> None: - check_entry(monkeypatch, manifest_entry) +def test_entry_rdflib( + monkeypatch: MonkeyPatch, exit_stack: ExitStack, manifest_entry: SPARQLEntry +) -> None: + check_entry(monkeypatch, exit_stack, manifest_entry) diff --git a/test/utils/__init__.py b/test/utils/__init__.py index a5c40e3f8..dc27251a3 100644 --- a/test/utils/__init__.py +++ b/test/utils/__init__.py @@ -349,6 +349,10 @@ def get_contexts(cgraph: ConjunctiveGraph) -> Dict[URIRef, Graph]: else: raise AssertionError("BNode labelled graphs not supported") elif isinstance(context.identifier, URIRef): + if len(context) == 0: + # If a context has no triples it does not exist in a + # meaningful way. + continue result[context.identifier] = context else: raise AssertionError( diff --git a/test/utils/iri.py b/test/utils/iri.py index 24f114b2c..ad7419d59 100644 --- a/test/utils/iri.py +++ b/test/utils/iri.py @@ -2,12 +2,17 @@ Various utilities for working with IRIs and URIs. 
""" +import email.utils +import http.client import logging +import mimetypes from dataclasses import dataclass from pathlib import Path, PurePath, PurePosixPath, PureWindowsPath from test.utils import ensure_suffix from typing import Callable, Optional, Set, Tuple, Type, TypeVar, Union from urllib.parse import quote, unquote, urljoin, urlparse, urlsplit, urlunsplit +from urllib.request import BaseHandler, OpenerDirector, Request +from urllib.response import addinfourl from nturl2path import url2pathname as nt_url2pathname @@ -148,3 +153,28 @@ def from_mappings( value = URIMapping.from_tuple(value) result.add(value) return cls(result) + + def opener(self) -> OpenerDirector: + opener = OpenerDirector() + + opener.add_handler(URIMapperHTTPHandler(self)) + + return opener + + +class URIMapperHTTPHandler(BaseHandler): + def __init__(self, mapper: URIMapper): + self.mapper = mapper + + def http_open(self, req: Request) -> addinfourl: + url = req.get_full_url() + local_uri, local_path = self.mapper.to_local(url) + stats = local_path.stat() + size = stats.st_size + modified = email.utils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(f"{local_path}")[0] + headers = email.message_from_string( + "Content-type: %s\nContent-length: %d\nLast-modified: %s\n" + % (mtype or "text/plain", size, modified) + ) + return addinfourl(local_path.open("rb"), headers, url, http.client.OK) diff --git a/test/utils/sparql_checker.py b/test/utils/sparql_checker.py index 836c040fd..477c9d3c8 100644 --- a/test/utils/sparql_checker.py +++ b/test/utils/sparql_checker.py @@ -6,24 +6,13 @@ from contextlib import ExitStack, contextmanager from dataclasses import dataclass, field from io import BytesIO, StringIO -from pathlib import Path from test.utils import BNodeHandling, GraphHelper from test.utils.dawg_manifest import Manifest, ManifestEntry from test.utils.iri import URIMapper from test.utils.namespace import MF, QT, UT from test.utils.result import ResultType, 
assert_bindings_collections_equal -from typing import ( - Any, - Callable, - Dict, - Generator, - Optional, - Set, - Tuple, - Type, - Union, - cast, -) +from test.utils.urlopen import context_urlopener +from typing import Dict, Generator, Optional, Set, Tuple, Type, Union, cast from urllib.parse import urljoin import pytest @@ -36,7 +25,6 @@ from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate from rdflib.plugins.sparql.parser import parseQuery, parseUpdate from rdflib.plugins.sparql.results.rdfresults import RDFResultParser -from rdflib.plugins.sparql.sparql import QueryContext from rdflib.query import Result from rdflib.term import BNode, IdentifiedNode, Identifier, Literal, Node, URIRef from rdflib.util import guess_format @@ -131,7 +119,7 @@ def load_into(self, manifest: Manifest, dataset: Dataset) -> None: logging.debug( "public_id = %s - graph = %s\n%s", public_id, graph_path, graph_text ) - dataset.parse( + dataset.get_context(public_id).parse( # type error: Argument 1 to "guess_format" has incompatible type "Path"; expected "str" data=graph_text, publicID=public_id, @@ -351,33 +339,11 @@ def check_update(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True -def patched_query_context_load(uri_mapper: URIMapper) -> Callable[..., Any]: - def _patched_load( - self: QueryContext, source: URIRef, default: bool = False, **kwargs - ) -> None: - public_id = None - use_source: Union[URIRef, Path] = source - # type error: Argument 1 to "guess_format" has incompatible type "Union[URIRef, Path]"; expected "str" - format = guess_format(use_source) # type: ignore[arg-type] - if f"{source}".startswith(("https://", "http://")): - use_source = uri_mapper.to_local_path(source) - public_id = source - if default: - assert self.graph is not None - self.graph.parse(use_source, format=format, publicID=public_id) - else: - self.dataset.parse(use_source, format=format, publicID=public_id) - - return _patched_load 
- - -def check_query(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: +def check_query(exit_stack: ExitStack, entry: SPARQLEntry) -> None: assert entry.query is not None assert isinstance(entry.result, URIRef) - monkeypatch.setattr( - QueryContext, "load", patched_query_context_load(entry.uri_mapper) - ) + exit_stack.enter_context(context_urlopener(entry.uri_mapper.opener())) query_text = entry.query_text() dataset = entry.action_dataset() @@ -400,6 +366,11 @@ def check_query(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: assert expected_result.type == result.type if result.type == ResultType.SELECT: + if logger.isEnabledFor(logging.DEBUG): + logging.debug( + "expected_result.bindings = \n%s", + pprint.pformat(expected_result.bindings, indent=2, width=80), + ) if logger.isEnabledFor(logging.DEBUG): logging.debug( "entry.result_cardinality = %s, result.bindings = \n%s", @@ -441,7 +412,9 @@ def check_query(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: } -def check_entry(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: +def check_entry( + monkeypatch: MonkeyPatch, exit_stack: ExitStack, entry: SPARQLEntry +) -> None: if logger.isEnabledFor(logging.DEBUG): logging.debug( "entry = \n%s", @@ -452,5 +425,5 @@ def check_entry(monkeypatch: MonkeyPatch, entry: SPARQLEntry) -> None: if entry.type_info.query_type is QueryType.UPDATE: return check_update(monkeypatch, entry) elif entry.type_info.query_type is QueryType.QUERY: - return check_query(monkeypatch, entry) + return check_query(exit_stack, entry) raise ValueError(f"unsupported test {entry.type}") diff --git a/test/utils/test/test_testutils.py b/test/utils/test/test_testutils.py index a624c4456..44a0292ec 100644 --- a/test/utils/test/test_testutils.py +++ b/test/utils/test/test_testutils.py @@ -288,21 +288,21 @@ def test_assert_sets_equal(test_case: SetsEqualTestCase): rhs_graph: Graph = Graph().parse(data=test_case.rhs, format=test_case.rhs_format) public_id = URIRef("example:graph") - 
lhs_cgraph: ConjunctiveGraph = ConjunctiveGraph() - lhs_cgraph.parse( + lhs_dataset: Dataset = Dataset() + lhs_dataset.parse( data=test_case.lhs, format=test_case.lhs_format, publicID=public_id ) - rhs_cgraph: ConjunctiveGraph = ConjunctiveGraph() - rhs_cgraph.parse( + rhs_dataset: Dataset = Dataset() + rhs_dataset.parse( data=test_case.rhs, format=test_case.rhs_format, publicID=public_id ) - assert isinstance(lhs_cgraph, ConjunctiveGraph) - assert isinstance(rhs_cgraph, ConjunctiveGraph) + assert isinstance(lhs_dataset, Dataset) + assert isinstance(rhs_dataset, Dataset) graph: Graph - cgraph: ConjunctiveGraph - for graph, cgraph in ((lhs_graph, lhs_cgraph), (rhs_graph, rhs_cgraph)): + cgraph: Dataset + for graph, cgraph in ((lhs_graph, lhs_dataset), (rhs_graph, rhs_dataset)): GraphHelper.assert_sets_equals(graph, graph, BNodeHandling.COLLAPSE) GraphHelper.assert_sets_equals(cgraph, cgraph, BNodeHandling.COLLAPSE) GraphHelper.assert_triple_sets_equals(graph, graph, BNodeHandling.COLLAPSE) @@ -316,7 +316,7 @@ def test_assert_sets_equal(test_case: SetsEqualTestCase): ) with pytest.raises(AssertionError): GraphHelper.assert_sets_equals( - lhs_cgraph, rhs_cgraph, test_case.bnode_handling + lhs_dataset, rhs_dataset, test_case.bnode_handling ) with pytest.raises(AssertionError): GraphHelper.assert_triple_sets_equals( @@ -324,23 +324,25 @@ def test_assert_sets_equal(test_case: SetsEqualTestCase): ) with pytest.raises(AssertionError): GraphHelper.assert_triple_sets_equals( - lhs_cgraph, rhs_cgraph, test_case.bnode_handling + lhs_dataset, rhs_dataset, test_case.bnode_handling ) with pytest.raises(AssertionError): GraphHelper.assert_quad_sets_equals( - lhs_cgraph, rhs_cgraph, test_case.bnode_handling + lhs_dataset, rhs_dataset, test_case.bnode_handling ) else: GraphHelper.assert_sets_equals(lhs_graph, rhs_graph, test_case.bnode_handling) - GraphHelper.assert_sets_equals(lhs_cgraph, rhs_cgraph, test_case.bnode_handling) + GraphHelper.assert_sets_equals( + lhs_dataset, 
rhs_dataset, test_case.bnode_handling + ) GraphHelper.assert_triple_sets_equals( lhs_graph, rhs_graph, test_case.bnode_handling ) GraphHelper.assert_triple_sets_equals( - lhs_cgraph, rhs_cgraph, test_case.bnode_handling + lhs_dataset, rhs_dataset, test_case.bnode_handling ) GraphHelper.assert_quad_sets_equals( - lhs_cgraph, rhs_cgraph, test_case.bnode_handling + lhs_dataset, rhs_dataset, test_case.bnode_handling )