BREAKING CHANGE: Don't use publicID as the name for the default graph.

When parsing data into a `ConjunctiveGraph` or `Dataset`, the triples in the default graphs in the sources were previously loaded into a graph named `publicID`. This behaviour has been changed, and now the triples from the default graph in source RDF documents will be loaded into `ConjunctiveGraph.default_context` or `Dataset.default_context`. The `publicID` parameter to the `ConjunctiveGraph.parse` and `Dataset.parse` methods will now only be used as the base URI for relative URI resolution.
RDFLib · May 25, 2023 · db7313a · db7313a
1 parent ad56044
commit db7313a
Show file tree

Hide file tree

Showing 26 changed files with 448 additions and 167 deletions.
diff --git a/README.md b/README.md
@@ -43,7 +43,7 @@ Help with maintenance of all of the RDFLib family of packages is always welcome
 
 ## Versions & Releases
 
-* `6.4.0a0` current `main` branch
+* `7.0.0a0` current `main` branch
 * `6.x.y` current release and support Python 3.7+ only. Many improvements over 5.0.0
     * see [Releases](https://github.com/RDFLib/rdflib/releases)
 * `5.x.y` supports Python 2.7 and 3.4+ and is [mostly backwards compatible with 4.2.2](https://rdflib.readthedocs.io/en/stable/upgrade4to5.html).

diff --git a/docs/conf.py b/docs/conf.py
@@ -55,6 +55,7 @@
 
 # https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html
 autodoc_default_options = {"special-members": True}
+autodoc_inherit_docstrings = True
 
 # https://github.com/tox-dev/sphinx-autodoc-typehints
 always_document_param_types = True

diff --git a/docs/index.rst b/docs/index.rst
@@ -66,6 +66,7 @@ If you are familiar with RDF and are looking for details on how RDFLib handles i
    namespaces_and_bindings
    persistence
    merging
+   upgrade6to7
    upgrade5to6
    upgrade4to5
    security_considerations

diff --git a/docs/upgrade6to7.rst b/docs/upgrade6to7.rst
@@ -0,0 +1,45 @@
+.. _upgrade6to7: Upgrading from RDFLib version 6 to 7
+
+============================================
+Upgrading 6 to 7
+============================================
+
+New behaviour for ``publicID`` in ``parse`` methods.
+----------------------------------------------------
+
+Before version 7, the ``publicID`` argument to the
+:meth:`~rdflib.graph.ConjunctiveGraph.parse` and
+:meth:`~rdflib.graph.Dataset.parse` methods was used as the name for the default
+graph, and triples from the default graph in a source were loaded into the graph
+named ``publicID``.
+
+In version 7, the ``publicID`` argument is only used as the base URI for relative
+URI resolution as defined in `IETF RFC 3986
+<https://datatracker.ietf.org/doc/html/rfc3986#section-5.1.4>`_.
+
+To accommodate this change, ensure that use of the ``publicID`` argument is
+consistent with the new behaviour.
+
+If you want to load triples from a format that does not support named graphs
+into a named graph, use the following code:
+
+.. code-block:: python
+    
+    from rdflib import ConjunctiveGraph
+
+    cg = ConjunctiveGraph()
+    cg.get_context("example:graph_name").parse("http://example.com/source.ttl", format="turtle")
+
+If you want to move triples from the default graph into a named graph, use the
+following code:
+
+.. code-block:: python
+
+    from rdflib import ConjunctiveGraph
+
+    cg = ConjunctiveGraph()
+    cg.parse("http://example.com/source.trig", format="trig")
+    destination_graph = cg.get_context("example:graph_name")
+    for triple in list(cg.default_context.triples((None, None, None))):
+        destination_graph.add(triple)
+        cg.default_context.remove(triple)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "rdflib"
-version = "6.4.0a0"
+version = "7.0.0a0"
 description = """RDFLib is a Python library for working with RDF, \
 a simple yet powerful language for representing information."""
 authors = ["Daniel 'eikeon' Krech <eikeon@eikeon.com>"]

diff --git a/rdflib/graph.py b/rdflib/graph.py
@@ -1400,26 +1400,26 @@ def parse(
            :doc:`Security Considerations </security_considerations>`
            documentation.
 
-        :Parameters:
-
-          - ``source``: An InputSource, file-like object, or string. In the case
-            of a string the string is the location of the source.
-          - ``location``: A string indicating the relative or absolute URL of
-            the source. Graph's absolutize method is used if a relative location
+        :param source: An `InputSource`, file-like object, `Path` like object,
+            or string. In the case of a string the string is the location of the
+            source.
+        :param location: A string indicating the relative or absolute URL of the
+            source. `Graph`'s absolutize method is used if a relative location
             is specified.
-          - ``file``: A file-like object.
-          - ``data``: A string containing the data to be parsed.
-          - ``format``: Used if format can not be determined from source, e.g.
+        :param file: A file-like object.
+        :param data: A string containing the data to be parsed.
+        :param format: Used if format can not be determined from source, e.g.
             file extension or Media Type. Defaults to text/turtle. Format
             support can be extended with plugins, but "xml", "n3" (use for
             turtle), "nt" & "trix" are built in.
-          - ``publicID``: the logical URI to use as the document base. If None
+        :param publicID: the logical URI to use as the document base. If None
             specified the document location is used (at least in the case where
-            there is a document location).
-
-        :Returns:
-
-          - self, the graph instance.
+            there is a document location). This is used as the base URI when
+            resolving relative URIs in the source document, as defined in `IETF
+            RFC 3986
+            <https://datatracker.ietf.org/doc/html/rfc3986#section-5.1.4>`_,
+            given the source document does not define a base URI.
+        :return: ``self``, i.e. the :class:`~rdflib.graph.Graph` instance.
 
         Examples:
 
@@ -2206,15 +2206,18 @@ def parse(
         **args: Any,
     ) -> "Graph":
         """
-        Parse source adding the resulting triples to its own context
-        (sub graph of this graph).
+        Parse source adding the resulting triples to its own context (sub graph
+        of this graph).
 
         See :meth:`rdflib.graph.Graph.parse` for documentation on arguments.
 
+        If the source is in a format that does not support named graphs its triples
+        will be added to the default graph (i.e. `ConjunctiveGraph.default_context`).
+
         :Returns:
 
-        The graph into which the source was parsed. In the case of n3
-        it returns the root context.
+        The graph into which the source was parsed. In the case of n3 it returns
+        the root context.
 
         .. caution::
 
@@ -2228,6 +2231,14 @@ def parse(
            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
+
+        *Changed in 7.0*: The ``publicID`` argument is no longer used as the
+        identifier (i.e. name) of the default graph as was the case before
+        version 7.0. In the case of sources that do not support named graphs,
+        the ``publicID`` parameter will also not be used as the name for the
+        graph that the data is loaded into, and instead the triples from sources
+        that do not support named graphs will be loaded into the default graph
+        (i.e. `ConjunctiveGraph.default_context`).
         """
 
         source = create_input_source(
@@ -2246,12 +2257,8 @@ def parse(
         # create_input_source will ensure that publicId is not None, though it
         # would be good if this guarantee was made more explicit i.e. by type
         # hint on InputSource (TODO/FIXME).
-        g_id: str = publicID and publicID or source.getPublicId()
-        if not isinstance(g_id, Node):
-            g_id = URIRef(g_id)
 
-        context = Graph(store=self.store, identifier=g_id)
-        context.remove((None, None, None))  # hmm ?
+        context = self.default_context
         context.parse(source, publicID=publicID, format=format, **args)
         # TODO: FIXME: This should not return context, but self.
         return context
@@ -2459,6 +2466,14 @@ def parse(
         **args: Any,
     ) -> "Graph":
         """
+        Parse an RDF source adding the resulting triples to the Graph.
+
+        See :meth:`rdflib.graph.Graph.parse` for documentation on arguments.
+
+        The source is specified using one of source, location, file or data.
+
+        If the source is in a format that does not support named graphs its triples
+        will be added to the default graph (i.e. `Dataset.default_context`).
 
         .. caution::
 
@@ -2472,6 +2487,14 @@ def parse(
            For information on available security measures, see the RDFLib
            :doc:`Security Considerations </security_considerations>`
            documentation.
+
+        *Changed in 7.0*: The ``publicID`` argument is no longer used as the
+        identifier (i.e. name) of the default graph as was the case before
+        version 7.0. In the case of sources that do not support named graphs,
+        the ``publicID`` parameter will also not be used as the name for the
+        graph that the data is loaded into, and instead the triples from sources
+        that do not support named graphs will be loaded into the default graph
+        (i.e. `Dataset.default_context`).
         """
 
         c = ConjunctiveGraph.parse(

diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py
@@ -312,6 +312,17 @@ def dataset(self) -> ConjunctiveGraph:
         return self._dataset
 
     def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
+        """
+        Load data from the source into the query context's dataset.
+
+        :param source: The source to load from.
+        :param default: If `True`, triples from the source will be added to the
+            default graph, otherwise they will be loaded into a graph with
+            ``source`` URI as its name.
+        :param kwargs: Keyword arguments to pass to
+            :meth:`rdflib.graph.Graph.parse`.
+        """
+
         def _load(graph, source):
             try:
                 return graph.parse(source, format="turtle", **kwargs)
@@ -342,7 +353,7 @@ def _load(graph, source):
             if default:
                 _load(self.graph, source)
             else:
-                _load(self.dataset, source)
+                _load(self.dataset.get_context(source), source)
 
     def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
         # in SPARQL BNodes are just labels

diff --git a/test/data/variants/more_quads-asserts.json b/test/data/variants/more_quads-asserts.json
@@ -1,4 +1,4 @@
 {
-  "quad_count": 6,
+  "quad_count": 8,
   "exact_match": true
 }
diff --git a/test/data/variants/more_quads.jsonld b/test/data/variants/more_quads.jsonld
@@ -1,56 +1,65 @@
 {
-  "@graph": [
-    {
-      "@graph": [
+    "@graph": [
         {
-          "@id": "example:s20",
-          "example:p20": {
-            "@id": "example:o20"
-          }
+            "@id": "example:s10",
+            "example:p10": {
+                "@id": "example:o10"
+            }
         },
         {
-          "@id": "example:s21",
-          "example:p21": {
-            "@id": "example:o21"
-          }
+            "@id": "example:s01",
+            "example:p01": {
+                "@id": "example:o01"
+            }
+        },
+        {
+            "@id": "example:s00",
+            "example:p00": {
+                "@id": "example:o02"
+            }
+        },
+        {
+            "@id": "example:s11",
+            "example:p11": {
+                "@id": "example:o11"
+            }
+        },
+        {
+            "@id": "example:g3",
+            "@graph": [
+                {
+                    "@id": "example:s31",
+                    "example:p31": {
+                        "@id": "example:o31"
+                    }
+                },
+                {
+                    "@id": "example:s30",
+                    "example:p30": {
+                        "@id": "example:o30"
+                    }
+                }
+            ]
+        },
+        {
+            "@id": "example:g2",
+            "@graph": [
+                {
+                    "@id": "example:s21",
+                    "example:p21": {
+                        "@id": "example:o21"
+                    }
+                },
+                {
+                    "@id": "example:s20",
+                    "example:p20": {
+                        "@id": "example:o20"
+                    }
+                }
+            ]
         }
-      ],
-      "@id": "example:g2"
-    },
-    {
-      "@id": "example:s00",
-      "p00": "example:o02"
-    },
-    {
-      "@id": "example:s01",
-      "p01": "example:o01"
-    },
-    {
-      "@id": "example:s10",
-      "p10": "example:o10"
-    },
-    {
-      "@id": "example:s11",
-      "p11": "example:o11"
+    ],
+    "@context": {
+        "example": "http://example.org/"
     }
-  ],
-  "@context": {
-    "p10": {
-      "@id": "http://example.org/p10",
-      "@type": "@id"
-    },
-    "p01": {
-      "@id": "http://example.org/p01",
-      "@type": "@id"
-    },
-    "p00": {
-      "@id": "http://example.org/p00",
-      "@type": "@id"
-    },
-    "p11": {
-      "@id": "http://example.org/p11",
-      "@type": "@id"
-    },
-    "example": "http://example.org/"
-  }
 }
diff --git a/test/data/variants/more_quads.nq b/test/data/variants/more_quads.nq
@@ -1,6 +1,8 @@
-<http://example.org/s00> <http://example.org/p00> <http://example.org/o02> .
-<http://example.org/s01> <http://example.org/p01> <http://example.org/o01> .
 <http://example.org/s10> <http://example.org/p10> <http://example.org/o10> .
+<http://example.org/s01> <http://example.org/p01> <http://example.org/o01> .
+<http://example.org/s00> <http://example.org/p00> <http://example.org/o02> .
 <http://example.org/s11> <http://example.org/p11> <http://example.org/o11> .
-<http://example.org/s20> <http://example.org/p20> <http://example.org/o20> <http://example.org/g2> .
 <http://example.org/s21> <http://example.org/p21> <http://example.org/o21> <http://example.org/g2> .
+<http://example.org/s20> <http://example.org/p20> <http://example.org/o20> <http://example.org/g2> .
+<http://example.org/s31> <http://example.org/p31> <http://example.org/o31> <http://example.org/g3> .
+<http://example.org/s30> <http://example.org/p30> <http://example.org/o30> <http://example.org/g3> .
diff --git a/test/data/variants/more_quads.trig b/test/data/variants/more_quads.trig
@@ -13,3 +13,8 @@ example:g2 {
 	example:s20 example:p20 example:o20 .
 	example:s21 example:p21 example:o21 .
 }
+
+example:g3 {
+    example:s30 example:p30 example:o30 .
+    example:s31 example:p31 example:o31 .
+}
diff --git a/test/data/variants/simple_triple.n3 b/test/data/variants/simple_triple.n3
@@ -0,0 +1 @@
+<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> .
diff --git a/test/data/variants/simple_triple.trig b/test/data/variants/simple_triple.trig
@@ -0,0 +1,2 @@
+<http://example.org/subject>
+        <http://example.org/predicate>  <http://example.org/object> .
diff --git a/test/test_conjunctivegraph/test_conjunctive_graph.py b/test/test_conjunctivegraph/test_conjunctive_graph.py
@@ -22,7 +22,7 @@ def test_bnode_publicid():
     b = BNode()
     data = "<d:d> <e:e> <f:f> ."
     print("Parsing %r into %r" % (data, b))
-    g.parse(data=data, format="turtle", publicID=b)
+    g.get_context(b).parse(data=data, format="turtle", publicID=b)
 
     triples = list(g.get_context(b).triples((None, None, None)))
     if not triples: