From 3721b7517fd90c469344315348baab8190ec100c Mon Sep 17 00:00:00 2001 From: Benjamin Cogrel Date: Thu, 12 Jun 2014 11:14:29 +0200 Subject: [PATCH 1/4] SPARQLStore.contexts() can now find empty named graphs (if supported by the SPARQL endpoint) --- rdflib/plugins/stores/sparqlstore.py | 26 +++++++++++++++----------- test/test_sparqlupdatestore.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index faffe36ef..5fec87f6c 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -407,24 +407,28 @@ def __len__(self, context=None): def contexts(self, triple=None): """ - Iterates over results to SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } } - returning instances of this store with the SPARQL wrapper - object updated via addNamedGraph(?NAME) + Iterates over results to "SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }" + or "SELECT ?NAME { GRAPH ?NAME {} }" if triple is `None`. + + Returns instances of this store with the SPARQL wrapper + object updated via addNamedGraph(?NAME). + This causes a named-graph-uri key / value pair to be sent over - the protocol + the protocol. + + Please note that some SPARQL endpoints are not able to find empty named + graphs. 
""" if triple: s, p, o = triple + params = ((s if s else Variable('s')).n3(), + (p if p else Variable('p')).n3(), + (o if o else Variable('o')).n3()) + self.setQuery('SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params) else: - s = p = o = None - - params = ((s if s else Variable('s')).n3(), - (p if p else Variable('p')).n3(), - (o if o else Variable('o')).n3()) + self.setQuery('SELECT ?name WHERE { GRAPH ?name {} }') - self.setQuery( - 'SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params) doc = ElementTree.parse(SPARQLWrapper.query(self).response) return (rt.get(Variable("name")) diff --git a/test/test_sparqlupdatestore.py b/test/test_sparqlupdatestore.py index eacad25ed..2d54d0cb6 100644 --- a/test/test_sparqlupdatestore.py +++ b/test/test_sparqlupdatestore.py @@ -258,6 +258,16 @@ def testNamedGraphUpdateWithInitBindings(self): 'only michel likes pizza' ) + def testEmptyNamedGraph(self): + empty_graph_iri = u"urn:empty-graph-1" + self.graph.update(u"CREATE GRAPH <%s>" % empty_graph_iri) + named_graphs = [unicode(r[0]) for r in self.graph.query( + "SELECT ?name WHERE { GRAPH ?name {} }")] + # Some SPARQL endpoint backends (like TDB) are not able to find empty named graphs + # (at least with this query) + if empty_graph_iri in named_graphs: + self.assertTrue(empty_graph_iri in [unicode(g.identifier) + for g in self.graph.contexts()]) from nose import SkipTest import urllib2 From 8411b45fcd9d572d9d984c440024e4c16ea3bbb6 Mon Sep 17 00:00:00 2001 From: Benjamin Cogrel Date: Thu, 12 Jun 2014 12:11:20 +0200 Subject: [PATCH 2/4] The named graph query was polluted by the non-reset default-graph-uri parameter --- rdflib/plugins/stores/sparqlstore.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 5fec87f6c..3d6ddcedb 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -419,6 +419,7 @@ def contexts(self, triple=None): Please note that 
some SPARQL endpoints are not able to find empty named graphs. """ + self.resetQuery() if triple: s, p, o = triple From f07b2a67c4851988a613ab3e21991cc427fd85cd Mon Sep 17 00:00:00 2001 From: Benjamin Cogrel Date: Thu, 12 Jun 2014 12:37:48 +0200 Subject: [PATCH 3/4] Basic graph awareness for the SPARQLStore (default graph treated as a named graph) --- rdflib/plugins/stores/sparqlstore.py | 20 ++++++++++++ test/test_dataset.py | 47 +++++++++++++++++++++------- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 3d6ddcedb..1e91ac0b4 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -185,6 +185,7 @@ class SPARQLStore(NSSPARQLWrapper, Store): """ formula_aware = False transaction_aware = False + graph_aware = True regex_matching = NATIVE_REGEX def __init__(self, @@ -198,6 +199,7 @@ def __init__(self, self.nsBindings = {} self.sparql11 = sparql11 self.context_aware = context_aware + self.graph_aware = context_aware # Database Management Methods def create(self, configuration): @@ -452,6 +454,12 @@ def namespaces(self): for prefix, ns in self.nsBindings.items(): yield prefix, ns + def add_graph(self, graph): + raise TypeError('The SPARQL store is read only') + + def remove_graph(self, graph): + raise TypeError('The SPARQL store is read only') + class SPARQLUpdateStore(SPARQLStore): """ @@ -804,3 +812,15 @@ def _insert_named_graph(self, query, query_graph): modified_query.append(query[pos:]) return "".join(modified_query) + + def add_graph(self, graph): + if not self.graph_aware: + Store.add_graph(self, graph) + else: + self.update("CREATE GRAPH <%s>" % graph.identifier) + + def remove_graph(self, graph): + if not self.graph_aware: + Store.remove_graph(self, graph) + else: + self.update("DROP GRAPH <%s>" % graph.identifier) diff --git a/test/test_dataset.py b/test/test_dataset.py index 0559364fb..88c449115 100644 --- 
a/test/test_dataset.py +++ b/test/test_dataset.py @@ -24,9 +24,14 @@ def setUp(self): if self.store == "SQLite": _, self.tmppath = mkstemp( prefix='test', dir='/tmp', suffix='.sqlite') + elif self.store == "SPARQLUpdateStore": + root = "http://localhost:3030/ukpp/" + self.graph.open((root + "sparql", root + "update")) else: self.tmppath = mkdtemp() - self.graph.open(self.tmppath, create=True) + + if self.store != "SPARQLUpdateStore": + self.graph.open(self.tmppath, create=True) self.michel = URIRef(u'michel') self.tarek = URIRef(u'tarek') self.bob = URIRef(u'bob') @@ -35,18 +40,25 @@ def setUp(self): self.pizza = URIRef(u'pizza') self.cheese = URIRef(u'cheese') - self.c1 = URIRef(u'context-1') - self.c2 = URIRef(u'context-2') + # Use regular URIs because SPARQL endpoints like Fuseki alter short names + self.c1 = URIRef(u'urn:context-1') + self.c2 = URIRef(u'urn:context-2') # delete the graph for each test! self.graph.remove((None, None, None)) + for c in self.graph.contexts(): + c.remove((None, None, None)) + assert len(c) == 0 def tearDown(self): self.graph.close() - if os.path.isdir(self.tmppath): - shutil.rmtree(self.tmppath) + if self.store == "SPARQLUpdateStore": + pass else: - os.remove(self.tmppath) + if os.path.isdir(self.tmppath): + shutil.rmtree(self.tmppath) + else: + os.remove(self.tmppath) def testGraphAware(self): @@ -55,7 +67,6 @@ def testGraphAware(self): g = self.graph g1 = g.graph(self.c1) - # added graph exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) @@ -66,7 +77,7 @@ def testGraphAware(self): g1.add( (self.tarek, self.likes, self.pizza) ) # added graph still exists - self.assertEquals(set(x.identifier for x in self.graph.contexts()), + self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph contains one triple @@ -87,7 +98,7 @@ def testGraphAware(self): self.assertEquals(set(x.identifier for x in 
self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) - def testDefaultGraph(self): + def testDefaultGraph(self): self.graph.add(( self.tarek, self.likes, self.pizza)) self.assertEquals(len(self.graph), 1) @@ -103,7 +114,11 @@ def testDefaultGraph(self): self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) - def testNotUnion(self): + def testNotUnion(self): + # Depends on the SPARQL endpoint configuration + # not the SPARQLUpdateStore + if self.store == "SPARQLUpdateStore": + return g1 = self.graph.graph(self.c1) g1.add((self.tarek, self.likes, self.pizza)) @@ -120,13 +135,23 @@ def testNotUnion(self): pluginname = sys.argv[1] tests = 0 + for s in plugin.plugins(pluginname, plugin.Store): if s.name in ('default', 'IOMemory', 'Auditable', - 'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore'): + 'Concurrent', 'SPARQLStore'): continue # these are tested by default + if not s.getClass().graph_aware: continue + if s.name == "SPARQLUpdateStore": + import urllib2 + try: + assert len(urllib2.urlopen("http://localhost:3030/").read()) > 0 + except: + sys.stderr.write("No SPARQL endpoint for %s (tests skipped)\n" % s.name) + continue + locals()["t%d" % tests] = type("%sContextTestCase" % s.name, ( DatasetTestCase,), {"store": s.name}) tests += 1 From e1d115bc43ecf396c4a79e7a83a9b5ed4dcce9f0 Mon Sep 17 00:00:00 2001 From: Benjamin Cogrel Date: Thu, 12 Jun 2014 16:04:53 +0200 Subject: [PATCH 4/4] The dataset default graph is now the default graph of the SPARQL endpoint --- rdflib/plugins/stores/sparqlstore.py | 30 ++++++++++++----- test/test_dataset.py | 48 +++++++++++++++++----------- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index 1e91ac0b4..757a13bc9 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -52,6 +52,7 @@ from rdflib.store import Store from rdflib.query import Result from 
rdflib import Variable, Namespace, BNode, URIRef, Literal +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID import httplib import urlparse @@ -277,7 +278,7 @@ def query(self, query, " ".join(initBindings[x].n3() for x in v)) self.resetQuery() - if self.context_aware and queryGraph and queryGraph != '__UNION__': + if self._is_contextual(queryGraph): self.addDefaultGraph(queryGraph) self.setQuery(query) @@ -370,7 +371,7 @@ def triples(self, (s, p, o), context=None): pass self.resetQuery() - if self.context_aware and context is not None: + if self._is_contextual(context): self.addDefaultGraph(context.identifier) self.setQuery(query) @@ -399,7 +400,7 @@ def __len__(self, context=None): else: self.resetQuery() q = "SELECT (count(*) as ?c) WHERE {?s ?p ?o .}" - if self.context_aware and context is not None: + if self._is_contextual(context): self.addDefaultGraph(context.identifier) self.setQuery(q) doc = ElementTree.parse(SPARQLWrapper.query(self).response) @@ -460,6 +461,17 @@ def add_graph(self, graph): def remove_graph(self, graph): raise TypeError('The SPARQL store is read only') + def _is_contextual(self, graph): + """ Returns `True` if the "GRAPH" keyword must appear + in the final SPARQL query sent to the endpoint. + """ + if (not self.context_aware) or (graph is None): + return False + if isinstance(graph, basestring): + return graph != '__UNION__' + else: + return graph.identifier != DATASET_DEFAULT_GRAPH_ID + class SPARQLUpdateStore(SPARQLStore): """ @@ -628,7 +640,7 @@ def add(self, spo, context=None, quoted=False): triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3()) - if self.context_aware and context is not None: + if self._is_contextual(context): q = "INSERT DATA { GRAPH %s { %s } }" % ( context.identifier.n3(), triple) else: @@ -678,7 +690,7 @@ def remove(self, spo, context): obj = Variable("O") triple = "%s %s %s ." 
% (subject.n3(), predicate.n3(), obj.n3()) - if self.context_aware and context is not None: + if self._is_contextual(context): q = "DELETE { GRAPH %s { %s } } WHERE { GRAPH %s { %s } }" % ( context.identifier.n3(), triple, context.identifier.n3(), triple) @@ -740,7 +752,7 @@ def update(self, query, self.setNamespaceBindings(initNs) query = self.injectPrefixes(query) - if self.context_aware and queryGraph and queryGraph != '__UNION__': + if self._is_contextual(queryGraph): query = self._insert_named_graph(query, queryGraph) if initBindings: @@ -816,11 +828,13 @@ def _insert_named_graph(self, query, query_graph): def add_graph(self, graph): if not self.graph_aware: Store.add_graph(self, graph) - else: + elif graph.identifier != DATASET_DEFAULT_GRAPH_ID: self.update("CREATE GRAPH <%s>" % graph.identifier) def remove_graph(self, graph): if not self.graph_aware: Store.remove_graph(self, graph) + elif graph.identifier == DATASET_DEFAULT_GRAPH_ID: + self.update("DROP DEFAULT") else: - self.update("DROP GRAPH <%s>" % graph.identifier) + self.update("DROP GRAPH <%s>" % graph.identifier) \ No newline at end of file diff --git a/test/test_dataset.py b/test/test_dataset.py index 88c449115..8ead20c43 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -32,13 +32,13 @@ def setUp(self): if self.store != "SPARQLUpdateStore": self.graph.open(self.tmppath, create=True) - self.michel = URIRef(u'michel') - self.tarek = URIRef(u'tarek') - self.bob = URIRef(u'bob') - self.likes = URIRef(u'likes') - self.hates = URIRef(u'hates') - self.pizza = URIRef(u'pizza') - self.cheese = URIRef(u'cheese') + self.michel = URIRef(u'urn:michel') + self.tarek = URIRef(u'urn:tarek') + self.bob = URIRef(u'urn:bob') + self.likes = URIRef(u'urn:likes') + self.hates = URIRef(u'urn:hates') + self.pizza = URIRef(u'urn:pizza') + self.cheese = URIRef(u'urn:cheese') # Use regular URIs because SPARQL endpoints like Fuseki alter short names self.c1 = URIRef(u'urn:context-1') @@ -49,6 +49,7 @@ def 
setUp(self): for c in self.graph.contexts(): c.remove((None, None, None)) assert len(c) == 0 + self.graph.remove_graph(c) def tearDown(self): self.graph.close() @@ -61,15 +62,19 @@ def tearDown(self): os.remove(self.tmppath) - def testGraphAware(self): + def testGraphAware(self): + if not self.graph.store.graph_aware: return g = self.graph g1 = g.graph(self.c1) - - # added graph exists - self.assertEquals(set(x.identifier for x in self.graph.contexts()), - set([self.c1, DATASET_DEFAULT_GRAPH_ID])) + + # Some SPARQL endpoint backends (e.g. TDB) do not consider + # empty named graphs + if self.store != "SPARQLUpdateStore": + # added graph exists + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph is empty self.assertEquals(len(g1), 0) @@ -88,9 +93,12 @@ def testGraphAware(self): # added graph is empty self.assertEquals(len(g1), 0) - # graph still exists, although empty - self.assertEquals(set(x.identifier for x in self.graph.contexts()), - set([self.c1, DATASET_DEFAULT_GRAPH_ID])) + # Some SPARQL endpoint backends (e.g. TDB) do not consider + # empty named graphs + if self.store != "SPARQLUpdateStore": + # graph still exists, although empty + self.assertEquals(set(x.identifier for x in self.graph.contexts()), + set([self.c1, DATASET_DEFAULT_GRAPH_ID])) g.remove_graph(self.c1) @@ -99,6 +107,10 @@ def testGraphAware(self): set([DATASET_DEFAULT_GRAPH_ID])) def testDefaultGraph(self): + # Sometimes the default graph is read-only (e.g.
TDB in union mode) + if self.store == "SPARQLUpdateStore": + print "Please make sure updating the default graph " \ + "is supported by your SPARQL endpoint" self.graph.add(( self.tarek, self.likes, self.pizza)) self.assertEquals(len(self.graph), 1) @@ -115,10 +127,10 @@ def testDefaultGraph(self): set([DATASET_DEFAULT_GRAPH_ID])) def testNotUnion(self): - # Depends on the SPARQL endpoint configuration - # not the SPARQLUpdateStore + # Union depends on the SPARQL endpoint configuration if self.store == "SPARQLUpdateStore": - return + print "Please make sure your SPARQL endpoint has not configured " \ + "its default graph as the union of the named graphs" g1 = self.graph.graph(self.c1) g1.add((self.tarek, self.likes, self.pizza))