Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SPARQLStore graph awareness #402

Merged
merged 4 commits into from
Jul 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 56 additions & 17 deletions rdflib/plugins/stores/sparqlstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from rdflib.store import Store
from rdflib.query import Result
from rdflib import Variable, Namespace, BNode, URIRef, Literal
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID

import httplib
import urlparse
Expand Down Expand Up @@ -185,6 +186,7 @@ class SPARQLStore(NSSPARQLWrapper, Store):
"""
formula_aware = False
transaction_aware = False
graph_aware = True
regex_matching = NATIVE_REGEX

def __init__(self,
Expand All @@ -198,6 +200,7 @@ def __init__(self,
self.nsBindings = {}
self.sparql11 = sparql11
self.context_aware = context_aware
self.graph_aware = context_aware

# Database Management Methods
def create(self, configuration):
Expand Down Expand Up @@ -275,7 +278,7 @@ def query(self, query,
" ".join(initBindings[x].n3() for x in v))

self.resetQuery()
if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
self.addDefaultGraph(queryGraph)
self.setQuery(query)

Expand Down Expand Up @@ -368,7 +371,7 @@ def triples(self, (s, p, o), context=None):
pass

self.resetQuery()
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(query)

Expand Down Expand Up @@ -397,7 +400,7 @@ def __len__(self, context=None):
else:
self.resetQuery()
q = "SELECT (count(*) as ?c) WHERE {?s ?p ?o .}"
if self.context_aware and context is not None:
if self._is_contextual(context):
self.addDefaultGraph(context.identifier)
self.setQuery(q)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)
Expand All @@ -407,24 +410,29 @@ def __len__(self, context=None):

def contexts(self, triple=None):
"""
Iterates over results to SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }
returning instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME)
Iterates over results to "SELECT ?NAME { GRAPH ?NAME { ?s ?p ?o } }"
or "SELECT ?NAME { GRAPH ?NAME {} }" if triple is `None`.

Returns instances of this store with the SPARQL wrapper
object updated via addNamedGraph(?NAME).

This causes a named-graph-uri key / value pair to be sent over
the protocol
the protocol.

Please note that some SPARQL endpoints are not able to find empty named
graphs.
"""
self.resetQuery()

if triple:
s, p, o = triple
params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())
self.setQuery('SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
else:
s = p = o = None
self.setQuery('SELECT ?name WHERE { GRAPH ?name {} }')

params = ((s if s else Variable('s')).n3(),
(p if p else Variable('p')).n3(),
(o if o else Variable('o')).n3())

self.setQuery(
'SELECT ?name WHERE { GRAPH ?name { %s %s %s }}' % params)
doc = ElementTree.parse(SPARQLWrapper.query(self).response)

return (rt.get(Variable("name"))
Expand All @@ -447,6 +455,23 @@ def namespaces(self):
for prefix, ns in self.nsBindings.items():
yield prefix, ns

def add_graph(self, graph):
    """Refuse to add a graph.

    This implementation never contacts the endpoint; it unconditionally
    signals that the store is read only.

    :param graph: ignored.
    :raises TypeError: always.
    """
    message = 'The SPARQL store is read only'
    raise TypeError(message)

def remove_graph(self, graph):
    """Refuse to remove a graph.

    This implementation never contacts the endpoint; it unconditionally
    signals that the store is read only.

    :param graph: ignored.
    :raises TypeError: always.
    """
    message = 'The SPARQL store is read only'
    raise TypeError(message)

def _is_contextual(self, graph):
""" Returns `True` if the "GRAPH" keyword must appear
in the final SPARQL query sent to the endpoint.
"""
if (not self.context_aware) or (graph is None):
return False
if isinstance(graph, basestring):
return graph != '__UNION__'
else:
return graph.identifier != DATASET_DEFAULT_GRAPH_ID


class SPARQLUpdateStore(SPARQLStore):
"""
Expand Down Expand Up @@ -615,7 +640,7 @@ def add(self, spo, context=None, quoted=False):


triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "INSERT DATA { GRAPH %s { %s } }" % (
context.identifier.n3(), triple)
else:
Expand Down Expand Up @@ -665,7 +690,7 @@ def remove(self, spo, context):
obj = Variable("O")

triple = "%s %s %s ." % (subject.n3(), predicate.n3(), obj.n3())
if self.context_aware and context is not None:
if self._is_contextual(context):
q = "DELETE { GRAPH %s { %s } } WHERE { GRAPH %s { %s } }" % (
context.identifier.n3(), triple,
context.identifier.n3(), triple)
Expand Down Expand Up @@ -727,7 +752,7 @@ def update(self, query,
self.setNamespaceBindings(initNs)
query = self.injectPrefixes(query)

if self.context_aware and queryGraph and queryGraph != '__UNION__':
if self._is_contextual(queryGraph):
query = self._insert_named_graph(query, queryGraph)

if initBindings:
Expand Down Expand Up @@ -799,3 +824,17 @@ def _insert_named_graph(self, query, query_graph):
modified_query.append(query[pos:])

return "".join(modified_query)

def add_graph(self, graph):
    """Create `graph` on the endpoint with a SPARQL ``CREATE GRAPH`` update.

    If the store is not graph aware, the call is delegated to
    ``Store.add_graph``. Nothing is sent when `graph` is the dataset
    default graph (its identifier equals `DATASET_DEFAULT_GRAPH_ID`).

    :param graph: object exposing an ``identifier`` attribute.
    """
    if not self.graph_aware:
        Store.add_graph(self, graph)
    elif graph.identifier != DATASET_DEFAULT_GRAPH_ID:
        # Serialise the identifier with n3(), exactly as add() and remove()
        # do for their GRAPH clauses, instead of hand-wrapping it in <...>.
        self.update("CREATE GRAPH %s" % graph.identifier.n3())

def remove_graph(self, graph):
    """Drop `graph` from the endpoint with a SPARQL ``DROP`` update.

    If the store is not graph aware, the call is delegated to
    ``Store.remove_graph``. The dataset default graph (identifier equal
    to `DATASET_DEFAULT_GRAPH_ID`) is dropped with ``DROP DEFAULT``; any
    other graph is dropped by name.

    :param graph: object exposing an ``identifier`` attribute.
    """
    if not self.graph_aware:
        Store.remove_graph(self, graph)
    elif graph.identifier == DATASET_DEFAULT_GRAPH_ID:
        self.update("DROP DEFAULT")
    else:
        # Serialise the identifier with n3(), exactly as add() and remove()
        # do for their GRAPH clauses, instead of hand-wrapping it in <...>.
        self.update("DROP GRAPH %s" % graph.identifier.n3())
91 changes: 64 additions & 27 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,65 @@ def setUp(self):
if self.store == "SQLite":
_, self.tmppath = mkstemp(
prefix='test', dir='/tmp', suffix='.sqlite')
elif self.store == "SPARQLUpdateStore":
root = "http://localhost:3030/ukpp/"
self.graph.open((root + "sparql", root + "update"))
else:
self.tmppath = mkdtemp()
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'michel')
self.tarek = URIRef(u'tarek')
self.bob = URIRef(u'bob')
self.likes = URIRef(u'likes')
self.hates = URIRef(u'hates')
self.pizza = URIRef(u'pizza')
self.cheese = URIRef(u'cheese')

self.c1 = URIRef(u'context-1')
self.c2 = URIRef(u'context-2')

if self.store != "SPARQLUpdateStore":
self.graph.open(self.tmppath, create=True)
self.michel = URIRef(u'urn:michel')
self.tarek = URIRef(u'urn:tarek')
self.bob = URIRef(u'urn:bob')
self.likes = URIRef(u'urn:likes')
self.hates = URIRef(u'urn:hates')
self.pizza = URIRef(u'urn:pizza')
self.cheese = URIRef(u'urn:cheese')

# Use regular URIs because SPARQL endpoints like Fuseki alter short names
self.c1 = URIRef(u'urn:context-1')
self.c2 = URIRef(u'urn:context-2')

# delete the graph for each test!
self.graph.remove((None, None, None))
for c in self.graph.contexts():
c.remove((None, None, None))
assert len(c) == 0
self.graph.remove_graph(c)

def tearDown(self):
self.graph.close()
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
if self.store == "SPARQLUpdateStore":
pass
else:
os.remove(self.tmppath)
if os.path.isdir(self.tmppath):
shutil.rmtree(self.tmppath)
else:
os.remove(self.tmppath)


def testGraphAware(self):
def testGraphAware(self):

if not self.graph.store.graph_aware: return

g = self.graph
g1 = g.graph(self.c1)


# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# added graph exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph is empty
self.assertEquals(len(g1), 0)

g1.add( (self.tarek, self.likes, self.pizza) )

# added graph still exists
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

# added graph contains one triple
Expand All @@ -77,17 +93,24 @@ def testGraphAware(self):
# added graph is empty
self.assertEquals(len(g1), 0)

# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))
# Some SPARQL endpoint backends (e.g. TDB) do not consider
# empty named graphs
if self.store != "SPARQLUpdateStore":
# graph still exists, although empty
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

g.remove_graph(self.c1)

# graph is gone
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testDefaultGraph(self):
def testDefaultGraph(self):
# Sometimes the default graph is read-only (e.g. TDB in union mode)
if self.store == "SPARQLUpdateStore":
print "Please make sure updating the default graph " \
"is supported by your SPARQL endpoint"

self.graph.add(( self.tarek, self.likes, self.pizza))
self.assertEquals(len(self.graph), 1)
Expand All @@ -103,7 +126,11 @@ def testDefaultGraph(self):
self.assertEquals(set(x.identifier for x in self.graph.contexts()),
set([DATASET_DEFAULT_GRAPH_ID]))

def testNotUnion(self):
def testNotUnion(self):
# Union depends on the SPARQL endpoint configuration
if self.store == "SPARQLUpdateStore":
print "Please make sure your SPARQL endpoint has not configured " \
"its default graph as the union of the named graphs"
g1 = self.graph.graph(self.c1)
g1.add((self.tarek, self.likes, self.pizza))

Expand All @@ -120,13 +147,23 @@ def testNotUnion(self):
pluginname = sys.argv[1]

tests = 0

for s in plugin.plugins(pluginname, plugin.Store):
if s.name in ('default', 'IOMemory', 'Auditable',
'Concurrent', 'SPARQLStore', 'SPARQLUpdateStore'):
'Concurrent', 'SPARQLStore'):
continue # these are tested by default

if not s.getClass().graph_aware:
continue

if s.name == "SPARQLUpdateStore":
import urllib2
try:
assert len(urllib2.urlopen("http://localhost:3030/").read()) > 0
except:
sys.stderr.write("No SPARQL endpoint for %s (tests skipped)\n" % s.name)
continue

locals()["t%d" % tests] = type("%sContextTestCase" % s.name, (
DatasetTestCase,), {"store": s.name})
tests += 1
Expand Down
10 changes: 10 additions & 0 deletions test/test_sparqlupdatestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,16 @@ def testNamedGraphUpdateWithInitBindings(self):
'only michel likes pizza'
)

def testEmptyNamedGraph(self):
    """Check that an empty named graph seen by a query is also a context.

    The graph is created with ``CREATE GRAPH``. The assertion only runs
    when the endpoint reports that graph for the probing query.
    """
    empty_graph_iri = u"urn:empty-graph-1"
    self.graph.update(u"CREATE GRAPH <%s>" % empty_graph_iri)

    rows = self.graph.query("SELECT ?name WHERE { GRAPH ?name {} }")
    named_graphs = [unicode(row[0]) for row in rows]

    # Some SPARQL endpoint backends (like TDB) are not able to find empty
    # named graphs (at least with this query); nothing to verify then.
    if empty_graph_iri not in named_graphs:
        return

    context_ids = [unicode(g.identifier) for g in self.graph.contexts()]
    self.assertTrue(empty_graph_iri in context_ids)

from nose import SkipTest
import urllib2
Expand Down