Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow distinct blank node contexts from one NTriples parser to the next (#980) #1107

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def find_version(filename):
exclude_trees = ["_build", "draft"]

# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None
default_role = 'py:obj'

# If true, '()' will be appended to :func: etc. cross-reference text.
add_function_parentheses = True
Expand Down
37 changes: 29 additions & 8 deletions rdflib/plugins/parsers/ntriples.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,43 @@ class NTriplesParser(object):

p = NTriplesParser(sink=MySink())
sink = p.parse(f) # file; use parsestring for a string

To define a context in which blank node identifiers refer to the same blank node
across instances of NTriplesParser, pass the same dict as `bnode_context` to each
instance. By default, a new blank node context is created for each instance of
`NTriplesParser`.
"""

_bnode_ids = {}
def __init__(self, sink=None, bnode_context=None):
if bnode_context is not None:
self._bnode_ids = bnode_context
else:
self._bnode_ids = {}

self._parse_bnode_ids = None

def __init__(self, sink=None):
if sink is not None:
self.sink = sink
else:
self.sink = Sink()

def parse(self, f):
"""Parse f as an N-Triples file."""
def parse(self, f, bnode_context=None):
"""
Parse f as an N-Triples file.

:param f: the N-Triples source
:param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
to `.BNode` instances. An empty dict can be passed in to
define a distinct context for a given call to `parse`.
"""
if not hasattr(f, "read"):
raise ParseError("Item to parse must be a file-like object.")

# since N-Triples 1.1 files can and should be utf-8 encoded
f = codecs.getreader("utf-8")(f)

self.file = f
self.buffer = ""
self._parse_bnode_ids = bnode_context
while True:
self.line = self.readline()
if self.line is None:
Expand All @@ -150,14 +167,14 @@ def parse(self, f):
raise ParseError("Invalid line: %r" % self.line)
return self.sink

def parsestring(self, s):
def parsestring(self, s, **kwargs):
"""Parse s as an N-Triples string."""
if not isinstance(s, str):
raise ParseError("Item to parse must be a string instance.")
f = BytesIO()
f.write(cast_bytes(s))
f.seek(0)
self.parse(f)
self.parse(f, **kwargs)

def readline(self):
"""Read an N-Triples line from buffered input."""
Expand Down Expand Up @@ -243,8 +260,12 @@ def uriref(self):
def nodeid(self):
if self.peek("_"):
# Fix for https://github.com/RDFLib/rdflib/issues/204
if self._parse_bnode_ids is not None:
bnode_ids = self._parse_bnode_ids
else:
bnode_ids = self._bnode_ids
bnode_id = self.eat(r_nodeid).group(1)
new_id = self._bnode_ids.get(bnode_id, None)
new_id = bnode_ids.get(bnode_id, None)
if new_id is not None:
# Re-map to id specific to this doc
return bNode(new_id)
Expand Down
67 changes: 67 additions & 0 deletions test/test_nt_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,72 @@ def test_cover_subjectobjectliteral(self):
# self.assertRaises(ntriples.ParseError, p.literal)


class BNodeContextTestCase(unittest.TestCase):
    """Exercise blank node scoping across parser instances and parse calls.

    Every test feeds two one-triple documents, both using the blank node
    label ``_:0`` as subject, into one ``FakeSink`` and then counts the
    distinct subjects the sink collected: 1 means the label resolved to the
    same node both times, 2 means it resolved to two different nodes.
    """

    # The two documents differ only in the object IRI.
    _DOC_ONE = (
        "\n_:0 <http://purl.obolibrary.org/obo/RO_0002350> "
        "<http://www.gbif.org/species/0000001> .\n"
    )
    _DOC_TWO = (
        "\n_:0 <http://purl.obolibrary.org/obo/RO_0002350> "
        "<http://www.gbif.org/species/0000002> .\n"
    )

    def test_bnode_shared_across_instances(self):
        """Two parsers given the same context dict yield a single subject."""
        sink = FakeSink()
        shared_context = {}
        for document in (self._DOC_ONE, self._DOC_TWO):
            parser = ntriples.NTriplesParser(sink, bnode_context=shared_context)
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 1)

    def test_bnode_distinct_across_instances(self):
        """Two parsers built without a context yield two subjects."""
        sink = FakeSink()
        for document in (self._DOC_ONE, self._DOC_TWO):
            parser = ntriples.NTriplesParser(sink)
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 2)

    def test_bnode_distinct_across_parse(self):
        """One parser, a fresh dict passed per call, yields two subjects."""
        sink = FakeSink()
        parser = ntriples.NTriplesParser(sink)
        for document in (self._DOC_ONE, self._DOC_TWO):
            # A new empty dict on every call, never reused between calls.
            parser.parsestring(document, bnode_context={})

        self.assertEqual(len(sink.subs), 2)

    def test_bnode_shared_across_parse(self):
        """One parser, no per-call context, yields a single subject."""
        sink = FakeSink()
        parser = ntriples.NTriplesParser(sink)
        for document in (self._DOC_ONE, self._DOC_TWO):
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 1)


class FakeSink(object):
    """Minimal sink that remembers only the distinct subjects it is given."""

    def __init__(self):
        # A set, so a subject reported more than once is stored once.
        self.subs = set()

    def triple(self, s, p, o):
        """Record subject ``s``; ``p`` and ``o`` are accepted but not stored."""
        collected = self.subs
        collected.add(s)


if __name__ == "__main__":
unittest.main()