Skip to content

Commit

Permalink
Merge pull request #1107 from mwatts15/feature/#980-ntriples-blank-no…
Browse files Browse the repository at this point in the history
…de-collisions

Allow distinct blank node contexts from one NTriples parser to the next (#980)
  • Loading branch information
nicholascar authored Jun 1, 2020
2 parents db4b66f + 92f66b3 commit 6b5bd37
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 9 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def find_version(filename):
exclude_trees = ["_build", "draft"]

# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None
# NOTE(review): 'py:obj' presumably makes single-backtick names in docstrings
# resolve as Python object cross-references (Sphinx `py:obj` role) — confirm.
default_role = 'py:obj'

# If true, '()' will be appended to :func: etc. cross-reference text.
add_function_parentheses = True
Expand Down
37 changes: 29 additions & 8 deletions rdflib/plugins/parsers/ntriples.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,26 +120,43 @@ class NTriplesParser(object):
p = NTriplesParser(sink=MySink())
sink = p.parse(f) # file; use parsestring for a string
To define a context in which blank node identifiers refer to the same blank node
across instances of NTriplesParser, pass the same dict as `bnode_context` to each
instance. By default, a new blank node context is created for each instance of
`NTriplesParser`.
"""

_bnode_ids = {}
def __init__(self, sink=None, bnode_context=None):
# Instance-wide blank node context: reuse the caller's dict when one is given
# (so the same dict can be handed to several parsers), otherwise start with a
# fresh dict that belongs to this instance only.
if bnode_context is not None:
self._bnode_ids = bnode_context
else:
self._bnode_ids = {}

# Per-call blank node context; `parse` overwrites this with the
# `bnode_context` argument it receives.
self._parse_bnode_ids = None

# NOTE(review): the next line looks like the pre-change signature left in by
# the diff view (superseded by the signature at the top) — confirm.
def __init__(self, sink=None):
# Use the caller's sink when provided; otherwise fall back to a default Sink.
if sink is not None:
self.sink = sink
else:
self.sink = Sink()

def parse(self, f):
"""Parse f as an N-Triples file."""
def parse(self, f, bnode_context=None):
"""
Parse f as an N-Triples file.
:param f: the N-Triples source
:param bnode_context: a dict mapping blank node identifiers (e.g., ``a`` in ``_:a``)
to `.BNode` instances. An empty dict can be passed in to
define a distinct context for a given call to `parse`.
"""
if not hasattr(f, "read"):
raise ParseError("Item to parse must be a file-like object.")

# since N-Triples 1.1 files can and should be utf-8 encoded
f = codecs.getreader("utf-8")(f)

self.file = f
self.buffer = ""
self._parse_bnode_ids = bnode_context
while True:
self.line = self.readline()
if self.line is None:
Expand All @@ -150,14 +167,14 @@ def parse(self, f):
raise ParseError("Invalid line: %r" % self.line)
return self.sink

def parsestring(self, s):
# NOTE(review): the line above looks like the pre-change signature shown by
# the diff view; the line below is its replacement — confirm.
def parsestring(self, s, **kwargs):
"""Parse s as an N-Triples string."""
# Only `str` input is accepted; anything else raises ParseError.
if not isinstance(s, str):
raise ParseError("Item to parse must be a string instance.")
# Put the string into an in-memory file and rewind it, because `parse`
# only accepts objects that have a `read` attribute.
# NOTE(review): cast_bytes presumably encodes `s` to bytes — confirm.
f = BytesIO()
f.write(cast_bytes(s))
f.seek(0)
# NOTE(review): `self.parse(f)` looks like the pre-change call; the call that
# forwards **kwargs (e.g. `bnode_context`) to `parse` replaces it — confirm.
# Unlike `parse`, the result of the call is not returned to the caller.
self.parse(f)
self.parse(f, **kwargs)

def readline(self):
"""Read an N-Triples line from buffered input."""
Expand Down Expand Up @@ -243,8 +260,12 @@ def uriref(self):
def nodeid(self):
if self.peek("_"):
# Fix for https://github.com/RDFLib/rdflib/issues/204
if self._parse_bnode_ids is not None:
bnode_ids = self._parse_bnode_ids
else:
bnode_ids = self._bnode_ids
bnode_id = self.eat(r_nodeid).group(1)
new_id = self._bnode_ids.get(bnode_id, None)
new_id = bnode_ids.get(bnode_id, None)
if new_id is not None:
# Re-map to id specific to this doc
return bNode(new_id)
Expand Down
67 changes: 67 additions & 0 deletions test/test_nt_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,72 @@ def test_cover_subjectobjectliteral(self):
# self.assertRaises(ntriples.ParseError, p.literal)


class BNodeContextTestCase(unittest.TestCase):
    """Check when the blank node label ``_:0`` maps to one node or to several.

    Every test feeds two one-triple documents with the same blank node
    subject into a shared sink and counts the distinct subjects recorded.
    """

    # Both documents use ``_:0`` as subject and differ only in their object.
    _FIRST_DOC = (
        "\n"
        "_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .\n"
    )
    _SECOND_DOC = (
        "\n"
        "_:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .\n"
    )

    def test_bnode_shared_across_instances(self):
        # Two parser instances built with the same context dict: one subject.
        sink = FakeSink()
        shared_context = {}
        for document in (self._FIRST_DOC, self._SECOND_DOC):
            parser = ntriples.NTriplesParser(sink, bnode_context=shared_context)
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 1)

    def test_bnode_distinct_across_instances(self):
        # Two parser instances with their default contexts: two subjects.
        sink = FakeSink()
        for document in (self._FIRST_DOC, self._SECOND_DOC):
            parser = ntriples.NTriplesParser(sink)
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 2)

    def test_bnode_distinct_across_parse(self):
        # One parser, but a fresh context dict for every call: two subjects.
        sink = FakeSink()
        parser = ntriples.NTriplesParser(sink)
        for document in (self._FIRST_DOC, self._SECOND_DOC):
            parser.parsestring(document, bnode_context={})

        self.assertEqual(len(sink.subs), 2)

    def test_bnode_shared_across_parse(self):
        # One parser and no per-call context: one subject.
        sink = FakeSink()
        parser = ntriples.NTriplesParser(sink)
        for document in (self._FIRST_DOC, self._SECOND_DOC):
            parser.parsestring(document)

        self.assertEqual(len(sink.subs), 1)


class FakeSink(object):
    """Minimal triple sink that remembers only the distinct subjects it saw."""

    def __init__(self):
        # Distinct subjects of every triple passed to `triple` so far.
        self.subs = set()

    def triple(self, s, p, o):
        """Record the subject ``s``; the predicate and object are ignored."""
        seen = self.subs
        seen.add(s)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
unittest.main()

0 comments on commit 6b5bd37

Please sign in to comment.