
Commit

ensure query/result files are opened as binary, handle encoding issues in each parser. Fixes #344
gromgull committed Dec 30, 2013
1 parent 3db6a64 commit 98fc6b3
Showing 7 changed files with 57 additions and 31 deletions.
17 changes: 12 additions & 5 deletions rdflib/plugins/sparql/parser.py
@@ -17,7 +17,7 @@
from parserutils import Comp, Param, ParamList

from . import operators as op
from rdflib.py3compat import decodeStringEscape
from rdflib.py3compat import decodeUnicodeEscape, bytestype

import rdflib

@@ -287,14 +287,14 @@ def _hexExpand(match):
# ) + ZeroOrMore( ~ Literal("'\\") | ECHAR ) ) + "'''"
STRING_LITERAL_LONG1 = Regex(ur"'''((?:'|'')?(?:[^'\\]|\\['ntbrf\\]))*'''")
STRING_LITERAL_LONG1.setParseAction(
lambda x: rdflib.Literal(decodeStringEscape(x[0][3:-3])))
lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][3:-3])))

# [159] STRING_LITERAL_LONG2 ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR ) )* '"""'
# STRING_LITERAL_LONG2 = Literal('"""') + ( Optional( Literal('"') | '""'
# ) + ZeroOrMore( ~ Literal('"\\') | ECHAR ) ) + '"""'
STRING_LITERAL_LONG2 = Regex(ur'"""(?:(?:"|"")?(?:[^"\\]|\\["ntbrf\\]))*"""')
STRING_LITERAL_LONG2.setParseAction(
lambda x: rdflib.Literal(decodeStringEscape(x[0][3:-3])))
lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][3:-3])))

# [156] STRING_LITERAL1 ::= "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'"
# STRING_LITERAL1 = Literal("'") + ZeroOrMore(
@@ -303,7 +303,7 @@ def _hexExpand(match):
STRING_LITERAL1 = Regex(
ur"'(?:[^'\n\r\\]|\\['ntbrf\\])*'(?!')", flags=re.U)
STRING_LITERAL1.setParseAction(
lambda x: rdflib.Literal(decodeStringEscape(x[0][1:-1])))
lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1])))

# [157] STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"'
# STRING_LITERAL2 = Literal('"') + ZeroOrMore (
@@ -312,7 +312,7 @@ def _hexExpand(match):
STRING_LITERAL2 = Regex(
ur'"(?:[^"\n\r\\]|\\["ntbrf\\])*"(?!")', flags=re.U)
STRING_LITERAL2.setParseAction(
lambda x: rdflib.Literal(decodeStringEscape(x[0][1:-1])))
lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1])))

# [161] NIL ::= '(' WS* ')'
NIL = Literal('(') + ')'
@@ -1045,13 +1045,20 @@ def expand(m):
def parseQuery(q):
if hasattr(q, 'read'):
q = q.read()
if isinstance(q, bytestype):
q = q.decode('utf-8')

q = expandUnicodeEscapes(q)
return Query.parseString(q, parseAll=True)


def parseUpdate(q):
if hasattr(q, 'read'):
q = q.read()

if isinstance(q, bytestype):
q = q.decode('utf-8')

q = expandUnicodeEscapes(q)
return UpdateUnit.parseString(q, parseAll=True)[0]

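
For context (not part of the diff): after this change parseQuery and parseUpdate accept either a byte string or a unicode string, because byte input is decoded as UTF-8 before the unicode-escape expansion and the pyparsing grammar run. A minimal sketch, with a made-up query string:

```python
# Sketch only -- assumes rdflib with this commit applied; the query text is made up.
from rdflib.plugins.sparql.parser import parseQuery

query_bytes = b'SELECT * WHERE { ?s ?p "\xe5\xad\xab\xe5\xad\x90" }'  # UTF-8-encoded bytes
query_text = query_bytes.decode('utf-8')                              # unicode text

# Both forms now parse the same way: bytes are decoded to text first,
# so expandUnicodeEscapes and the grammar see identical input.
parseQuery(query_bytes)
parseQuery(query_text)
```
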
5 changes: 4 additions & 1 deletion rdflib/plugins/sparql/results/csvresults.py
@@ -7,7 +7,7 @@
"""


import codecs
import csv

from rdflib import Variable, BNode, URIRef, Literal, py3compat
@@ -23,6 +23,9 @@ def parse(self, source):

r = Result('SELECT')

if hasattr(source, 'mode') and 'b' in source.mode:
source = codecs.getreader('utf-8')(source)

reader = csv.reader(source, delimiter=self.delim)
r.vars = [Variable(x) for x in reader.next()]
r.bindings = []
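
A rough sketch (not from the commit) of the wrapping idiom used above: when the source file object was opened in binary mode, codecs.getreader('utf-8') turns the byte stream into a text stream before the csv module sees it; the TSV parser further down applies the same guard. The file name here is hypothetical:

```python
import codecs

# Hypothetical results file, opened in binary mode ('b' appears in source.mode).
source = open('results.csv', 'rb')

# Wrap the raw byte stream in a StreamReader that decodes UTF-8 on the fly,
# so downstream parsing works on text rather than undecoded bytes.
if hasattr(source, 'mode') and 'b' in source.mode:
    source = codecs.getreader('utf-8')(source)

first_row = source.readline()  # already decoded to a unicode string
```
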
8 changes: 7 additions & 1 deletion rdflib/plugins/sparql/results/jsonresults.py
@@ -2,6 +2,9 @@
Result, ResultException, ResultSerializer, ResultParser)
from rdflib import Literal, URIRef, BNode, Variable

from rdflib.py3compat import bytestype


import jsonlayer

"""A Serializer for SPARQL results in JSON:
@@ -19,7 +22,10 @@
class JSONResultParser(ResultParser):

def parse(self, source):
return JSONResult(jsonlayer.decode(source.read()))
inp = source.read()
if isinstance(inp, bytestype):
inp = inp.decode('utf-8')
return JSONResult(jsonlayer.decode(inp))


class JSONResultSerializer(ResultSerializer):
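
The JSON parser takes the simpler route of reading the whole payload and decoding it up front. A small sketch of that decode-before-parse idiom, using the stdlib json module in place of rdflib's jsonlayer wrapper, and a made-up payload:

```python
import json

# Made-up SPARQL JSON results payload, as it might arrive from a file opened in binary mode.
raw = b'{"head": {"vars": ["s"]}, "results": {"bindings": []}}'

inp = raw
if isinstance(inp, bytes):      # bytestype is bytes on Python 3, str on Python 2
    inp = inp.decode('utf-8')   # SPARQL JSON results are UTF-8-encoded text

data = json.loads(inp)          # parse the decoded text
```
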
5 changes: 5 additions & 0 deletions rdflib/plugins/sparql/results/tsvresults.py
@@ -5,6 +5,8 @@
It is implemented with pyparsing, reusing the elements from the SPARQL Parser
"""

import codecs

from pyparsing import (
Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress)

@@ -38,6 +40,9 @@
class TSVResultParser(ResultParser):
def parse(self, source):

if hasattr(source, 'mode') and 'b' in source.mode:
source = codecs.getreader('utf-8')(source)

try:
r = Result('SELECT')

5 changes: 5 additions & 0 deletions rdflib/py3compat.py
@@ -52,6 +52,9 @@ def b(s):
def ascii(stream):
return codecs.getreader('ascii')(stream)

def bopen(*args, **kwargs):
return open(*args, mode = 'rb', **kwargs)

bytestype = bytes

# Abstract u'abc' syntax:
@@ -102,6 +105,8 @@ def b(s):
def ascii(stream):
return stream

bopen = open

bytestype = str

# Abstract u'abc' syntax:
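
Roughly, bopen gives callers a single helper that always hands back raw bytes: on Python 3 it forces binary mode, on Python 2 it is just the builtin open. A sketch of the equivalent behaviour (the file name is hypothetical):

```python
import sys

if sys.version_info[0] >= 3:
    def bopen(*args, **kwargs):
        # Force binary mode so read() returns bytes, mirroring Python 2's default str output.
        return open(*args, mode='rb', **kwargs)
else:
    bopen = open  # Python 2's open already yields byte strings

data = bopen('query.rq').read()  # bytes on both interpreter versions
```
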
2 changes: 1 addition & 1 deletion test/DAWG/rdflib/unicode.ttl
@@ -1 +1 @@
<urn:a> <urn:p> "孫子兵法" .
<urn:a> <urn:p> "\u5b6b\u5b50\u5175\u6cd5" .
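
A quick equivalence check (illustration only, not part of the commit): the \u-escaped form above denotes exactly the same four characters as the raw UTF-8 literal it replaces.

```python
# Illustration only: the escaped Turtle literal and the original raw UTF-8
# bytes decode to the same four-character string.
escaped = u'\u5b6b\u5b50\u5175\u6cd5'
raw = b'\xe5\xad\xab\xe5\xad\x90\xe5\x85\xb5\xe6\xb3\x95'.decode('utf-8')
assert escaped == raw
```
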
46 changes: 23 additions & 23 deletions test/test_dawg.py
@@ -41,7 +41,7 @@ def most_common(self, N):
from rdflib.plugins.sparql.results.rdfresults import RDFResultParser
from rdflib.plugins.sparql.update import evalUpdate

from rdflib.py3compat import decodeStringEscape
from rdflib.py3compat import decodeStringEscape, bopen

from nose.tools import nottest, eq_
from nose import SkipTest
@@ -217,10 +217,10 @@ def update_test(t):

if not res:
if syntax:
translateUpdate(parseUpdate(open(query[7:])))
translateUpdate(parseUpdate(bopen(query[7:])))
else:
try:
translateUpdate(parseUpdate(open(query[7:])))
translateUpdate(parseUpdate(bopen(query[7:])))
raise AssertionError("Query shouldn't have parsed!")
except:
pass # negative syntax test
@@ -236,7 +236,7 @@ def update_test(t):
for x, l in graphdata:
g.load(x, publicID=URIRef(l), format=_fmt(x))

req = translateUpdate(parseUpdate(open(query[7:])))
req = translateUpdate(parseUpdate(bopen(query[7:])))
evalUpdate(g, req)

# read expected results
@@ -284,33 +284,33 @@ def update_test(t):
if data:
print "----------------- DATA --------------------"
print ">>>", data
print open(data[7:]).read()
print bopen(data[7:]).read()
if graphdata:
print "----------------- GRAPHDATA --------------------"
for x, l in graphdata:
print ">>>", x, l
print open(x[7:]).read()
print bopen(x[7:]).read()

print "----------------- Request -------------------"
print ">>>", query
print open(query[7:]).read()
print bopen(query[7:]).read()

if res:
if resdata:
print "----------------- RES DATA --------------------"
print ">>>", resdata
print open(resdata[7:]).read()
print bopen(resdata[7:]).read()
if resgraphdata:
print "----------------- RES GRAPHDATA -------------------"
for x, l in resgraphdata:
print ">>>", x, l
print open(x[7:]).read()
print bopen(x[7:]).read()

print "------------- MY RESULT ----------"
print g.serialize(format='trig')

try:
pq = translateUpdate(parseUpdate(open(query[7:]).read()))
pq = translateUpdate(parseUpdate(bopen(query[7:]).read()))
print "----------------- Parsed ------------------"
pprintAlgebra(pq)
# print pq
@@ -336,7 +336,7 @@ def query_test(t):

def skip(reason='(none)'):
print "Skipping %s from now on." % uri
f = open("skiptests.list", "a")
f = bopen("skiptests.list", "a")
f.write("%s\t%s\n" % (uri, reason))
f.close()

@@ -354,20 +354,20 @@ def skip(reason='(none)'):

if syntax:
translateQuery(parseQuery(
open(query[7:]).read()), base=urljoin(query, '.'))
bopen(query[7:]).read()), base=urljoin(query, '.'))
else:
# negative syntax test
try:
translateQuery(parseQuery(
open(query[7:]).read()), base=urljoin(query, '.'))
bopen(query[7:]).read()), base=urljoin(query, '.'))

assert False, 'Query should not have parsed!'
except:
pass # it's fine - the query should not parse
return

# eval test - carry out query
res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.'))
res2 = g.query(bopen(query[7:]).read(), base=urljoin(query, '.'))

if resfile.endswith('ttl'):
resg = Graph()
@@ -378,12 +378,12 @@ def skip(reason='(none)'):
resg.load(resfile, publicID=resfile)
res = RDFResultParser().parse(resg)
elif resfile.endswith('srj'):
res = Result.parse(open(resfile[7:]), format='json')
res = Result.parse(bopen(resfile[7:]), format='json')
elif resfile.endswith('tsv'):
res = Result.parse(open(resfile[7:]), format='tsv')
res = Result.parse(bopen(resfile[7:]), format='tsv')

elif resfile.endswith('csv'):
res = Result.parse(open(resfile[7:]), format='csv')
res = Result.parse(bopen(resfile[7:]), format='csv')

# CSV is lossy, round-trip our own resultset to
# lose the same info :)
@@ -396,7 +396,7 @@ def skip(reason='(none)'):
res2 = Result.parse(s, format='csv')

else:
res = Result.parse(open(resfile[7:]), format='xml')
res = Result.parse(bopen(resfile[7:]), format='xml')

if not DETAILEDASSERT:
eq(res.type, res2.type, 'Types do not match')
@@ -461,23 +461,23 @@ def skip(reason='(none)'):
if data:
print "----------------- DATA --------------------"
print ">>>", data
print open(data[7:]).read()
print bopen(data[7:]).read()
if graphdata:
print "----------------- GRAPHDATA --------------------"
for x in graphdata:
print ">>>", x
print open(x[7:]).read()
print bopen(x[7:]).read()

print "----------------- Query -------------------"
print ">>>", query
print open(query[7:]).read()
print bopen(query[7:]).read()
if resfile:
print "----------------- Res -------------------"
print ">>>", resfile
print open(resfile[7:]).read()
print bopen(resfile[7:]).read()

try:
pq = parseQuery(open(query[7:]).read())
pq = parseQuery(bopen(query[7:]).read())
print "----------------- Parsed ------------------"
pprintAlgebra(translateQuery(pq, base=urljoin(query, '.')))
except:
