Merge branch 'master' into 5.0.0-dev
* master: (38 commits)
  hardening + warning for serializing invalid numericals (e.g., '"xy.z"^^xsd:float')
  make sure inf/Infinity and nan from numerical types are serialized as INF/NaN
  except inf/nan from plain serialization
  more tests for float and decimal inf/nan serialization (ensures datatypes)
  Only doubles can be infinity or NaN
  Serialisation of nan is "NaN"
  Remove test on serialisation (order is not fixed)
  Add tests requested by @joernhees
  change xxx to issue number (655)
  Test serialisation of infinite values
  Serialisation of infinity is "INF"
  Update requirements files for html5lib version
  Support newer transport_encoding parameter in html5lib parser call
  Set identifier of the parser sink to target graph. Fixes #432.
  Fix TurtleSerializer.isValidList failure on falsy values
  only run flake8 in latest py2 and py3 version
  pinning html5lib to 1.0b8 for now
  removed now unnecessary version restriction from SPARQLWrapper
  updated fuseki used in travis tests to 2.4.0
  travis updates setuptools and pip before installing
  ...
joernhees committed Oct 15, 2016
2 parents d267133 + 11e835a commit 470fb04
Showing 29 changed files with 505 additions and 103 deletions.
6 changes: 3 additions & 3 deletions .travis.fuseki_install_optional.sh
Original file line number Diff line number Diff line change
@@ -2,11 +2,11 @@

set -v

-uri="http://archive.eu.apache.org/dist/jena/binaries/jena-fuseki1-1.1.2-distribution.tar.gz"
+uri="http://archive.apache.org/dist/jena/binaries/apache-jena-fuseki-2.4.0.tar.gz"

if wget "$uri" &&
-    tar -zxf jena-fuseki*-distribution.tar.gz &&
-    mv jena-fuseki*/ fuseki &&
+    tar -zxf *jena*fuseki*.tar.gz &&
+    mv *jena*fuseki*/ fuseki &&
cd fuseki ; then
# normal SPARQLStore & Dataset tests:
bash fuseki-server --port 3030 --debug --update --mem /db &>fuseki.log &
10 changes: 7 additions & 3 deletions .travis.yml
@@ -1,10 +1,13 @@
# http://travis-ci.org/#!/RDFLib/rdflib
-language: python
sudo: false
+language: python
+branches:
+  only:
+    # only build master and release branches (merge request are built anyhow)
+    - master
+    - /^\d+\.\d+\.\d+(-.*)?$/
+git:
+  depth: 3

python:
# - 2.5
@@ -17,10 +20,11 @@ python:
# - "pypy"

before_install:
+  - pip install -U setuptools pip # seems travis comes with a too old setuptools for html5lib
- bash .travis.fuseki_install_optional.sh

install:
-  - if [[ ${TRAVIS_PYTHON_VERSION%%.*} == '2' ]]; then pip install --default-timeout 60 -r requirements.py2.txt; pip install --default-timeout 60 "html5lib";fi
+  - if [[ ${TRAVIS_PYTHON_VERSION%%.*} == '2' ]]; then pip install --default-timeout 60 -r requirements.py2.txt; fi
- if [[ ${TRAVIS_PYTHON_VERSION%%.*} == '3' ]]; then pip install --default-timeout 60 -r requirements.py3.txt; fi
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --default-timeout 60 ordereddict 'networkx<1.10' ; fi
- if [[ $TRAVIS_PYTHON_VERSION != '2.6' ]]; then pip install --default-timeout 60 networkx ; fi
@@ -30,7 +34,7 @@ install:
- python setup.py install

before_script:
-  - if [[ ${TRAVIS_PYTHON_VERSION%%.*} == '2' ]]; then flake8 --exclude=pyRdfa,extras,host,transform,rdfs,sparql,results,pyMicrodata --exit-zero rdflib; fi
+  - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]] || [[ $TRAVIS_PYTHON_VERSION == '3.5' ]]; then flake8 --exclude=pyRdfa,extras,host,transform,rdfs,sparql,results,pyMicrodata --exit-zero rdflib; fi

script:
# Must run the tests in build/src so python3 doesn't get confused and run
16 changes: 9 additions & 7 deletions README.md
@@ -1,5 +1,12 @@
RDFLib
======
+[![Build Status](https://travis-ci.org/RDFLib/rdflib.png?branch=master)](https://travis-ci.org/RDFLib/rdflib)
+[![Coveralls branch](https://img.shields.io/coveralls/RDFLib/rdflib/master.svg)](https://coveralls.io/r/RDFLib/rdflib?branch=master)
+[![GitHub stars](https://img.shields.io/github/stars/RDFLib/rdflib.svg)](https://github.com/RDFLib/rdflib/stargazers)
+[![PyPI](https://img.shields.io/pypi/v/rdflib.svg)](https://pypi.python.org/pypi/rdflib)
+[![PyPI](https://img.shields.io/pypi/dm/rdflib.svg)](https://pypi.python.org/pypi/rdflib)
+[![PyPI](https://img.shields.io/pypi/pyversions/rdflib.svg)](https://pypi.python.org/pypi/rdflib)


RDFLib is a Python library for working with RDF, a simple yet
powerful language for representing information as graphs.
@@ -30,7 +37,7 @@ g=rdflib.Graph()
g.load('http://dbpedia.org/resource/Semantic_Web')

for s,p,o in g:
-print s,p,o
+    print s,p,o
```

The components of the triples are URIs (resources) or Literals
@@ -54,7 +61,7 @@ dbpedia=Namespace('http://dbpedia.org/ontology/')
abstracts=list(x for x in g.objects(semweb, dbpedia['abstract']) if x.language=='en')
```

-See also *./examples*
+See also [./examples](./examples)


Features
@@ -85,11 +92,6 @@ More information is available on the project webpage:

https://github.com/RDFLib/rdflib/

-Continuous integration status details available from travis.ci, test coverage from coveralls:
-
-[![Build Status](https://travis-ci.org/RDFLib/rdflib.png?branch=master)](https://travis-ci.org/RDFLib/rdflib)
-[![Coverage Status](https://coveralls.io/repos/RDFLib/rdflib/badge.png?branch=master)](https://coveralls.io/r/RDFLib/rdflib?branch=master)

The documentation can be built by doing::

$ python setup.py build_sphinx
15 changes: 11 additions & 4 deletions rdflib/__init__.py
@@ -73,10 +73,17 @@
del sys

import logging
-import __main__
-if not hasattr(__main__, '__file__'):
-    # show log messages in interactive mode
-    logging.basicConfig(level=logging.INFO)
+try:
+    import __main__
+    if not hasattr(__main__, '__file__'):
+        # show log messages in interactive mode
+        logging.basicConfig(level=logging.INFO)
+except ImportError:
+    # Main already imported from elsewhere
+    import warnings
+    warnings.warn('__main__ already imported', ImportWarning)
+    del warnings

logger = logging.getLogger(__name__)
logger.info("RDFLib Version: %s" % __version__)

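The `rdflib/__init__.py` hunk above guards the interactive-mode logging setup against environments where `__main__` cannot be imported. A minimal sketch of the detection idea, with an illustrative wrapper function that is not part of rdflib itself:

```python
import logging

# Sketch of the probe: a script's __main__ module carries a __file__
# attribute, an interactive session's does not, and in some embedded
# interpreters __main__ may not be importable at all.
def configure_repl_logging():
    try:
        import __main__
        if not hasattr(__main__, '__file__'):
            # interactive session: surface INFO-level log messages
            logging.basicConfig(level=logging.INFO)
            return "interactive"
        return "script"
    except ImportError:
        # __main__ unavailable, e.g. in an embedded interpreter
        return "unavailable"

mode = configure_repl_logging()
assert mode in ("interactive", "script", "unavailable")
```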
2 changes: 1 addition & 1 deletion rdflib/collection.py
@@ -129,7 +129,7 @@ def __setitem__(self, key, value):
"""TODO"""
c = self._get_container(key)
if c:
-            self.graph.add((c, RDF.first, value))
+            self.graph.set((c, RDF.first, value))
else:
raise IndexError(key)

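The one-line `collection.py` fix swaps `Graph.add` for `Graph.set`, which replaces any existing value for a (subject, predicate) pair. A minimal sketch of why that matters for `rdf:first` — using a plain set of triples, not rdflib's store API:

```python
# add() accumulates triples, so reassigning a list cell's rdf:first value
# would leave both the old and the new value behind; set-style semantics
# first remove any (s, p, *) triple, then add the new one.
triples = set()

def add(triple):
    triples.add(triple)

def set_value(s, p, o):
    global triples
    # drop existing values for (s, p), then insert the replacement
    triples = {t for t in triples if not (t[0] == s and t[1] == p)}
    triples.add((s, p, o))

add(("_:c0", "rdf:first", "old"))
add(("_:c0", "rdf:first", "new"))        # buggy: both values remain
assert len(triples) == 2

set_value("_:c0", "rdf:first", "newer")  # fixed: exactly one value
assert [t for t in triples if t[1] == "rdf:first"] == [("_:c0", "rdf:first", "newer")]
```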
6 changes: 5 additions & 1 deletion rdflib/compare.py
@@ -143,11 +143,15 @@ def wrapped_f(*args, **kwargs):
class IsomorphicGraph(ConjunctiveGraph):
"""An implementation of the RGDA1 graph digest algorithm.
-    An implementation of RGDA1 (publication forthcoming),
+    An implementation of RGDA1 (publication below),
a combination of Sayers & Karp's graph digest algorithm using
sum and SHA-256 <http://www.hpl.hp.com/techreports/2003/HPL-2003-235R1.pdf>
and traces <http://pallini.di.uniroma1.it>, an average case
polynomial time algorithm for graph canonicalization.
+    McCusker, J. P. (2015). WebSig: A Digital Signature Framework for the Web.
+    Rensselaer Polytechnic Institute, Troy, NY.
+    http://gradworks.umi.com/3727015.pdf
"""

def __init__(self, **kwargs):
10 changes: 6 additions & 4 deletions rdflib/graph.py
@@ -925,7 +925,7 @@ def serialize(self, destination=None, format="xml",
string. Format defaults to xml (AKA rdf/xml).
Format support can be extended with plugins,
-        but 'xml', 'n3', 'turtle', 'nt', 'pretty-xml', trix' are built in.
+        but 'xml', 'n3', 'turtle', 'nt', 'pretty-xml', 'trix', 'trig' and 'nquads' are built in.
"""
serializer = plugin.get(format, Serializer)(self)
if destination is None:
@@ -1079,10 +1079,12 @@ def query(self, query_object, processor='sparql',
query_object, initBindings, initNs, **kwargs))

def update(self, update_object, processor='sparql',
-               initNs={}, initBindings={},
+               initNs=None, initBindings=None,
use_store_provided=True, **kwargs):
-        """
-        """
+        """Update this graph with the given update query."""
+        initBindings = initBindings or {}
+        initNs = initNs or dict(self.namespaces())

if hasattr(self.store, "update") and use_store_provided:
try:
return self.store.update(
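The `update()` signature change above replaces mutable `{}` defaults with `None` plus an `or {}` inside the body. A hedged sketch of the pitfall being avoided, with illustrative functions rather than the rdflib API:

```python
# A dict default is created once, at function definition time, and shared
# across every call that omits the argument, so mutations leak between
# callers. A None default with "bindings or {}" builds a fresh dict per call.
def update_buggy(bindings={}):
    bindings.setdefault("count", 0)
    bindings["count"] += 1
    return bindings

def update_fixed(bindings=None):
    bindings = bindings or {}   # fresh dict per call, as in the patch
    bindings.setdefault("count", 0)
    bindings["count"] += 1
    return bindings

assert update_buggy()["count"] == 1
assert update_buggy()["count"] == 2      # state leaked between calls
assert update_fixed()["count"] == 1
assert update_fixed()["count"] == 1      # independent per call
```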
2 changes: 1 addition & 1 deletion rdflib/plugins/parsers/ntriples.py
@@ -207,7 +207,7 @@ def eat(self, pattern):
if not m: # @@ Why can't we get the original pattern?
# print(dir(pattern))
# print repr(self.line), type(self.line)
-            raise ParseError("Failed to eat %s at %s" % (pattern, self.line))
+            raise ParseError("Failed to eat %s at %s" % (pattern.pattern, self.line))
self.line = self.line[m.end():]
return m

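The ntriples fix interpolates `pattern.pattern` instead of the compiled regex object, so the error message shows the regex source string rather than an object repr. A small illustration:

```python
import re

# A compiled regex renders as a noisy repr when interpolated; its
# .pattern attribute recovers the source string, which is what the
# fixed ParseError message uses.
pat = re.compile(r'<[^>]*>')
assert str(pat).startswith("re.compile")  # unhelpful in an error message
assert pat.pattern == r'<[^>]*>'          # readable

msg = "Failed to eat %s at %s" % (pat.pattern, "bad input")
assert "<[^>]*>" in msg
```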
5 changes: 4 additions & 1 deletion rdflib/plugins/parsers/pyRdfa/__init__.py
@@ -609,7 +609,10 @@ def copyErrors(tog, options) :
if self.charset :
# This means the HTTP header has provided a charset, or the
# file is a local file when we suppose it to be a utf-8
-            dom = parser.parse(input, encoding=self.charset)
+            try:
+                dom = parser.parse(input, encoding=self.charset)
+            except TypeError:
+                dom = parser.parse(input, transport_encoding=self.charset)
else :
# No charset set. The HTMLLib parser tries to sniff into the
# the file to find a meta header for the charset; if that
2 changes: 1 addition & 1 deletion rdflib/plugins/parsers/trig.py
@@ -146,7 +146,7 @@ def parse(self, source, graph, encoding="utf-8"):
# we're currently being handed a Graph, not a ConjunctiveGraph
assert graph.store.context_aware, "TriG Parser needs a context-aware store!"

-        conj_graph = ConjunctiveGraph(store=graph.store)
+        conj_graph = ConjunctiveGraph(store=graph.store, identifier=graph.identifier)
conj_graph.default_context = graph # TODO: CG __init__ should have a
# default_context arg
# TODO: update N3Processor so that it can use conj_graph as the sink
2 changes: 1 addition & 1 deletion rdflib/plugins/serializers/turtle.py
@@ -345,7 +345,7 @@ def isValidList(self, l):
Checks if l is a valid RDF list, i.e. no nodes have other properties.
"""
try:
-            if not self.store.value(l, RDF.first):
+            if self.store.value(l, RDF.first) is None:
return False
except:
return False
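The `isValidList` fix replaces a truthiness test with an explicit `is None` check. A minimal sketch of the bug class, with illustrative predicates rather than rdflib's `store.value`:

```python
# A lookup that returns a legitimate but falsy node (a literal 0, "",
# or False) must not be confused with "no value at all". Truthiness
# conflates the two; an explicit None check does not.
def is_valid_buggy(value):
    return bool(value)            # 0, "", False all wrongly rejected

def is_valid_fixed(value):
    return value is not None      # only a genuinely absent value fails

assert is_valid_buggy(0) is False   # wrong: 0 is a real list item
assert is_valid_fixed(0) is True
assert is_valid_fixed(None) is False
```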
53 changes: 27 additions & 26 deletions rdflib/plugins/sparql/aggregates.py
@@ -13,10 +13,14 @@
"""


-def _eval_rows(expr, group):
+def _eval_rows(expr, group, distinct):
+    seen = set()
     for row in group:
         try:
-            yield _eval(expr, row)
+            val = _eval(expr, row)
+            if not distinct or not val in seen:
+                yield val
+                seen.add(val)
         except:
             pass

@@ -25,9 +29,8 @@ def agg_Sum(a, group, bindings):
c = 0

dt = None
-    for x in group:
+    for e in _eval_rows(a.vars, group, a.distinct):
         try:
-            e = _eval(a.vars, x)
n = numeric(e)
if dt == None:
dt = e.datatype
@@ -51,15 +54,15 @@ def agg_Sum(a, group, bindings):
def agg_Min(a, group, bindings):
m = None

-    for x in group:
+    for v in _eval_rows(a.vars, group, None):  # DISTINCT makes no difference for MIN
         try:
-            v = numeric(_eval(a.vars, x))
+            v = numeric(v)
             if m is None:
                 m = v
             else:
                 m = num_min(v, m)
         except:
-            return  # error in aggregate => no binding
+            continue  # try other values

if m is not None:
bindings[a.res] = Literal(m)
@@ -68,9 +71,9 @@ def agg_Min(a, group, bindings):
def agg_Max(a, group, bindings):
m = None

-    for x in group:
+    for v in _eval_rows(a.vars, group, None):  # DISTINCT makes no difference for MAX
         try:
-            v = numeric(_eval(a.vars, x))
+            v = numeric(v)
if m is None:
m = v
else:
@@ -83,46 +86,44 @@ def agg_Max(a, group, bindings):


def agg_Count(a, group, bindings):

-    c = 0
-    for x in group:
-        try:
-            if a.vars != '*':
-                val = _eval(a.vars, x)
-                if isinstance(val, NotBoundError):
-                    continue
+    if a.vars == '*':
+        c = len(group)
+    else:
+        c = 0
+        for e in _eval_rows(a.vars, group, a.distinct):
             c += 1
-        except:
-            return  # error in aggregate => no binding
-            # pass # simply dont count

bindings[a.res] = Literal(c)


def agg_Sample(a, group, bindings):
for ctx in group:
-        val = _eval(a.vars, ctx)
-        if not isinstance(val, NotBoundError):
-            bindings[a.res] = val
+        try:
+            bindings[a.res] = _eval(a.vars, ctx)
             break
+        except NotBoundError:
+            pass


def agg_GroupConcat(a, group, bindings):

sep = a.separator or " "
-    if a.distinct:
-        agg = lambda x: x
-    else:
-        add = set

bindings[a.res] = Literal(
-        sep.join(unicode(x) for x in _eval_rows(a.vars, group)))
+        sep.join(unicode(x) for x in _eval_rows(a.vars, group, a.distinct)))


def agg_Avg(a, group, bindings):

c = 0
s = 0
dt = None
-    for x in group:
+    for e in _eval_rows(a.vars, group, a.distinct):
         try:
-            e = _eval(a.vars, x)
n = numeric(e)
if dt == None:
dt = e.datatype
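The aggregates rework threads a `distinct` flag through one shared row evaluator, so `SUM`, `AVG`, `COUNT`, and `GROUP_CONCAT` handle their `DISTINCT` forms in one place. A minimal sketch of that evaluator, simplified to plain values (no SPARQL bindings or error handling):

```python
# A seen-set filters duplicates only when distinct is requested, letting
# the plain and DISTINCT aggregate forms share one code path.
def eval_rows(values, distinct):
    seen = set()
    for val in values:
        if not distinct or val not in seen:
            yield val
            seen.add(val)

rows = [1, 2, 2, 3, 1]
assert sum(eval_rows(rows, distinct=False)) == 9   # plain SUM
assert sum(eval_rows(rows, distinct=True)) == 6    # SUM(DISTINCT)
assert len(list(eval_rows(rows, distinct=True))) == 3   # COUNT(DISTINCT)
```

Note that MIN and MAX pass `None` for the flag in the patch above, since duplicates cannot change an extremum.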
2 changes: 1 addition & 1 deletion rdflib/plugins/sparql/evaluate.py
@@ -282,7 +282,7 @@ def evalGroup(ctx, group):
else:
res = collections.defaultdict(list)
for c in p:
-            k = tuple(_eval(e, c) for e in group.expr)
+            k = tuple(_eval(e, c, False) for e in group.expr)
res[k].append(c)
return res

7 changes: 5 additions & 2 deletions rdflib/plugins/sparql/evalutils.py
@@ -61,7 +61,7 @@ def _ebv(expr, ctx):
return False


-def _eval(expr, ctx):
+def _eval(expr, ctx, raise_not_bound_error=True):
if isinstance(expr, (Literal, URIRef)):
return expr
if isinstance(expr, Expr):
@@ -70,7 +70,10 @@ def _eval(expr, ctx):
try:
return ctx[expr]
except KeyError:
-        return NotBoundError("Variable %s is not bound" % expr)
+        if raise_not_bound_error:
+            raise NotBoundError("Variable %s is not bound" % expr)
+        else:
+            return None
elif isinstance(expr, CompValue):
raise Exception(
"Weird - _eval got a CompValue without evalfn! %r" % expr)
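The `_eval` change above also fixes a subtle bug: the old code *returned* a `NotBoundError` instance instead of raising it. The new contract raises by default, while callers such as `evalGroup` pass `raise_not_bound_error=False` so an unbound grouping key becomes `None` instead of aborting the group. A self-contained sketch of that contract (simplified, not rdflib's actual classes):

```python
class NotBoundError(Exception):
    pass

def eval_var(bindings, var, raise_not_bound_error=True):
    # Raise for unbound variables by default; return None when the caller
    # (e.g. GROUP BY evaluation) asks for lenient behaviour instead.
    try:
        return bindings[var]
    except KeyError:
        if raise_not_bound_error:
            raise NotBoundError("Variable %s is not bound" % var)
        return None

assert eval_var({"x": 1}, "x") == 1
assert eval_var({}, "x", raise_not_bound_error=False) is None
try:
    eval_var({}, "x")
except NotBoundError:
    pass
else:
    raise AssertionError("expected NotBoundError")
```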