Implement variable MRU style REDUCED
tiko-tiko authored Jan 5, 2017
1 parent 46eeb06 commit 636f685
Showing 1 changed file with 35 additions and 1 deletion.
36 changes: 35 additions & 1 deletion rdflib/plugins/sparql/evaluate.py
@@ -351,7 +351,41 @@ def evalSlice(ctx, slice):


def evalReduced(ctx, part):
    return evalPart(ctx, part.p)  # TODO!
    """apply REDUCED to result

    REDUCED is not as strict as DISTINCT, but if the incoming rows are sorted
    it should produce the same result with limited extra memory and time per
    incoming row.
    """

    # This implementation uses a most-recently-used strategy with a limited
    # buffer size. It is related to an LRU caching algorithm:
    # https://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used_.28LRU.29
    MAX = 1
    # TODO: add configuration or determine the "best" size for most use cases
    # 0: no reduction
    # 1: compare only with the previous row; almost no reduction with
    #    unordered incoming rows
    # N: the greater the buffer size, the greater the reduction, but more
    #    memory and time are needed

    # mixed data structure: a set for fast lookup, a deque for ordered
    # append/pop/remove
    mru_set = set()
    mru_queue = collections.deque()

    for row in evalPart(ctx, part.p):
        if row in mru_set:
            # forget the last position of this row
            mru_queue.remove(row)
        else:
            # row appears to be new
            yield row
            mru_set.add(row)
            if len(mru_set) > MAX:
                # drop the least recently used row from the buffer
                mru_set.remove(mru_queue.pop())
        # put the row at the front of the queue
        mru_queue.appendleft(row)


def evalDistinct(ctx, part):
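The buffering logic above can be tried outside of the SPARQL engine. The following is a minimal standalone sketch (the helper name reduced, the max_size parameter, and the sample rows are illustrative, not part of this commit) showing how the buffer size controls the trade-off: with a size of 1 only immediately repeated rows are dropped, while a larger buffer also removes duplicates that arrive a few rows apart.

import collections

def reduced(rows, max_size=1):
    # Same MRU idea as evalReduced above, applied to any iterable of
    # hashable rows; a sketch, not rdflib code.
    mru_set = set()
    mru_queue = collections.deque()
    for row in rows:
        if row in mru_set:
            # row was seen recently: forget its old position, do not re-yield
            mru_queue.remove(row)
        else:
            # row looks new within the buffer window
            yield row
            mru_set.add(row)
            if len(mru_set) > max_size:
                # evict the least recently used row
                mru_set.remove(mru_queue.pop())
        # (re)insert row at the front as the most recently used entry
        mru_queue.appendleft(row)

rows = ["a", "a", "b", "a", "b", "b"]
print(list(reduced(rows, max_size=1)))  # ['a', 'b', 'a', 'b']
print(list(reduced(rows, max_size=2)))  # ['a', 'b']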

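For context, REDUCED is the SPARQL solution modifier that evalReduced implements. A hedged end-to-end sketch through rdflib's public query API (the graph data and query are illustrative only):

import rdflib

g = rdflib.Graph()
g.parse(data="""
    @prefix ex: <http://example.org/> .
    ex:a ex:p ex:x .
    ex:b ex:p ex:x .
    ex:c ex:p ex:y .
""", format="turtle")

# REDUCED permits, but does not require, the engine to drop duplicate rows;
# with the buffered implementation above, adjacent duplicate bindings of ?o
# are removed.
for row in g.query("SELECT REDUCED ?o WHERE { ?s <http://example.org/p> ?o }"):
    print(row.o)

Unlike DISTINCT, the number of rows printed may vary with the order in which the triples are matched.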