Skip to content

Commit

Permalink
Merge pull request #16 from pakozm/devel
Browse files Browse the repository at this point in the history
New version v0.3.5
  • Loading branch information
pakozm committed Nov 14, 2014
2 parents cf7100c + 7b2253c commit fb8c436
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 55 deletions.
2 changes: 1 addition & 1 deletion mapreduce/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ local tuple = require "mapreduce.tuple"
local persistent_table = require "mapreduce.persistent_table"

local mapreduce = {
_VERSION = "0.3.4",
_VERSION = "0.3.5",
_NAME = "mapreduce",
worker = worker,
server = server,
Expand Down
216 changes: 162 additions & 54 deletions mapreduce/tuple.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
--[[
This file is part of Lua-MapReduce
This file is part of Lua-Tuple (https://github.com/pakozm/lua-tuple)
This file is part of Lua-MapReduce (https://github.com/pakozm/lua-mapreduce)
Copyright 2014, Francisco Zamora-Martinez
Expand All @@ -17,85 +18,137 @@
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
]]

-- The job class is used by workers to execute map/reduce job. This class allows
-- to write job status and to update job statistics in MongoDB. Execution of
-- user map/reduce/combiner modules is done in job class. Intermediate data is
-- written here in the storage given at 'task' collection.
-- Linear implementation of in-mutable and interned tuples for Lua. It is linear
-- because tuples are stored into a linear table. A different approach would be
-- store tuples into an inverted prefix tree (trie). Major difference between
-- both approaches is that linear implementation needs more memory but has
-- better indexing time, while prefix tree implementation needs less memory but
-- has worst indexing time.

local tuple = {
_VERSION = "0.1",
_NAME = "tuple",
}

-- libraries import
local assert = assert
local getmetatable = getmetatable
local ipairs = ipairs
local pairs = pairs
local select = select
local tostring = tostring
local type = type
local bit32_band = bit32.band
local bit32_lshift = bit32.lshift
local bit32_rshift = bit32.rshift
local bit32_bxor = bit32.bxor
local math_max = math.max
local string_byte = string.byte
local string_format = string.format
local string_sub = string.sub
local table_concat = table.concat
local table_pack = table.pack

-- constants
local BYTE_MASK = 0x000000FF
local WORD_MASK = 0xFFFFFFFF
local MAX_NUMBER = 2^32
local MAX_BUCKET_HOLES_RATIO = 100
local NUM_BUCKETS = 2^20
local list_of_tuples = setmetatable({}, { __mode="v" })
local WEAK_MT = { __mode="v" }

-- the list of tuples is a hash table with a maximum of NUM_BUCKETS
local list_of_tuples = {}

-- converts a number into a binary string, for hash computation purposes
local function dump_number(n)
return string.format("%c%c%c%c%c%c%c%c",
bit32.band(n,0xFF),
bit32.band(bit32.rshift(n,8),0x00000000000000FF),
bit32.band(bit32.rshift(n,16),0x00000000000000FF),
bit32.band(bit32.rshift(n,24),0x00000000000000FF),
bit32.band(bit32.rshift(n,32),0x00000000000000FF),
bit32.band(bit32.rshift(n,40),0x00000000000000FF),
bit32.band(bit32.rshift(n,48),0x00000000000000FF),
bit32.band(bit32.rshift(n,56),0x00000000000000FF))
assert(n < MAX_NUMBER, "Only valid for 32 bit numbers")
return string_format("%c%c%c%c",
bit32_band(n,BYTE_MASK),
bit32_band(bit32_rshift(n,8),BYTE_MASK),
bit32_band(bit32_rshift(n,16),BYTE_MASK),
bit32_band(bit32_rshift(n,24),BYTE_MASK))
end

-- computes the hash of a given tuple candidate
local function compute_hash(t)
local h = 0
for i=1,#t do
local v = t[i]
local tt = type(v)
-- dump the value if it is a number, another tuple or a nil value
if tt == "number" then v = dump_number(v)
elseif tt == "table" then v = dump_number(compute_hash(v))
elseif tt == "nil" then v = "nil"
end
-- sanity check
assert(type(v) == "string",
"Needs an array with numbers, tables or strings")
-- hash computation for every char in the string v
for j=1,#v do
h = h + string.byte(string.sub(v,j,j))
h = h + bit32.lshift(h,10)
h = bit32.bxor(h, bit32.rshift(h,6))
h = bit32.band(h, 0x00000000FFFFFFFF)
h = h + string_byte(string_sub(v,j,j))
h = h + bit32_lshift(h,10)
h = bit32_bxor(h, bit32_rshift(h,6))
-- compute hash modules 2^32
h = bit32_band(h, WORD_MASK)
end
end
h = h + bit32.rshift(h,3)
h = bit32.bxor(h, bit32.lshift(h,11))
h = h + bit32.lshift(h,15)
h = bit32.band(h, 0x00000000FFFFFFFF)
h = h + bit32_rshift(h,3)
h = bit32_bxor(h, bit32_lshift(h,11))
h = h + bit32_lshift(h,15)
-- compute hash modules 2^32
h = bit32_band(h, WORD_MASK)
return h
end

-- tuple instances has this metatable
local tuple_instance_mt = {
-- disallow to change metatable
__metatable = false,
-- avoid to insert new elements
__newindex = function(self) error("Unable to modify a tuple") end,
-- convert it to a string like: tuple{ a, b, ... }
__tostring = function(self)
local result = {}
for i=1,#self do result[#result+1] = tostring(self[i]) end
return table.concat({"tuple(",table.concat(result, ", "),")"}, " ")
for i=1,#self do
local v = self[i]
if type(v) == "string" then v = string_format("%q",v) end
result[#result+1] = tostring(v)
end
return table_concat({"tuple{",table_concat(result, ", "),"}"}, " ")
end,
__concat = function(self,other)
-- concatenates two tuples or a tuple with a number, string or another table
__concat = function(a,b)
if type(a) ~= "table" then a,b=b,a end
local aux = {}
for i=1,#self do aux[#aux+1] = self[i] end
if type(other) == "table" then
for i=1,#other do aux[#aux+1] = other[i] end
for i=1,#a do aux[#aux+1] = a[i] end
if type(b) == "table" then
for i=1,#b do aux[#aux+1] = b[i] end
else
aux[#aux+1] = other
aux[#aux+1] = b
end
return tuple(aux)
end,
}

local function proxy(t)
setmetatable(t, tuple_instance_mt)
return setmetatable({},{
__newindex = function(self) error("Unable to modify a tuple") end,
__index = function(self,k) if k == "is_tuple" then return true end return t[k] end,
__len = function(self) return #t end,
__tostring = function(self) return tostring(t) end,
-- returns a wrapper table (proxy) which shades the data table, allowing
-- in-mutability in Lua, it receives the table data and the number of elements
local function proxy(tpl,n)
setmetatable(tpl, tuple_instance_mt)
return setmetatable({}, {
-- the proxy table has an in-mutable metatable, and stores in __metatable
-- a string identifier, the real tuple data and the number of elements
__metatable = { "is_tuple", tpl , n },
__index = tpl,
__newindex = function(self) error("Tuples are in-mutable data") end,
__len = function(self) return getmetatable(self)[3] end,
__tostring = function(self) return tostring(getmetatable(self)[2]) end,
__lt = function(self,other)
local t = getmetatable(self)[2]
if type(other) ~= "table" then return false
elseif #t < #other then return true
elseif #t > #other then return false
elseif t == other then return false
else
for i=1,#t do
if t[i] > other[i] then return false end
Expand All @@ -104,51 +157,83 @@ local function proxy(t)
end
end,
__le = function(self,other)
local t = getmetatable(self)[2]
-- equality is comparing references (tuples are in-mutable and interned)
if self == other then return true end
return self < other
end,
__pairs = function(self) return pairs(t) end,
__ipairs = function(self) return ipairs(t) end,
__concat = function(self,other) return t .. other end,
__pairs = function(self) return pairs(getmetatable(self)[2]) end,
__ipairs = function(self) return ipairs(getmetatable(self)[2]) end,
__concat = function(self,other) return getmetatable(self)[2] .. other end,
__mode = "v",
})
end

-- builds a candidate tuple given a table, recursively converting tables in new
-- tuples
local function tuple_constructor(t)
local h = 0
local new_tuple = {}
for i,v in ipairs(t) do
if type(v) == "table" then
new_tuple[i] = tuple(v)
else
new_tuple[i] = v
for i,v in pairs(t) do
-- ignore the field "n" introduced by variadic args
if i~="n" then
assert(type(i) == "number" and i>0, "Needs integer keys > 0")
if type(v) == "table" then
-- recursively converts tables in new tuples
new_tuple[i] = tuple(v)
else
-- copies the value
new_tuple[i] = v
end
end
end
return proxy(new_tuple)
-- returns a proxy to the new_tuple table with #t length
return proxy(new_tuple,#t)
end

-- metatable of tuple "class" table
local tuple_mt = {
-- tuple constructor doesn't allow table loops
__call = function(self, ...)
local t = { ... } if #t == 1 then t = t[1] end
local n = select('#', ...)
local t = table_pack(...) assert(#t == n) if #t == 1 then t = t[1] end
if type(t) ~= "table" then
-- non-table elements are unpacked when only one is given
return t
elseif #t == 1 then
return tuple(t[1])
else
if t.is_tuple then return t end
-- check if the given table is a tuple, if it is the case, just return it
local mt = getmetatable(t) if mt and mt[1]=="is_tuple" then return t end
-- create a new tuple candidate
local new_tuple = tuple_constructor(t)
local p = compute_hash(new_tuple) % NUM_BUCKETS
local bucket = (list_of_tuples[p] or setmetatable({}, { __mode="v" }))
local bucket = (list_of_tuples[p] or setmetatable({}, WEAK_MT))
list_of_tuples[p] = bucket
for i,vi in ipairs(bucket) do
-- Count the number of elements in the bucket and the maximum non-nil key.
-- In case the relation between this two values was greater than
-- MAX_BUCKET_HOLES_RATIO, the bucket will be rearranged to remove all nil
-- holes.
local max,n = 0,0
for i,vi in pairs(bucket) do
local equals = true
-- check equality by comparing all the elements one-by-one
for j,vj in ipairs(vi) do
if vj ~= new_tuple[j] then equals=false break end
end
-- BREAKS the execution flow in case the tuple exists in the bucket
if equals == true then return vi end
max = math_max(max,i)
n = n+1
end
-- rearrange the bucket when the ratio achieves the threshold
if max/n > MAX_BUCKET_HOLES_RATIO then
local new_bucket = {}
for i,vi in pairs(bucket) do new_bucket[#new_bucket+1] = vi end
list_of_tuples[p], bucket = new_bucket, new_bucket
max = #bucket
collectgarbage("collect")
end
table.insert(bucket, new_tuple)
bucket[max+1] = new_tuple
-- take note of the bucket into __metatable array, position 4
getmetatable(new_tuple)[4] = p
return new_tuple
end
end,
Expand All @@ -166,6 +251,29 @@ tuple.utest = function()
assert(a == c)
assert(b == a[2])
assert(b == c[2])
a,b,c = nil,nil,nil
collectgarbage("collect")
--
local aux = {} for i=1,10000 do aux[tuple(i,i)] = i end
assert(tuple.stats() == 10000)
collectgarbage("collect")
assert(tuple.stats() == 10000)
aux = nil
collectgarbage("collect")
assert(tuple.stats() == 0)
end

-- returns the number of tuples "alive", the number of used buckets, and the
-- loading factor of the hash table
tuple.stats = function()
local num_buckets = 0
local size = 0
for k1,v1 in pairs(list_of_tuples) do
num_buckets = num_buckets + 1
for k2,v2 in pairs(v1) do size=size+1 end
end
if num_buckets == 0 then num_buckets = 1 end
return size,num_buckets,size/NUM_BUCKETS
end

return tuple

0 comments on commit fb8c436

Please sign in to comment.