Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Key-value log passes correctness tests #23

Merged
merged 3 commits into from
Sep 1, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 85 additions & 60 deletions src/kvlog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,12 @@ fn my_hash<T>(obj: T) -> u64
hasher.finish()
}

//const CHUNK_BITS : u32 = 0x3ff; // 4 + 4 + 2 = 10 bits
const CHUNK_BITS : u32 = 0x3; // 2 bits
const CHUNK_BITC : usize = 2;
const CHUNK_BITS : u32 = 0x3ff; // 4 + 4 + 2 = 10 bits
const CHUNK_BITC : usize = 10;

//const CHUNK_BITS : u32 = 0x3; // 2 bits
//const CHUNK_BITC : usize = 2;

const KEY_BITC : usize = 64;

#[derive(Clone,PartialEq,Eq,Debug,Hash)]
Expand Down Expand Up @@ -396,6 +399,27 @@ impl <K:'static+Hash+PartialEq+Eq+Clone+Debug,
return best_match
}

// find the longest matching bit index (and length), if any
fn all_matches (bits:KeyBits, kvs:&Vec<Kv<K,V>>) -> Vec<(usize, usize)> {
let mut all_matches : Vec<(usize, usize)> = vec![];
for kv_idx in 0..kvs.len() {
let mut match_len = 0;
for bit_idx in 0..KEY_BITC {
let mask = 1 << bit_idx;
if (kvs[kv_idx].bits.0 & mask) == (bits.0 & mask) {
match_len += 1;
} else {
// invariant for each use of this function: chunk
// bits cannot differ here
assert!( bit_idx >= CHUNK_BITC );
break
}
};
all_matches.push((kv_idx, match_len))
}
return all_matches
}

// build the (conceptual) paths in the (conceptual) hash trie.
//
// concretely, this function extends a vector of jumps as it
Expand All @@ -415,8 +439,10 @@ impl <K:'static+Hash+PartialEq+Eq+Clone+Debug,
jumps: &mut Vec<Option<Art<Chunk<K,V>>>>)
-> Option<Art<Chunk<K,V>>>
{
use std::cmp::Ordering;
match chunk.here {
Here::Vec(ref kvs) => {
// look for matching key bits in this vector of key-value pairs:
for &(ref _k, ref kbits, ref _v) in kvs.iter().rev() {
if bits.0 == kbits.0 {
assert_eq!(chunk_art, None);
Expand All @@ -439,43 +465,36 @@ impl <K:'static+Hash+PartialEq+Eq+Clone+Debug,
return None
}
Some(kvs) => {
// first, look for an exact KeyBits match among the Kvs
for kv in kvs.iter() {
// compare kv.bits with bits. How many of them match?
// assert invariant: at least depth (+1?) bits match?
if bits == kv.bits {
return chunk_art.map(|x|x.clone())
}
};
// no exact match for all KeyBits here. So,
// we need to "jump" to another chunk.
let mut next_chunk_art : Option<Art<Chunk<K,V>>> = None;
Self::longest_match(bits.clone(), kvs).map( |(match_idx, match_len)| {
let curr_idx = CHUNK_BITC + jumps.len();
// Question: How to prove this?
assert!( curr_idx <= match_len );
if curr_idx <= match_len {
// progress! conceptually, we are
// "traversing" more of the trie's
// internal structure as we "traverse"
// these additional bits at >= curr_idx.
for i in curr_idx..match_len {
// copy jumps for each matching bit
jumps.push(kvs[match_idx].jumps
.get(i - CHUNK_BITC).unwrap_or(&None).clone())
let mut all_matches = Self::all_matches(bits.clone(), kvs);
all_matches.sort_by(
|&(_i, leni), &(_j, lenj)|
if leni < lenj { Ordering::Less } else { Ordering::Greater }
);
let mut pos = jumps.len() + CHUNK_BITC;
for (kv_idx, differ_pos) in all_matches {
if differ_pos == KEY_BITC {
// Exact match; we have processed all shorter matches already.
for i in pos..(kvs[kv_idx].jumps.len() + CHUNK_BITC) {
jumps.push(kvs[kv_idx].jumps[i - CHUNK_BITC].clone());
};
return chunk_art.map(|x|x.clone())

} else if pos <= differ_pos {
// this key extends match with more bits; for each, copy a jump bit
for i in pos..differ_pos {
// skip this idx if we are at it already;
jumps.push(kvs[kv_idx].jumps.get(i - CHUNK_BITC).unwrap_or(&None).clone());
};
next_chunk_art = kvs[match_idx].jumps
.get(match_len - CHUNK_BITC).unwrap_or(&None).clone();
pos = differ_pos;
jumps.push(chunk_art.map(|x|x.clone()));
pos += 1;
next_chunk_art = kvs[kv_idx].jumps
.get(differ_pos - CHUNK_BITC).unwrap_or(
&kvs[kv_idx].prev // ???
).clone();
}
else {
// TODO: Handle the case where we
// limit the jump bits, and sometimes
// we get to a "max length"
next_chunk_art = panic!("TODO: bits:{:b}, kv.bits:{:b} curr_idx:{}, match_len:{}",
bits.0, kvs[match_idx].bits.0, curr_idx, match_len)
}
});
};
// search in the next chunk, if any
match next_chunk_art {
None => None,
Expand Down Expand Up @@ -536,11 +555,14 @@ impl <K:'static+Hash+PartialEq+Eq+Clone+Debug,
// if no exact match, recur using the jumps of the longest match, if any
if vo == None {
Self::longest_match(key_bits, kvs).map(|(match_idx, match_len)|{
println!("longest_match:: key:{:?}, match_idx:{}, match_len:{}", key, match_idx, match_len);
match *kvs[match_idx].jumps.get(match_len - CHUNK_BITC).unwrap_or(&None) {
None => (),
Some(ref x) => { vo = Log(Some(force(x))).get(key) }
}})
Some(ref x) => {
// jump to chunk x and search, recursively:
vo = Log(Some(force(x))).get(key)
}
}
})
} else { None };
vo
}
Expand Down Expand Up @@ -590,23 +612,27 @@ impl <K:'static+Hash+PartialEq+Eq+Clone+Debug,
},
Here::Vec(kvs) => {
// initial kv table consists of most recent key bindings from _this chunk_:
let mut uniq_keys : HashMap<KeyBits, ()> = HashMap::new();
for (k, kbits, v) in kvs.into_iter().rev() {
let chunk_bits = Self::chunk_bits(&kbits);
if ! tab.contains_key(&chunk_bits) {
tab.insert(chunk_bits.clone(), vec![] );
};
match tab.get_mut(&chunk_bits) {
None => unreachable!(), // impossible: just inserted an empty vector here.
Some(kvs) => {
let mut jumps = Vec::new();
let prev = match chunk.prev {
None => None,
Some(ref chunk_art) => {
Self::build_path_rec
(kbits.clone(), Some(chunk_art), &force(chunk_art), &mut jumps)
}
};
kvs.push(Kv{key:k, val:v, bits:kbits, prev:prev, jumps:jumps})
if ! uniq_keys.contains_key(&kbits) {
uniq_keys.insert(kbits.clone(), ());
if ! tab.contains_key(&chunk_bits) {
tab.insert(chunk_bits.clone(), vec![] );
};
match tab.get_mut(&chunk_bits) {
None => unreachable!(), // impossible: just inserted an empty vector here.
Some(kvs) => {
let mut jumps = Vec::new();
let prev = match chunk.prev {
None => None,
Some(ref chunk_art) => {
Self::build_path_rec
(kbits.clone(), Some(chunk_art), &force(chunk_art), &mut jumps)
}
};
kvs.push(Kv{key:k, val:v, bits:kbits, prev:prev, jumps:jumps})
}
}
}
};
Expand Down Expand Up @@ -766,7 +792,6 @@ fn kvlog_vs_hashmap () {
use adapton::engine::{manage,name_of_usize,name_unit};
let mut rng = thread_rng();
let numops = 10000;
let numkeys = 100;
let gauged = true;
let gauge = 10;

Expand All @@ -776,13 +801,13 @@ fn kvlog_vs_hashmap () {
let mut t = Log::emp();

for i in 0..numops {
let r1 : usize = rng.gen(); let r1 = r1 % numkeys;
let r2 : usize = rng.gen(); let r2 = r2 % numkeys;
let r1 : usize = rng.gen(); let r1 = r1 % (i + 1);
let r2 : usize = rng.gen(); let r2 = r2 % (i + 1);
let nm = if gauged && i % gauge == 0 { Some(name_of_usize(i)) } else { None };

// Test random insertion
//if !(nm == None) { println!("=========\nname {:?}:", nm); };
println!("put-{:?}:: key {:?} maps to {:?}", i, r1, r2);
println!("put-{:?}:: k:{:?} maps to {:?}", i, r1, r2);
m.insert(r1, r2);
t.put(r1, r2);
t = match nm {
Expand All @@ -793,8 +818,8 @@ fn kvlog_vs_hashmap () {

// Test random lookup
let r3 : usize = rng.gen();
let r3 = r3 % (numkeys * 2); // Look for non-existent keys with prob 0.5
println!("get-{:?}:: key {:?} maps to {:?}", i, r3, m.get(&r3));
let r3 = r3 % (i * 2 + 1); // Look for non-existent keys with prob 0.5
println!("get-{:?}:: k:{:?} maps to {:?}", i, r3, m.get(&r3));
assert_eq!(m.get(&r3).map(|&n|n.clone()), t.get(&r3));
}
}