Skip to content

Commit

Permalink
deterministic generator
Browse files (browse the repository at this point in the history)
  • Loading branch information
youknowone authored and progval committed Mar 1, 2023
1 parent e5f3485 commit 7f70a76
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 39 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ path = "unicode_names2_macros"
version = ">=0.3, <0.7"

[dev-dependencies]
rand = "0.5.1"
rand = "0.8.5"
rand_xorshift = "0.3.0"
70 changes: 65 additions & 5 deletions generator/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion generator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ unstable = []
time = "0.1.10"
log = "0"
getopts = "0.2.21"
rand = "0.3.14"
rand = "0.8.5"
rand_xorshift = "0.3.0"
15 changes: 7 additions & 8 deletions generator/src/phf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
extern crate time;

use rand::{XorShiftRng, Rng, self};

use rand::prelude::{Rng, SeedableRng, SliceRandom, StdRng};
use std::iter::repeat;

static NOVAL: char = '\0';
Expand Down Expand Up @@ -35,7 +34,7 @@ fn split(hash: u64) -> Hash {
struct Hash { g: u32, f1: u32, f2: u32 }

fn try_phf_table(values: &[(char, String)],
lambda: usize, seed: u64) -> Option<(Vec<(u32, u32)>, Vec<char>)> {
lambda: usize, seed: u64, rng: &mut StdRng) -> Option<(Vec<(u32, u32)>, Vec<char>)> {

let hashes: Vec<_> =
values.iter().map(|&(n, ref s)| (split(hash(s, seed)), n)).collect();
Expand Down Expand Up @@ -80,9 +79,8 @@ fn try_phf_table(values: &[(char, String)],
// shuffle them.
let mut d1s = (0..(table_len as u32)).collect::<Vec<_>>();
let mut d2s = d1s.clone();
let mut rng: XorShiftRng = rand::random();
rng.shuffle(&mut d1s);
rng.shuffle(&mut d2s);
d1s.shuffle(rng);
d2s.shuffle(rng);

// run through each bucket and try to fit the elements into the
// array by choosing appropriate adjusting factors
Expand Down Expand Up @@ -132,14 +130,15 @@ fn try_phf_table(values: &[(char, String)],

pub fn create_phf(data: &[(char, String)], lambda: usize,
max_tries: usize) -> (u64, Vec<(u32, u32)>, Vec<char>) {
let mut rng = StdRng::seed_from_u64(0xf0f0f0f0);
let start = time::precise_time_s();

for i in 0..(max_tries) {
let my_start = time::precise_time_s();
println!("PHF #{}: starting {:.2}", i, my_start - start);

let seed = rand::random();
match try_phf_table(data, lambda, seed) {
let seed = rng.gen();
match try_phf_table(data, lambda, seed, &mut rng) {
Some((disp, map)) => {
let end = time::precise_time_s();
println!("PHF took: total {:.2} s, successive {:.2} s",
Expand Down
38 changes: 16 additions & 22 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,13 @@ static ASCII_UPPER_MAP: [u8; 256] = [
mod tests {
use std::prelude::v1::*;
use std::char;
use rand::{seq, XorShiftRng, SeedableRng};
use rand::{
prelude::{SeedableRng, StdRng},
distributions::{Standard, Distribution},
};

use test::{self, Bencher};
use super::{generated, name, character, is_cjk_unified_ideograph, jamo, Name};
use super::{generated, name, character, is_cjk_unified_ideograph, jamo};

static DATA: &'static str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"),
"/data/UnicodeData.txt"));
Expand Down Expand Up @@ -620,18 +623,14 @@ mod tests {
#[bench]
fn name_10000_invalid(b: &mut Bencher) {
// be consistent across runs, but avoid sequential/caching.
let mut rng: XorShiftRng = SeedableRng::from_seed([0xFF, 0x00, 0xFF, 0x00, 0xF0, 0xF0, 0xF0, 0xF0,
0x00, 0xFF, 0x00, 0xFF, 0x0F, 0x0F, 0x0F, 0x0F]);
let chars = seq::sample_iter(&mut rng,
(0u32..0x10FFFF)
.filter_map(|x| {
match char::from_u32(x) {
Some(c) if name(c).is_none() => Some(c),
let mut rng = StdRng::seed_from_u64(0x12345678);
let chars: Vec<char> = Standard.sample_iter(&mut rng).take(10000)
.filter_map(|c| {
match c {
c if name(c).is_none() => Some(c),
_ => None
}
}),
10000)
.unwrap();
}).collect();

b.iter(|| {
for &c in chars.iter() {
Expand Down Expand Up @@ -662,16 +661,11 @@ mod tests {
#[bench]
fn character_10000(b: &mut Bencher) {
// be consistent across runs, but avoid sequential/caching.
let mut rng: XorShiftRng = SeedableRng::from_seed([0xFF, 0x00, 0xFF, 0x00, 0xF0, 0xF0, 0xF0, 0xF0,
0x00, 0xFF, 0x00, 0xFF, 0x0F, 0x0F, 0x0F, 0x0F]);

let names = seq::sample_iter(&mut rng,
(0u32..0x10FFFFF).filter_map(|x| char::from_u32(x).and_then(name)),
10000)
.unwrap()
.iter()
.map(|n: &Name| n.to_string())
.collect::<Vec<_>>();
let mut rng = StdRng::seed_from_u64(0x12345678);

let names: Vec<_> = Standard.sample_iter(&mut rng).take(10000).filter_map(name)
.map(|name| name.to_string())
.collect();

b.iter(|| {
for n in names.iter() {
Expand Down
4 changes: 2 additions & 2 deletions src/phf.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
extern crate time;
use std::rand::{XorShiftRng, Rng, mod};
use std::rand::{StdRng, Rng, mod};
use std::collections::HashMap;
use std::hash::sip;

Expand Down Expand Up @@ -40,7 +40,7 @@ fn try_phf_table(values: &[String], lambda: uint, seed: u64) -> Option<(Vec<(u32

let mut d1s = Vec::from_fn(table_len, |i| i as u32);
let mut d2s = d1s.clone();
let mut rng: XorShiftRng = rand::random();
let mut rng: StdRng = rand::random();

'next_bucket: for &(bkt_idx, ref bkt_keys) in buckets.iter() {
rng.shuffle(d1s.as_mut_slice());
Expand Down

0 comments on commit 7f70a76

Please sign in to comment.