Skip to content

Commit

Permalink
add random sampling function
Browse files Browse the repository at this point in the history
  • Loading branch information
torao committed Mar 30, 2020
1 parent 55b8f34 commit a428c18
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 0 deletions.
102 changes: 102 additions & 0 deletions libs/rand/sampling.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package rand

import (
"fmt"
s "sort"
)

// Candidate is the interface for performing weighted deterministic random selection.
type Candidate interface {
// Priority returns the weight of the candidate. RandomSamplingWithPriority accumulates these values to
// decide which candidate a random threshold falls on.
Priority() uint64
// LessThan reports whether this candidate orders before other. It is used to break ties between
// candidates of equal priority when the candidates are sorted.
LessThan(other *Candidate) bool
}

// uint64Mask keeps the low 63 bits of a 64-bit value (it clears the most significant bit). It is applied to
// the PRNG output before that output is scaled to a selection threshold.
const uint64Mask = uint64(0x7FFFFFFFFFFFFFFF)

// RandomSamplingWithPriority selects a specified number of candidates randomly from the candidate set based on
// each priority. This function is deterministic and will produce the same result for the same input.
//
// Inputs:
//   - seed: 64bit integer used for random selection.
//   - candidates: A set of candidates. The order is disregarded and the given slice is not modified.
//   - sampleSize: The number of candidates to select at random. Zero yields an empty result.
//   - totalPriority: The exact sum of the priorities of each candidate.
//
// Returns:
//   - samples: The randomly selected candidates. NOTE that the same candidate may have been selected in
//     duplicate.
//
// It panics when sampleSize is positive and the samples cannot be drawn, i.e. when the candidate set is empty
// or when totalPriority is greater than the actual sum of the candidates' priorities.
func RandomSamplingWithPriority(seed uint64, candidates []Candidate, sampleSize int, totalPriority uint64) (samples []Candidate) {
	// nothing to draw; without this guard the threshold lookups below would index an empty slice
	if sampleSize == 0 {
		return []Candidate{}
	}

	// generates a random selection threshold for candidates' cumulative priority; the PRNG state is advanced on
	// a copy so that the original seed stays available for the diagnostic message
	state := seed
	thresholds := make([]uint64, sampleSize)
	for i := range thresholds {
		// calculating [gross weights] × [[0,1) random number]
		threshold := uint64(float64(nextRandom(&state)&uint64Mask) / float64(uint64Mask+1) * float64(totalPriority))
		// float64 cannot represent every 63-bit integer, so the ratio may round up to exactly 1.0 and the
		// product may reach totalPriority; such a threshold could never be drawn, so clamp it into range
		if totalPriority > 0 && threshold >= totalPriority {
			threshold = totalPriority - 1
		}
		thresholds[i] = threshold
	}
	s.Slice(thresholds, func(i, j int) bool { return thresholds[i] < thresholds[j] })

	// works on a sorted copy of the set to keep the given array order
	candidates = sort(candidates)

	// extract candidates with a cumulative priority threshold
	samples = make([]Candidate, sampleSize)
	cumulativePriority := uint64(0)
	undrawn := 0
	for _, candidate := range candidates {
		for thresholds[undrawn] < cumulativePriority+candidate.Priority() {
			samples[undrawn] = candidate
			undrawn++
			if undrawn == len(samples) {
				return samples
			}
		}
		cumulativePriority += candidate.Priority()
	}

	// Possible factors: 1) the given total priority is greater than the actual cumulative one, 2) the given
	// candidates is an empty set, or 3) a bug.
	actualTotalPriority := uint64(0)
	for _, candidate := range candidates {
		actualTotalPriority += candidate.Priority()
	}
	msg := fmt.Sprintf("totalPriority=%d, actualTotalPriority=%d,"+
		" seed=%d, sampleSize=%d, undrawn=%d, threshold[%d]=%d",
		totalPriority, actualTotalPriority, seed, sampleSize, undrawn, undrawn, thresholds[undrawn])
	switch {
	case len(candidates) == 0:
		msg = fmt.Sprintf("The given candidate is an empty set: %s", msg)
	case totalPriority > actualTotalPriority:
		msg = fmt.Sprintf("The given total priority %d is greater than the actual one %d: %s",
			totalPriority, actualTotalPriority, msg)
	}
	panic(msg)
}

// nextRandom advances the 64-bit state pointed to by rand and returns the next SplitMix64 output.
// Reference: http://xoshiro.di.unimi.it/splitmix64.c
//
// The PRNG used for this random selection:
//  1. must be deterministic.
//  2. should be easily portable, independent of language or library.
//  3. does not need a long period like MT, since there aren't many random numbers to generate and
//     we expect a certain amount of randomness in the seed.
func nextRandom(rand *uint64) uint64 {
	const (
		increment   = uint64(0x9e3779b97f4a7c15)
		multiplierA = uint64(0xbf58476d1ce4e5b9)
		multiplierB = uint64(0x94d049bb133111eb)
	)

	// step the caller's state first; the output is derived from the updated state
	*rand += increment

	mixed := *rand
	mixed ^= mixed >> 30
	mixed *= multiplierA
	mixed ^= mixed >> 27
	mixed *= multiplierB
	mixed ^= mixed >> 31
	return mixed
}

// sort returns a sorted copy of candidates and leaves the given slice untouched. Candidates are ordered by
// descending priority; candidates of equal priority are ordered by their LessThan relation.
func sort(candidates []Candidate) []Candidate {
	ordered := make([]Candidate, len(candidates))
	copy(ordered, candidates)

	s.Slice(ordered, func(a, b int) bool {
		left, right := ordered[a].Priority(), ordered[b].Priority()
		if left == right {
			// same weight: fall back to the candidates' own ordering
			return ordered[a].LessThan(&ordered[b])
		}
		return left > right
	})
	return ordered
}
113 changes: 113 additions & 0 deletions libs/rand/sampling_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package rand

import (
"fmt"
"math"
s "sort"
"testing"
)

// Element is the Candidate implementation used by the tests: an identifier paired with a weight.
type Element struct {
	Id     uint32
	Weight uint64
}

// Priority returns the Weight of the element.
func (e *Element) Priority() uint64 {
	return e.Weight
}

// LessThan reports whether e has a smaller Id than other. It panics when other does not hold an *Element.
func (e *Element) LessThan(other *Candidate) bool {
	if rhs, isElement := (*other).(*Element); isElement {
		return e.Id < rhs.Id
	}
	panic("incompatible type")
}

func TestRandomSamplingWithPriority(t *testing.T) {
candidates := newCandidates(100, func(i int) uint64 { return uint64(i) })

elected := RandomSamplingWithPriority(0, candidates, 10, uint64(len(candidates)))
if len(elected) != 10 {
t.Errorf(fmt.Sprintf("unexpected sample size: %d", len(elected)))
}

// ----
// The same result can be obtained for the same input.
others := newCandidates(100, func(i int) uint64 { return uint64(i) })
secondTimeElected := RandomSamplingWithPriority(0, others, 10, uint64(len(others)))
if len(elected) != len(secondTimeElected) || !sameCandidates(elected, secondTimeElected) {
t.Errorf(fmt.Sprintf("undeterministic: %+v != %+v", elected, others))
}

// ----
// Make sure the winning frequency will be even
candidates = newCandidates(100, func(i int) uint64 { return 1 })
counts := make([]int, len(candidates))
for i := 0; i < 100000; i++ {
elected = RandomSamplingWithPriority(uint64(i), candidates, 10, uint64(len(candidates)))
for _, e := range elected {
counts[e.(*Element).Id] += 1
}
}
expected := float64(1) / float64(100)
mean, variance, z := calculateZ(expected, counts)
if z >= 1e-15 || math.Abs(mean-expected) >= 1e-15 || variance >= 1e-5 {
t.Errorf("winning frequency is uneven: mean=%f, variance=%e, z=%e", mean, variance, z)
}
}


// newCandidates builds length candidates. The i-th candidate is an *Element whose Id is i and whose Weight
// is prio(i).
func newCandidates(length int, prio func(int) uint64) (candidates []Candidate) {
	// size the slice from the argument; a fixed size would leave nil entries or overflow for other lengths
	candidates = make([]Candidate, length)
	for i := range candidates {
		candidates[i] = &Element{Id: uint32(i), Weight: prio(i)}
	}
	return candidates
}

// sameCandidates reports whether c1 and c2 hold elements with the same Ids, regardless of order. Both slices
// must contain only *Element values. The given slices are not modified.
func sameCandidates(c1 []Candidate, c2 []Candidate) bool {
	if len(c1) != len(c2) {
		return false
	}

	// sort copies, so that comparing two results does not reorder the callers' slices
	sortedCopy := func(cs []Candidate) []Candidate {
		dup := make([]Candidate, len(cs))
		copy(dup, cs)
		s.Slice(dup, func(i, j int) bool { return dup[i].LessThan(&dup[j]) })
		return dup
	}
	left, right := sortedCopy(c1), sortedCopy(c2)

	for i := range left {
		if left[i].(*Element).Id != right[i].(*Element).Id {
			return false
		}
	}
	return true
}

// calculateZ converts values into fractions of their sum and returns the mean and the variance of those
// fractions, together with z = (mean - expected) / sqrt(variance / len(values)), i.e. the distance of the
// observed mean from the expected value measured in standard errors.
func calculateZ(expected float64, values []int) (mean, variance, z float64) {
	total := 0.0
	for _, v := range values {
		total += float64(v)
	}

	// share of the total held by each value
	ratios := make([]float64, 0, len(values))
	for _, v := range values {
		ratios = append(ratios, float64(v)/total)
	}

	mean, variance = calculateMeanAndVariance(ratios)
	standardError := math.Sqrt(variance / float64(len(values)))
	z = (mean - expected) / standardError
	return mean, variance, z
}

// calculateMeanAndVariance returns the arithmetic mean of values and their population variance, i.e. the
// mean of the squared deviations from that mean. Both results are NaN for an empty slice.
func calculateMeanAndVariance(values []float64) (mean float64, variance float64) {
	count := float64(len(values))

	total := 0.0
	for i := range values {
		total += values[i]
	}
	mean = total / count

	squaredDeviations := 0.0
	for i := range values {
		dx := values[i] - mean
		squaredDeviations += dx * dx
	}
	variance = squaredDeviations / count

	return mean, variance
}

0 comments on commit a428c18

Please sign in to comment.