forked from duboviy/misc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_by_distribution.py
62 lines (42 loc) · 1.3 KB
/
get_by_distribution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python
"""
Provide a convenient way to select list elements with different probabilities.
"""
import random
import bisect
import collections
import logging
def set_log(level=None):
    """Configure the root logger to write formatted records to the console.

    A new ``StreamHandler`` is attached to the root logger on every call,
    and both the root logger and the handler are set to ``level``.

    Args:
        level: Logging level name (e.g. ``'INFO'``) or numeric level. When
            omitted, the module-level ``LOG_LEVEL`` is used if it exists;
            otherwise ``'INFO'`` is used.
    """
    if level is None:
        # LOG_LEVEL is only assigned when the file runs as a script; fall
        # back to a default so an importing caller does not hit NameError.
        level = globals().get('LOG_LEVEL', 'INFO')

    logger = logging.getLogger()
    logger.setLevel(level)

    formatter = logging.Formatter(
        '%(filename)s [LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s'
    )
    console = logging.StreamHandler()
    console.setLevel(level)
    console.setFormatter(formatter)
    logger.addHandler(console)
def cdf(weights):
    """Return the cumulative distribution for a sequence of weights.

    Each entry is the running sum of the weights up to and including that
    position, divided by the sum of all weights.

    Args:
        weights: Iterable of numeric weights. It is consumed in a single
            pass, so one-shot iterators are accepted as well.

    Returns:
        A list with one cumulative fraction per weight; the last entry is
        exactly 1.0. An empty input yields an empty list.

    Raises:
        ZeroDivisionError: If the weights sum to zero.
    """
    running_totals = []
    running = 0
    for weight in weights:
        running += weight
        running_totals.append(running)

    if not running_totals:
        return []

    # Normalise by the final running total rather than a separate
    # sum(weights): the built-in may round differently from this naive
    # accumulation, which could leave the last entry slightly below 1.0.
    total = running_totals[-1]
    return [partial / total for partial in running_totals]
def get_by_distribution(collection, weights):
    """Pick one element of ``collection`` at random, biased by ``weights``.

    A uniform random number in [0, 1) is located within the cumulative
    distribution of ``weights``; the element at the matching position is
    returned.

    Args:
        collection: Indexable sequence of candidates (supports ``len`` and
            integer indexing).
        weights: Sequence of numeric weights, one per candidate.

    Returns:
        The selected element of ``collection``.

    Raises:
        ValueError: If ``collection`` and ``weights`` differ in length.
    """
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if len(collection) != len(weights):
        raise ValueError(
            "collection and weights must have the same length "
            "(%d != %d)" % (len(collection), len(weights))
        )

    cdf_values = cdf(weights)
    x = random.random()
    # Clamp to the last valid index: if rounding leaves the final CDF value
    # below 1.0, bisect could otherwise return len(collection).
    idx = min(bisect.bisect(cdf_values, x), len(collection) - 1)
    # x is a float in [0, 1); '%d' would truncate it to 0 in the log.
    logging.debug("cdf_values: %s x: %s idx: %d", cdf_values, x, idx)
    return collection[idx]
if __name__ == '__main__':
    # Demo: draw 10000 samples and log how often each element was picked.
    LOG_LEVEL = 'INFO'  # 'DEBUG'
    set_log()

    population = 'ABC'
    distribution = [0.3, 0.4, 0.3]

    counts = collections.defaultdict(int)
    for _ in range(10000):
        picked = get_by_distribution(population, distribution)
        counts[picked] += 1

    logging.info(counts)
# Sample run (the logged line is also prefixed by the formatter from set_log):
# % test.py
# defaultdict(<type 'int'>, {'A': 3066, 'C': 2964, 'B': 3970})