forked from jliszka/probability-monad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Distribution.scala
206 lines (170 loc) · 6.54 KB
/
Distribution.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import scala.util.Random
import scala.annotation.tailrec
trait Distribution[A] {
self =>
protected def get: A
override def toString = "<distribution>"
def map[B](f: A => B): Distribution[B] = new Distribution[B] {
override def get = f(self.get)
}
def flatMap[B](f: A => Distribution[B]): Distribution[B] = new Distribution[B] {
override def get = f(self.get).get
}
def filter(pred: A => Boolean): Distribution[A] = new Distribution[A] {
@tailrec
override def get = {
val s = self.get
if (pred(s)) s else this.get
}
}
def given(pred: A => Boolean): Distribution[A] = filter(pred)
def until(pred: List[A] => Boolean): Distribution[List[A]] = new Distribution[List[A]] {
override def get = {
@tailrec
def helper(sofar: List[A]): List[A] = {
if (pred(sofar)) sofar
else helper(self.get :: sofar)
}
helper(Nil)
}
}
def repeat(n: Int): Distribution[List[A]] = until(_.length == n)
@tailrec
final def iterate(n: Int, f: A => Distribution[A]): Distribution[A] = {
if (n == 0) this
else this.flatMap(f).iterate(n-1, f)
}
def iterateUntil(pred: A => Boolean, f: A => Distribution[A]): Distribution[A] = new Distribution[A] {
override def get = {
@tailrec
def helper(a: A): A = {
if (pred(a)) a
else helper(f(a).get)
}
helper(self.get)
}
}
private val nn = 1000
def pr(pred: A => Boolean, given: A => Boolean = (a: A) => true, samples: Int = nn): Double = {
1.0 * this.filter(given).sample(samples).count(pred) / samples
}
// NB: Expected value only makes sense for real-valued distributions. If you want to find the expected
// value of a die roll, for example, you have to do die.map(_.toDouble).ev.
def ev(implicit f: Fractional[A]): A = f.div(sample(nn).foldLeft(f.zero)(f.plus), f.fromInt(nn))
def sample(n: Int = nn): List[A] = List.fill(n)(self.get)
def zip[B](d: Distribution[B]): Distribution[(A, B)] = new Distribution[(A, B)] {
override def get = (self.get, d.get)
}
def zipWith[B, C](d: Distribution[B])(f: (A, B) => C): Distribution[C] = new Distribution[C] {
override def get = f(self.get, d.get)
}
def +(d: Distribution[A])(implicit n: Numeric[A]): Distribution[A] = new Distribution[A] {
override def get = n.plus(self.get, d.get)
}
def -(d: Distribution[A])(implicit n: Numeric[A]): Distribution[A] = new Distribution[A] {
override def get = n.minus(self.get, d.get)
}
def *(d: Distribution[A])(implicit n: Numeric[A]): Distribution[A] = new Distribution[A] {
override def get = n.times(self.get, d.get)
}
def /(d: Distribution[A])(implicit f: Fractional[A]): Distribution[A] = new Distribution[A] {
override def get = f.div(self.get, d.get)
}
def hist = {
this.sample(nn).groupBy(x=>x).mapValues(_.length.toDouble / nn)
}
def plotHist(implicit ord: Ordering[A] = null) = {
val histogram = hist.toList
val sorted = if (ord == null) histogram else histogram.sortBy(_._1)(ord)
doPlot(sorted)
}
def plotBucketedHist(buckets: Int)(implicit ord: Ordering[A], frac: Fractional[A]) = {
val histogram = hist.toList.sortBy(_._1)(ord)
val min = histogram.first._1
val max = histogram.last._1
val bucketWidth = frac.div(frac.minus(max, min), frac.fromInt(buckets))
val halfBucketWidth = frac.div(bucketWidth, frac.fromInt(2))
def toBucket(a: A) = frac.toInt(frac.div(frac.minus(a, min), bucketWidth))
def fromBucket(n: Int) = frac.plus(frac.plus(frac.times(frac.fromInt(n), bucketWidth), min), halfBucketWidth)
val bucketed = histogram.map{ case (a, p) => (fromBucket(toBucket(a)), p) }
.groupBy(_._1)
.mapValues(_.map(_._2).sum)
.toList.sortBy(_._1)(ord)
doPlot(bucketed)
}
private def doPlot(data: Iterable[(A, Double)]) = {
val scale = 100
data.foreach{ case (a, p) => {
val hashes = (p * scale).toInt
println("%s %5.2f%% %s".format(a.toString, p*100, "#" * hashes))
}}
}
}
object Distribution {
private val rand = new Random()
def always[A](value: A) = new Distribution[A] {
override def get = value
}
object u extends Distribution[Double] {
override def get = rand.nextDouble()
}
object normal extends Distribution[Double] {
override def get = rand.nextGaussian()
}
sealed abstract class Coin
case object H extends Coin
case object T extends Coin
def coin = discreteUniform(List(H, T))
def biasedCoin(p: Double) = discrete(List(H -> p, T -> (1-p)))
def d(n: Int) = discreteUniform(1 to n)
def die = d(6)
def dice(n: Int) = die.repeat(n)
def tf(p: Double = 0.5) = discrete(List(true -> p, false -> (1-p)))
def uniform(lo: Double, hi: Double) = u.map(x => (x * (hi - lo)) + lo)
def discreteUniform[A](values: Iterable[A]): Distribution[A] = new Distribution[A] {
private val vec = Vector() ++ values
override def get = vec(rand.nextInt(vec.length))
}
def discrete[A](weightedValues: Iterable[(A, Double)]): Distribution[A] = new Distribution[A] {
val len = weightedValues.size
val scale = len / weightedValues.map(_._2).sum
val scaled = weightedValues.map{ case (a, p) => (a, p * scale) }.toList
val (smaller, bigger) = scaled.partition(_._2 < 1.0)
private def alias(smaller: List[(A, Double)], bigger: List[(A, Double)]): List[(A, Double, Option[A])] = {
smaller match {
case Nil => bigger.map{ case (a, _) => (a, 1.0, None) }
case (s, sp)::ss => {
val (b, pb)::bb = bigger
val remainder = (b, pb - (1.0 - sp))
val rest = if (remainder._2 < 0.9999) alias(remainder :: ss, bb) else alias(ss, remainder :: bb)
(s, sp, Some(b)) :: rest
}
}
}
val table = Vector() ++ alias(smaller, bigger)
private def select(p1: Double, p2: Double, table: Vector[(A, Double, Option[A])]): A = {
table((p1 * len).toInt) match {
case (a, _, None) => a
case (a, p, Some(b)) => if (p2 <= p) a else b
}
}
override def get = {
select(u.get, u.get, table)
}
}
def poisson(lambda: Double): Distribution[Int] = new Distribution[Int] {
override def get = {
val m = math.exp(-lambda)
val s = Stream.iterate(1.0)(_ * u.get)
s.tail.takeWhile(_ > m).length
}
}
def chi2(n: Int) = List.fill(n)(normal).map(x => x*x).reduceLeft[Distribution[Double]](_ + _)
lazy val cauchy = normal / normal
def markov[A](init: A, steps: Int)(transition: A => Distribution[A]): Distribution[A] = {
always(init).iterate(steps, transition)
}
def markov[A](init: A)(stop: A => Boolean, transition: A => Distribution[A]): Distribution[A] = {
always(init).iterateUntil(stop, transition)
}
}