This repository has been archived by the owner on Aug 13, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
approximate_counter.go
113 lines (97 loc) · 2.69 KB
/
approximate_counter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package main
import (
"encoding/json"
"sort"
)
// ApproximateCounter is a counter using streaming counting algorithm
//
// Lossy counting algorithm [Manku-Motwani2002]
// bucketSize = 1 / epsilon
//
// [Manku-Motwani2002]:
// Gurmeet Singh Manku & Rajeev Motwani. (2002)
// Approximate Frequency Counts over Data Streams.
// VLDB'02, 346 - 357.
// http://dl.acm.org/citation.cfm?id=1287400
type ApproximateCounter struct {
counts map[string]int
errors map[string]int
epsilon float64
supportThreshold float64
iBucket int
iItem int
bucketSize int
Counter
}
func (counter ApproximateCounter) getCountingResult() map[string]int {
return counter.counts
}
func (counter *ApproximateCounter) initialize() {
counter.counts = make(map[string]int, counter.bucketSize)
counter.errors = make(map[string]int, counter.bucketSize)
}
func (counter *ApproximateCounter) truncateNegligibleItems() {
for k := range counter.counts {
if counter.counts[k]+counter.errors[k] <= counter.iBucket {
delete(counter.counts, k)
delete(counter.errors, k)
}
}
counter.iBucket++
}
func (counter *ApproximateCounter) truncateBySupport() {
for k := range counter.counts {
if float64(counter.counts[k]) < (counter.supportThreshold-counter.epsilon)*float64(counter.iItem) {
delete(counter.counts, k)
delete(counter.errors, k)
}
}
}
func (counter *ApproximateCounter) increment(item string) int {
counter.iItem++
_, itemExists := counter.counts[item]
if itemExists {
counter.counts[item]++
} else {
counter.counts[item] = 1
counter.errors[item] = counter.iBucket - 1
}
if counter.iItem%counter.bucketSize == 0 {
counter.truncateNegligibleItems()
}
return counter.counts[item]
}
func (counter ApproximateCounter) get(item string) int {
c, exists := counter.counts[item]
if exists {
return c + counter.errors[item]
}
return 0
}
func (counter ApproximateCounter) getSize() uint64 {
return uint64(len(counter.counts))
}
func (counter ApproximateCounter) toJSON() string {
for k, v := range counter.counts {
counter.counts[k] = v + counter.errors[k]
}
entryList := EntryList{}
for k, v := range counter.counts {
entry := Entry{k, v}
entryList = append(entryList, entry)
}
sort.Sort(sort.Reverse(entryList))
s, _ := json.Marshal(entryList)
return string(s)
}
// NewApproximateCounter constructs ApproximateCounter struct
func NewApproximateCounter(epsilon float64, supportThreshold float64) *ApproximateCounter {
counter := &ApproximateCounter{}
counter.iBucket = 1
counter.iItem = 0
counter.supportThreshold = supportThreshold
counter.epsilon = epsilon
counter.bucketSize = int(1.0 / epsilon)
counter.initialize()
return counter
}