-
Notifications
You must be signed in to change notification settings - Fork 4
/
utility.h
226 lines (202 loc) · 7.45 KB
/
utility.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// This header defines many utility functions
#ifndef UTILITY_H
#define UTILITY_H
#include "base.h"
#include <iostream>
#include <vector>
#include <cmath>
namespace db_compress {
/*
* Dynamic List behaves like multi-dimensional array, except that the number of dimensions
* can vary across different instances. The number of dimensions need to be specified in
* the constructor.
*/
template<class T>
class DynamicList {
private:
std::vector<T> dynamic_list_;
std::vector<size_t> index_cap_;
public:
DynamicList(const std::vector<size_t>& index_cap);
T& operator[](const std::vector<size_t>& index);
const T& operator[](const std::vector<size_t>& index) const;
T& operator[](int index) { return dynamic_list_[index]; }
const T& operator[] (int index) const { return dynamic_list_[index]; }
size_t size() const { return dynamic_list_.size(); }
};
template<class T>
DynamicList<T>::DynamicList(const std::vector<size_t>& index_cap) :
index_cap_(index_cap) {
int size = 1;
for (size_t i = 0; i < index_cap.size(); ++i)
size *= index_cap[i];
dynamic_list_.resize(size);
dynamic_list_.shrink_to_fit();
}
template<class T>
T& DynamicList<T>::operator[](const std::vector<size_t>& index) {
if (index.size() != index_cap_.size()) {
std::cerr << "Inconsistent Dynamic List Index Length\n";
}
size_t pos = 0;
for (size_t i = 0; i < index.size(); ++i )
pos = pos * index_cap_[i] + index[i];
return dynamic_list_[pos];
}
template<class T>
const T& DynamicList<T>::operator[](const std::vector<size_t>& index) const {
if (index.size() != index_cap_.size()) {
std::cerr << "Inconsistent Dynamic List Index Length\n";
}
size_t pos = 0;
for (size_t i = 0; i < index.size(); ++i )
pos = pos * index_cap_[i] + index[i];
return dynamic_list_[pos];
}
/*
* The quantization procedure transforms raw count of each individual bins to probability
* interval boundaries, such that all probability boundary will have base 16. This function
* will try to minimize cross entropy during quantization
*/
void Quantization(std::vector<Prob>* prob, const std::vector<int>& cnt, int base);
/*
* The following functions are used to generate Prob structure, or cast Prob
* structure to primitive types
*/
// Prob = count / (2^base)
inline Prob GetProb(int count, int base) { return Prob(count, base); }
// Round to base 16
inline Prob GetProb(double value) { return Prob((int)round(value * (1 << 16)), 16); }
inline Prob GetZeroProb() { return Prob(0, 0); }
inline Prob GetOneProb() { return Prob(1, 0); }
inline UnitProbInterval GetWholeProbInterval() { return UnitProbInterval(0, 0); }
// Trunc to nearest value within that base
inline int CastInt(const Prob& prob, int base) {
if (prob.exp <= base)
return prob.num << (base - prob.exp);
else
return prob.num >> (prob.exp - base);
}
inline double CastDouble(const Prob& prob) {
return (double)prob.num / ((long long)1 << prob.exp);
}
/*
* The following are operators on Prob structure
*/
inline bool operator<(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) < (right.num << (40 - right.exp));
}
inline bool operator<=(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) <= (right.num << (40 - right.exp));
}
inline bool operator>=(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) >= (right.num << (40 - right.exp));
}
inline bool operator>(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) > (right.num << (40 - right.exp));
}
inline bool operator==(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) == (right.num << (40 - right.exp));
}
inline bool operator!=(const Prob& left, const Prob& right) {
return (left.num << (40 - left.exp)) != (right.num << (40 - right.exp));
}
inline Prob operator+(const Prob& left, const Prob& right) {
if (left.exp < right.exp)
return Prob((left.num << (right.exp - left.exp)) + right.num, right.exp);
else
return Prob(left.num + (right.num << (left.exp - right.exp)), left.exp);
}
inline Prob operator-(const Prob& left, const Prob& right) {
if (left.exp < right.exp)
return Prob((left.num << (right.exp - left.exp)) - right.num, right.exp);
else
return Prob(left.num - (right.num << (left.exp - right.exp)), left.exp);
}
inline Prob operator*(const Prob& left, const Prob& right) {
return Prob(left.num * right.num, left.exp + right.exp);
}
// Get the length of given ProbInterval
inline Prob GetLen(const ProbInterval& PI) { return PI.r - PI.l; }
/*
* Compute the product of probability intervals and emits bytes when possible,
* the emitted bytes are directly concatenated to the end of emit_bytes (i.e., do
* not initialize emit_bytes)
*/
ProbInterval GetPIProduct(const ProbInterval& left, const ProbInterval& right,
std::vector<unsigned char>* emit_bytes);
/*
* Reduce a vector of probability intervals to their product
*/
ProbInterval ReducePIProduct(const std::vector<ProbInterval>& vec,
std::vector<unsigned char>* emit_bytes);
/*
* Compute the ratio point of the ProbInterval
*/
Prob GetPIRatioPoint(const ProbInterval& PI, const Prob& ratio);
/*
* Reduce ProbInterval according given bytes
*/
void ReducePI(UnitProbInterval* PIt, const std::vector<unsigned char>& bytes);
/*
* Get value of cumulative distribution function of exponential distribution
*/
inline double GetCDFExponential(double lambda, double x) {
return 1 - exp(-x / lambda);
}
/*
* Convert val to a single precision float number.
*/
void QuantizationToFloat32Bit(double* val);
/*
* Convert single precision float number to raw bytes.
*/
void ConvertSinglePrecision(double val, unsigned char bytes[4]);
/*
* Convert single precision float number from raw bytes.
*/
double ConvertSinglePrecision(unsigned char bytes[4]);
/*
* Extract one byte from 32-bit unsigned int
*/
inline unsigned char GetByte(unsigned bits, int start_pos) {
return (unsigned char)(((bits << start_pos) >> 24) & 0xff);
}
/*
* BitStirng stucture stores bit strings in the format of:
* bits[0], bits[1], ..., bits[n]
* 1-32 bits, 33-64 bits, ..., n*32+1-n*32+k bits
*
* bits[n] = **************000000000000000
* |-- k bits---|---32-k bits--|
* length = n*32+k
*/
struct BitString {
std::vector<unsigned> bits;
unsigned length;
inline void Clear() {
bits.clear();
length = 0;
}
};
// Concatenate one byte
void StrCat(BitString* str, unsigned char byte);
// Only takes the least significant bits
void StrCat(BitString* str, unsigned bits, int len);
// Concatenate two BitStrings
void StrCat(BitString* str, const BitString& cat);
// PadBitString is used to pad zeros in the BitString as suffixes
inline void PadBitString(BitString* bit_string, size_t target_length) {
bit_string->length = target_length;
while (bit_string->bits.size() < (target_length + 31) / 32)
bit_string->bits.push_back(0);
}
// Note that prefix_length need to be less than 32.
inline unsigned ComputePrefix(const BitString& bit_string, int prefix_length) {
int shift_bit = 32 - prefix_length;
return (bit_string.bits[0] >> shift_bit) & ((1 << prefix_length) - 1);
}
// Find the minimal BitString which has the corresponding probability interval lies within [l, r]
void GetBitStringFromProbInterval(BitString *str, const ProbInterval& prob);
} // namespace db_compress
#endif