-
Notifications
You must be signed in to change notification settings - Fork 0
/
Huffman.cpp
415 lines (336 loc) · 11.1 KB
/
Huffman.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
#include "Huffman.h"
#include "File.h"
#include <cmath>
/*
Helper method used to print any huffman tree
Not used in the actual application
*/
void PrintInorderTraversal(HuffNode* root)
{
if(root == nullptr) return;
PrintInorderTraversal(root->left);
PrintInorderTraversal(root->right);
if(root->key != -1)
{
std::cout << "Key : " << root->key << " Freq : " << root->frequency << '\n';
}
else
{
std::cout << "Key : NULL Freq : " << root->frequency << '\n';
}
}
/*
Helper method to print huffman tree in depth first traversel
Not used in the actual applcation
*/
void PrintDFS(HuffNode* root)
{
std::queue<HuffNode*> q;
q.push(root);
while(!q.empty())
{
HuffNode* front = q.front();
q.pop();
if(front->key != -1)
std::cout << "Key : " << front->key << " Freq : " << front->frequency << '\n';
else
std::cout << "Key : NULL Freq : " << front->frequency << '\n';
if(front->left != nullptr) q.push(front->left);
if(front->right != nullptr) q.push(front->right);
}
}
/*
Generate the min heap of huffman tree from given message
*/
HuffNode* CreateMinHeap(const std::string& message)
{
// count frequncy of occurance of each character in the message
std::unordered_map<char, int> freq_counter;
for(const char& c : message)
{
if(freq_counter.find(c) != freq_counter.end())
freq_counter[c]++;
else
freq_counter[c] = 1;
}
// extract the data in a vector to convert it into heap
// HuffNodeComparator is a comparsion fuction used to sort the priority queue by frequency of chars
std::priority_queue<HuffNode*, std::vector<HuffNode*>, HuffNodeComparator> pq;
// push nodes into priority queue to consturct the min heap
for(const auto& item : freq_counter)
{
HuffNode* newNode = new HuffNode(item.first, item.second, nullptr, nullptr);
pq.push(newNode);
}
// delete the data from hashmap to free memory
freq_counter.clear();
// print priority queue
/*const int pqsize = pq.size();
for(int i = 0; i < pqsize; i++)
{
auto top = pq.top(); pq.pop();
std::cout << "Key : " << top->key << " freq : " << top->frequency << '\n';
}*/
// constructing the min heap
while(pq.size() > 1)
{
HuffNode* node1 = pq.top(); pq.pop();
HuffNode* node2 = pq.top(); pq.pop();
HuffNode* newNode = new HuffNode(-1, node1->frequency + node2->frequency, node1, node2);
pq.push(newNode);
}
return pq.top();
}
/*
Helper method to find if a given HuffNode is leaf i.e node beloning to a character
*/
bool isLeafNode(HuffNode* node) {
if(node->left == nullptr && node->right == nullptr) return true;
else return false;
}
/*
consturct symbol table from min-heap
Takes in vector reference and stores the symbol table in this vector (2nd arguement)
*/
void FindSymbolTable(HuffNode* root, std::string prevCode,std::vector<SymbolCodePair>& symbolTable)
{
if(root == nullptr) return;
if(isLeafNode(root))
{
SymbolCodePair scp(root->key, prevCode);
symbolTable.push_back(scp);
}
else
{
FindSymbolTable(root->left, prevCode + "0", symbolTable);
FindSymbolTable(root->right, prevCode + "1", symbolTable);
}
}
/*
Takes a symbol table and message and encodes it
*/
std::string EncodeMessage(const std::string& message, const std::vector<SymbolCodePair>& symbolTable)
{
std::unordered_map <char, std::string> table;
for(const SymbolCodePair& scp : symbolTable)
{
table[scp.key] = scp.code;
}
std::string output = "";
for(const char& c : message)
{
output += table[c];
}
return output;
}
int BinaryStringToInt(const std::string& bin)
{
int res = 0;
int factor = 1;
for(const char& c : bin)
{
if(c == '1') res += factor;
factor *= 2;
}
return res;
}
/*
Saves symbol table and encoded message
*/
void SaveHuffmanData(const std::string& filepath, const std::vector<SymbolCodePair>& symbolTable, std::string& encodedMessage)
{
/*
1. Create a output stream
2. Write size of symbol table and symbol table
3. Write size of encoded message
4. Byte complete the encoded message by appedning 0s at end
5. Take one byte(8 chars) at a time from encodedMessage and convert to char and write
*/
std::cout << "Saving huffman data\n";
/*for(int i = 0; i < encodedMessage.length(); i++)
{
if(i % 8 == 0) std::cout << ' ';
std::cout << encodedMessage[i];
}*/
std::ofstream writeStream(filepath, std::ios::out | std::ios::binary);
int sizeOfSymbolTable = symbolTable.size();
writeStream << sizeOfSymbolTable << '\n';
for(int i = 0; i < symbolTable.size(); i++)
{
writeStream << (int)symbolTable[i].key << '\n' << symbolTable[i].code << '\n';
}
int encodedMessageLength = encodedMessage.length();
writeStream << encodedMessageLength << '\n';
int charsToAdd = 8 - (encodedMessageLength % 8);
charsToAdd = charsToAdd == 8 ? 0 : charsToAdd;
for(int i = 0; i < charsToAdd; i++)
encodedMessage += '0';
// convert encoded message to string of bytes
// find size of buffer needed
size_t nBytesToWrite = encodedMessage.length() / 8;
// construct buffer and fill random value
std::string byteData(nBytesToWrite, ' ');
// index for running through encoded message, bytesConverted to run through buffer
size_t index = 0, bytesConverted = 0;
// var to store current byte under proceessing
std::string currentByteString = "";
std::cout << "\n\n";
while(index < encodedMessage.length() && bytesConverted < nBytesToWrite)
{
currentByteString += encodedMessage[index];
if(currentByteString.length() == 8 && bytesConverted < nBytesToWrite)
{
// process
int byteValue = BinaryStringToInt(currentByteString) - 128;
byteData[bytesConverted] = (char)byteValue;
currentByteString.clear();
bytesConverted++;
}
index++;
}
writeStream.write(&byteData[0], nBytesToWrite);
/*int index = 0;
std::string currentByte = "";
while(index < encodedMessage.size())
{
currentByte += encodedMessage[index];
if(currentByte.length() == 8)
{
// process byte
int byteValue = 0;
int factor = 1;
for(char c : currentByte)
{
if(c == '1') byteValue += factor;
factor *= 2;
}
// convert from range 0 to 255 to -128 to 127
byteValue -= 128;
writeStream << (char)byteValue;
currentByte.clear();
}
index++;
}*/
writeStream.close();
}
void HuffmanCompression(const std::string& inputFilepath, const std::string& outputFilepath)
{
std::vector<byte> byteStream = ReadFileDataRaw(inputFilepath);
std::string message(byteStream.begin(), byteStream.end());
HuffNode* root = CreateMinHeap(message);
std::vector<SymbolCodePair> symbolTable;
FindSymbolTable(root, "", symbolTable);
std::string encodedMessage = EncodeMessage(message, symbolTable);
std::cout << "\n\n";
//std::cout << "Orignial message : " << message << '\n';
//std::cout << "Encoded message : " << encodedMessage << '\n';
std::cout << "\n\n";
std::cout << "Size of original message " << 8 * message.length() << " bits\n";
std::cout << "Size of encoded message " << encodedMessage.length() << " bits\n";
SaveHuffmanData(outputFilepath, symbolTable, encodedMessage);
}
std::string IntTo8BitBinaryString(int val)
{
std::string bin = "";
while(val > 0)
{
if(val % 2 == 1) bin += '1';
else bin += '0';
val /=2;
}
while(bin.length() < 8)
bin += '0';
return bin;
}
/*
Takes in filepath
Reads symbol table and encoded message from it
Saves it in the corresponding references
*/
void LoadHuffmanData(const std::string& filepath, std::vector<SymbolCodePair>& symbolTable, int& encodedMessageLength,std::string& encodedMessage)
{
/*
1. Create an input stream
2. Read size of symbol table and symbol table iteself
3. Read the size of encoded message
4. Read encoded message char wise, convert to string of byte
5. Trim the appended part
*/
std::ifstream readStream(filepath, std::ios::in | std::ios::binary);
int symbolTableSize = 0;
readStream >> symbolTableSize;
for(int i = 0; i < symbolTableSize; i++)
{
int key; std::string code;
readStream >> key >> code;
SymbolCodePair scp((char)key, code);
symbolTable.push_back(scp);
}
/*for(auto scp : symbolTable)
{
std::cout << "key : " << scp.key << " code : " << scp.code << '\n';
}*/
readStream >> encodedMessageLength;
//std::cout << "Encoded message length " << encodedMessageLength << '\n';
size_t nBytesToRead = (size_t)ceil((float)encodedMessageLength / 8);
//std::cout << "nBytesToRead " << nBytesToRead << '\n';
std::string byteData(nBytesToRead + 1, ' ');
if(readStream.is_open())
{
readStream.read(&byteData[0], nBytesToRead + 1);
}
for(int i = 1; i < nBytesToRead + 1; i++)
{
std::string code = IntTo8BitBinaryString((int)byteData[i] + 128);
//std::cout << i << ' ' << code << '\n';
encodedMessage += code;
}
/*int index = 0;
while(index < ceil((float)encodedMessageLength / 8))
{
char c;
readStream >> c;
int byteValue = c + 128;
std::string binString = IntTo8BitBinaryString(byteValue);
encodedMessage += binString;
index++;
}
std::cout << "Loaded huffman data\n";
std::cout << encodedMessage;*/
/*while(readStream.peek() != EOF)
{
char c;
readStream >> c;
int byteValue = c + 128;
std::string binString = IntTo8BitBinaryString(byteValue);
encodedMessage += binString;
}*/
}
void HuffmanDecompression(const std::string& inputFilepath, const std::string& outputFilepath)
{
std::ofstream writeStream(outputFilepath, std::ios::out | std::ios::binary);
int encodedMessageLength = 0;
std::string encodedMessage = "";
std::vector<SymbolCodePair> symbolTable;
LoadHuffmanData(inputFilepath, symbolTable, encodedMessageLength, encodedMessage);
std::unordered_map < std::string, char> symbolTableCache;
for(const SymbolCodePair& scp : symbolTable)
{
symbolTableCache[scp.code] = scp.key;
}
symbolTable.clear();
std::string currentBitSequence = "";
int index = 0;
while(index < encodedMessageLength)
{
currentBitSequence += encodedMessage[index];
if(symbolTableCache.find(currentBitSequence) != symbolTableCache.end())
{
// found
writeStream << symbolTableCache[currentBitSequence];
currentBitSequence.clear();
}
index++;
}
writeStream.close();
}