Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Huffman Coding Algorithm #798

Merged
merged 1 commit into from
May 13, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions compression/huffman.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import heapq
import sys
from collections import Counter

class Letter:
    """A leaf of the Huffman tree: one character and its frequency."""

    def __init__(self, letter, freq):
        # The character this leaf stands for.
        self.letter = letter
        # Number of occurrences of the character in the input.
        self.freq = freq
        # Huffman code for the character; empty until the tree is traversed.
        self.bitstring = ""

    def __repr__(self):
        return f'{self.letter}:{self.freq}'


class TreeNode:
    """An internal node of the Huffman tree joining two subtrees."""

    def __init__(self, freq, left, right):
        # Combined frequency stored for this node.
        self.freq = freq
        # Child reached by appending "0" to the code.
        self.left = left
        # Child reached by appending "1" to the code.
        self.right = right

    def __repr__(self):
        # Deliberately shallow: printing the children would dump the
        # whole subtree for every node.
        return f'TreeNode({self.freq})'


def parse_file(file_path):
    """
    Read the file and count how often each character occurs, then
    convert the counts into a list of Letters.

    The returned list is sorted by ascending frequency. Characters with
    the same frequency keep the order in which they first appear in the
    file (the sort is stable and the counter preserves insertion order).
    An empty file yields an empty list.
    """
    chunk_size = 64 * 1024
    counts = Counter()
    with open(file_path) as f:
        # Read in large chunks instead of one character per call; the
        # sentinel "" is what read() returns at end of file.
        for chunk in iter(lambda: f.read(chunk_size), ""):
            counts.update(chunk)
    return sorted(
        (Letter(char, freq) for char, freq in counts.items()),
        key=lambda letter: letter.freq,
    )

def build_tree(letters):
    """
    Build the Huffman Tree from a list of Letters using a min heap
    and return its root.

    The two lowest-frequency nodes are repeatedly merged into a new
    TreeNode until one node is left. Ties on frequency are broken by
    insertion order: earlier list entries first, newly merged nodes
    last. A single-element list returns that element unchanged.

    The input list is not modified. Raises IndexError if `letters`
    is empty.
    """
    # The running counter makes every heap entry unique, so the nodes
    # themselves are never compared when frequencies are equal.
    heap = [(node.freq, order, node) for order, node in enumerate(letters)]
    heapq.heapify(heap)
    next_order = len(heap)
    while len(heap) > 1:
        left_freq, _, left = heapq.heappop(heap)
        right_freq, _, right = heapq.heappop(heap)
        total_freq = left_freq + right_freq
        node = TreeNode(total_freq, left, right)
        heapq.heappush(heap, (total_freq, next_order, node))
        next_order += 1
    return heap[0][2]

def traverse_tree(root, bitstring):
    """
    Recursively traverse the Huffman Tree to set each
    Letter's bitstring, and return the list of Letters
    in left-to-right order.

    `bitstring` is the code prefix accumulated on the way down to
    `root`; pass "" for the root of the whole tree. Going left appends
    "0" and going right appends "1".
    """
    if isinstance(root, Letter):
        # A tree that is a single leaf (input with one distinct
        # character) would otherwise get a zero-length code.
        root.bitstring = bitstring or "0"
        return [root]
    return (
        traverse_tree(root.left, bitstring + "0")
        + traverse_tree(root.right, bitstring + "1")
    )

def huffman(file_path):
    """
    Parse the file, build the tree, then run through the file
    again and print the bitstring of every character, each one
    followed by a single space.

    An empty file prints only the header line and an empty line.
    """
    letters_list = parse_file(file_path)
    codes = {}
    if letters_list:
        # Only build a tree when there is something to encode.
        root = build_tree(letters_list)
        # Map character -> code once, so encoding each character is a
        # dict lookup instead of a scan over all Letters.
        codes = {
            letter.letter: letter.bitstring
            for letter in traverse_tree(root, "")
        }
    print(f'Huffman Coding of {file_path}: ')
    chunk_size = 64 * 1024
    with open(file_path) as f:
        # Encode chunk by chunk: large files are never held in memory
        # whole, and output is written once per chunk.
        for chunk in iter(lambda: f.read(chunk_size), ""):
            print("".join(f'{codes[c]} ' for c in chunk), end="")
    print()

if __name__ == "__main__":
    # Fail with a usage hint instead of an IndexError when no path is given.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: python {sys.argv[0]} <file_path>')
    # pass the file path to the huffman function
    huffman(sys.argv[1])