@virnuls/

# Huffman Coding

## Codes text using Huffman Coding and compares that with using ASCII

Files
• main.py
main.py
```1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
```
```""" This program* creates a Huffman code for the input text.  Note that if you're using an example from a book, the tree might not be the same if there are ties in ranking/frequency, and in some cases the branches (i.e. 0s and 1s) will be reversed, but the total number of bits used will be the same.
[* which I wrote from first principles without any reference to other algorithms, so there may be better ways to do it!]"""

# space out tree so that children of node n
# are at positions 2n and 2n+1
def treeify(n):
# insertion point (i.e. index) for any padding
global insert
# while the list isn't long enough for the
# children to be in the right place...
while (n*2 + 1) >= len(tree):
# ...then pad it with blanks
tree.insert(insert,"")
# if the two child nodes concatenated don't give
# you the parent node then they're not in the
# right place, so...
while tree[n] != tree[n*2] + tree[n*2 + 1] and tree[n] != tree[n*2 + 1] + tree[n*2]:
# ...pad out the list with blanks
tree.insert(insert,"")
# ensure the next insertion point is after
# nodes that are now in the right place
if insert <= n*2 + 1:
insert = n*2 + 2

# return the code for a character
def codify(char):
# start at the root node
pos = 1
code = ""
# while we haven't found the letter
while tree[pos] != char:
# is the letter in the left child?
if char in tree[pos*2]:
# add a 1 to the code
code += "1"
# go to the left child
pos = pos * 2
else:
# add a zero to the code
code += "0"
# go to the right child
pos = pos * 2 + 1
return code

text = ""
while text == "":
text = input("\nText to encode: ")
# use a dictionary to count the letter frequencies
alphabet = {}
for char in text:
# if the character has been found before...
if char in alphabet:
# ...increment the count
alphabet[char] = alphabet[char] + 1
else:
# ...else add it to the dictionary
alphabet[char] = 1
# take a copy of the characters to use later
letters = (list(alphabet))

# tree is the list representation of the tree
# tree[1] is the root, so tree[0] is padding
tree = ["",""]
# the items in the dictionary are combined until
# there is only one left, which is the root
if len(alphabet) == 1:
tree.append(min(alphabet))
while len(alphabet) > 1:
# find the item with the lowest frequency...
key = min(alphabet, key=alphabet.get)
# ...store it and its frequency...
value = alphabet[key]
# ...remove it from the dictionary...
alphabet.pop(key)
# ...and add it to the tree below the root
tree.insert(2,key)
# find the item with the next lowest freq...
key2 = min(alphabet, key=alphabet.get)
# ...concatenate it with the previous one...
key += key2
value += alphabet[key2]
# ...also removing it from the dictionary
alphabet.pop(key2)
# ...and adding the new combined item and freq.
alphabet[key] = value
# add this next least frequent item to the tree
tree.insert(2,key2)

# turn list into a binary tree
# we know the root is in the right place, so
# start at the first left child (node 2)...
current = 2
# ...padding at index 4 onwards
insert = 4
# loop through the values in the list making
# sure their children are in the right place
while current < len(tree):
if len(tree[current]) > 1:
treeify(current)
current += 1

# display the results
print("\nList representation of the tree:")
print(tree)
# display the codes
print("\nCodes:")
# loop through the unique characters in the text
for char in letters:
print("-",char,"is",codify(char))
# use the tree to code the text
print("\n"+text,"would be represented as:")
# string to store the output
huffman = ""
for char in text:
huffman += codify(char)
print(huffman)
saving = str(100 - (int(len(huffman)/(len(text)*7)*100))) + "%."
print("\nThat's",len(huffman),"bits using Huffman coding, compared with",len(text),"x 7 =",len(text)*7,"bits using ASCII - a saving of",saving)```