Reputation: 11
I've been toying around with some compression algorithms lately but, for the last couple days, I've been having some real trouble implementing LZ78 in python. I've looked around online for some examples but haven't really found anything reliable that both encodes and decodes input. Does anyone have any resources I could look at or code I could examine?
Thanks
Upvotes: 0
Views: 7188
Reputation: 11
Here is my implementation of LZW compression algorithm from https://q4.github.io/thesis.pdf in python 3:
from typing import List
def LZW_compress(input:str)->List[int]:
if len(input) == 0:
return []
tree = {chr(65 + i):i for i in range(26)} # tree is initially filled with input alphabet.
output = []
w = ""
for x in input:
if w+x in tree.keys():
w += x
else:
output.append(tree[w])
tree[w+x] = len(tree)
w = x
output.append(tree[w])
return output
def LZW_decompress(input:List[int])->str:
tree = {i:chr(65 + i) for i in range(26)} # tree is initially filled with input alphabet.
output = ""
for i in range(len(input)):
w = tree[input[i]]
if i != 0:
tree[i+25] = tree[i+25] + w[0] # completing the incomplete assignment of prev. round.
w = tree[input[i]] # updating w in case: input[i] == i + 25
output += w
tree[i+26] = w # incomplete set. need to append the first character of the next phrase.
return output
# Example
plain = "HGHSHHBSYBVDHDHGKQQQAAAAAACCCCCCCCCCCCRASSFASASDKKSAHSVNFNNNNNNAHVJSV"
assert(plain == LZW_decompress(LZW_compress(plain)))
Upvotes: 1
Reputation: 2646
Here is both compression and decompression of both :
def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = ""
result = []
for c in uncompressed:
wc = w + c
if wc in dictionary:
w = wc
else:
result.append(dictionary[w])
# Add wc to the dictionary.
dictionary[wc] = dict_size
dict_size += 1
w = c
# Output the code for w.
if w:
result.append(dictionary[w])
return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
from cStringIO import StringIO
# Build the dictionary.
dict_size = 256
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size))
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
# use StringIO, otherwise this becomes O(N^2)
# due to string concatenation in a loop
result = StringIO()
w = compressed.pop(0)
result.write(w)
for k in compressed:
if k in dictionary:
entry = dictionary[k]
elif k == dict_size:
entry = w + w[0]
else:
raise ValueError('Bad compressed k: %s' % k)
result.write(entry)
# Add w+entry[0] to the dictionary.
dictionary[dict_size] = w + entry[0]
dict_size += 1
w = entry
return result.getvalue()
# How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT')
print (compressed)
decompressed = decompress(compressed)
print (decompressed)
Hope this helps
Upvotes: 9