Reputation: 31
def compress_data(word):
    """Run-length encode *word*, print the result, and return it.

    Each run of two or more identical consecutive characters is replaced
    by the run length followed by the character; a character that is not
    repeated is emitted unchanged (no ``1`` prefix).

    Args:
        word: The string to compress. May be empty.

    Returns:
        The compressed string, e.g. ``'aaaeebdddd'`` -> ``'3a2eb4d'``.
    """
    parts = []
    length = len(word)
    index = 0
    while index < length:
        letter = word[index]
        # Scan forward to the end of the current run; the bounds check
        # comes first so we never index past the last character.
        run_end = index + 1
        while run_end < length and word[run_end] == letter:
            run_end += 1
        run_length = run_end - index
        if run_length > 1:
            parts.append(str(run_length))
        parts.append(letter)
        # Jump to the first character of the next run.
        index = run_end
    compressed_word = "".join(parts)
    # Printing is kept because existing callers invoke this function
    # without printing the result themselves.
    print(compressed_word)
    return compressed_word
# Read the word to compress from standard input.
word=input('Enter Word:')
# The return value is not captured here; compress_data prints the
# compressed word itself.
compress_data(word)
I need to compress a string in Python. For example:
input: 'aaaeebdddd' output: '3a2eb4d'
input: 'PPTTTMgggEE' output: '2P3TM3g2E'
input: 'GHJ' output: 'GHJ'
input: ' ' output: ' '
How can I write this in Python?
Upvotes: 0
Views: 73
Reputation: 77347
`itertools.groupby` is perfect for the job. It starts a new sub-iterator every time the key computed from the iterated values changes. By default the key is the value itself, so a new group starts every time a value differs from the previous one.
import itertools
def compress_data(word):
    """Return *word* with each run of repeated characters collapsed.

    A run of two or more equal consecutive characters becomes the run
    length followed by the character; a single character is kept as-is.
    """
    pieces = []
    # groupby yields one (character, run-iterator) pair per run of equal
    # consecutive characters.
    for char, run in itertools.groupby(word):
        run_length = len(list(run))
        # Only runs longer than one character get a numeric prefix.
        prefix = str(run_length) if run_length > 1 else ""
        pieces.append(prefix + char)
    return "".join(pieces)
# Each entry pairs an input word with the compressed string we expect.
tests = (
    ('aaaeebdddd', '3a2eb4d'),
    ('PPTTTMgggEE', '2P3TM3g2E'),
    ('GHJ', 'GHJ'),
    ('', ''),
)

# Header row, then one row per case: pass/fail flag, input, expected, actual.
print('worked', 'word', 'want', 'got')
for word, want in tests:
    got = compress_data(word)
    print(got == want, word, want, got)
Running it shows
~/tmp$ python test.py
worked word want got
True aaaeebdddd 3a2eb4d 3a2eb4d
True PPTTTMgggEE 2P3TM3g2E 2P3TM3g2E
True GHJ GHJ GHJ
True
Upvotes: 1
Reputation: 79
def compress_data(word):
    """Return the run-length compressed form of *word*.

    Consecutive repeats of a character are written as the repeat count
    followed by the character; characters that are not repeated are
    written without a count.
    """
    chunks = []
    current = ""
    run_length = 1
    for char in word:
        if char == current:
            run_length += 1
            continue
        # The run of `current` has ended: emit it before starting the
        # next one. On the first character `current` is still "" and
        # this emits an empty string.
        if run_length > 1:
            chunks.append(str(run_length) + current)
        else:
            chunks.append(current)
        current = char
        run_length = 1
    # Emit the final run, which the loop never flushes.
    if run_length > 1:
        chunks.append(str(run_length) + current)
    else:
        chunks.append(current)
    return "".join(chunks)
# Read the word to compress from standard input.
word=input('Enter Word:')
# Print the string returned by compress_data.
print(compress_data(word))
Upvotes: 1
Reputation: 5520
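Using a regex (needs `import re`): the pattern matches every character of a run except the last one — the lookahead `(?=\2)` requires one more copy to follow — and each match is replaced by its length plus one, which is the length of the whole run: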
>>> re.sub(r'((.)\2*)(?=\2)', lambda m: str(len(m.group(1)) + 1), 'PPTTTMgggEE')
'2P3TM3g2E'
Upvotes: 1