Reputation: 107
I have a program that reads a big chunk of text from a text file and then randomizes the content to display back a short story based on that text. The program works, but the last part, where I display the material, is clunky and inefficient. I was wondering whether anyone had ideas on how I could more efficiently take in the text and display it as a string to the user while letting it span multiple lines (wrap text, essentially), so that it is not just one giant string of text continuing off to the right of the console.
from __future__ import print_function, division
import sys
import random
# global variables
suffix_map = {} # map from prefixes to a list of suffixes
prefix = () # current tuple of words
big_list = []
def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    filename: string
    order: integer number of words in the prefix

    returns: map from prefix to list of possible suffixes.
    """
    fp = open(filename)
    for line in fp:
        for word in line.rstrip().split():
            process_word(word, order)


def process_word(word, order=3):
    """Processes each word.

    word: string
    order: integer

    During the first few iterations, all we do is store up the words;
    after that we start adding entries to the dictionary.
    """
    global prefix
    if len(prefix) < order:
        prefix += (word,)
        return

    try:
        suffix_map[prefix].append(word)
    except KeyError:
        # if there is no entry for this prefix, make one
        suffix_map[prefix] = [word]

    prefix = shift(prefix, word)


def random_text(n=300):
    """Generates random words from the analyzed text.

    Starts with a random prefix from the dictionary.

    n: number of words to generate
    """
    global big_list
    # choose a random prefix (not weighted by frequency)
    start = random.choice(list(suffix_map.keys()))

    for i in range(n):
        suffixes = suffix_map.get(start, None)
        if suffixes is None:
            random_text(n - i)
            return

        # choose a random suffix
        word = random.choice(suffixes)
        big_list.append(word + " ")
        start = shift(start, word)


def shift(t, word):
    """Forms a new tuple by removing the head and adding word to the tail.

    t: tuple of strings
    word: string

    Returns: tuple of strings
    """
    return t[1:] + (word,)


def list_to_str_format():
    global big_list
    whole = " ".join(str(i) for i in big_list)
    # 25 words per line
    l1 = big_list[:25]
    l2 = big_list[26:50]
    l3 = big_list[51:75]
    l4 = big_list[76:100]
    l5 = big_list[101:125]
    l6 = big_list[126:150]
    l7 = big_list[151:175]
    l8 = big_list[176:200]
    l9 = big_list[201:225]
    l10 = big_list[226:250]
    l11 = big_list[256:275]
    l12 = big_list[276:300]
    str_1 = " ".join(str(i) for i in l1).capitalize()
    str_2 = " ".join(str(i) for i in l2)
    str_3 = " ".join(str(i) for i in l3)
    str_4 = " ".join(str(i) for i in l4)
    str_5 = " ".join(str(i) for i in l5)
    str_6 = " ".join(str(i) for i in l6)
    str_7 = " ".join(str(i) for i in l7)
    str_8 = " ".join(str(i) for i in l8)
    str_9 = " ".join(str(i) for i in l9)
    str_10 = " ".join(str(i) for i in l10)
    str_11 = " ".join(str(i) for i in l11)
    str_12 = " ".join(str(i) for i in l12)
    print(str_1)
    print(str_2)
    print(str_3)
    print(str_4)
    print(str_5)
    print(str_6)
    print(str_7)
    print(str_8)
    print(str_9)
    print(str_10)
    print(str_11)
    print(str_12)


def main(filename, n=300, order=3):
    try:
        n = int(n)
        order = int(order)
    except ValueError as e:
        print('Usage: %s filename [# of words] [prefix length]' % e)
    else:
        process_file(filename, order)
        random_text(n)
        list_to_str_format()
        print()


main('C:\\Users\\Desktop\\TheBrothersKaramazov.txt')
Upvotes: 1
Views: 80
Reputation: 1281
I allowed myself to change your joining pattern, which was producing a double space. You must import the re module. The regex greedily matches word-plus-whitespace chunks, so .end() of the match gives the position just past the last whitespace within the first line_length characters, and that is where each line is broken.
import re

def list_to_str_format(line_length=80):
    global big_list
    # each word already carries a trailing space, so join with no separator
    whole = "".join(str(i) for i in big_list)
    regex = re.compile(r'(.*?(\s))*')
    while whole != "":
        # position just past the last whitespace within line_length characters
        break_pos = regex.match(whole[:line_length]).end()
        if break_pos == 0:
            break_pos = line_length  # single token longer than the line: hard-break
        print(whole[:break_pos])
        whole = whole[break_pos:]
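As an alternative, the standard library's textwrap module can do the wrapping for you. Here is a minimal sketch, assuming big_list still holds the generated words (each with its trailing space):

import textwrap

def list_to_str_format(line_length=80):
    # join the generated words, then break the result into lines of at
    # most line_length characters at whitespace boundaries
    whole = "".join(str(i) for i in big_list)
    print(textwrap.fill(whole, width=line_length))

By default, textwrap.fill will also hard-break any single token longer than line_length.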
Upvotes: 1