Reputation: 13
In order to fix a bunch all-uppercase text files, I have written a script that:
def lowit(line):
line = line.lower()
sentences = line.split('. ')
sentences2 = [sentence[0].capitalize() + sentence[1:] for sentence in sentences]
string2 = '. '.join(sentences2)
return string2
def capcico(line, allKeywords):
allWords = line.split(' ')
original = line.split(' ')
for i,words in enumerate(allWords):
words = words.replace(',', '')
words = words.replace('.', '')
words = words.replace(';', '')
if words in allKeywords:
original[i] = original[i].capitalize()
return ' '.join(original)
def main():
dfile = open('fixed.txt', 'w')
f = open('allist.txt', 'r')
allKeywords = f.read().split('\n')
with open('ulm.txt', 'r') as fileinput:
for line in fileinput:
low_line = lowit(line)
dfile.write('\n' + capcico(low_line, allKeywords))
dfile.close()
if __name__ == '__main__':
main()
It works, but the problem is that it doesn't capitalize a city/Country if there are more than one in the same line:
TOWN IN WUERTTEMBERG, GERMANY.
changes to:
Town in Wuerttemberg, germany.
Any Ideas to what's wrong?
TNX
Upvotes: 1
Views: 147
Reputation: 66
It is because "germany" is really "germany\n". Strip the EOL off the word...
words = words.replace(',', '')
words = words.replace('.', '')
words = words.replace(';', '')
# Add in this line to strip the EOL
words = words.rstrip('\r\n')
Upvotes: 1
Reputation: 167
#Input
fileinput = open("ulm.txt").read()
##Input lower
filow = fileinput.lower()
#Keywords
allKeywords = open("allist.txt").read().split("\n")
for kw in allKeywords:
filow = filow.replace(kw.strip().lower(), kw.capitalize())
#Dots
fidots = filow.split(".")
for i,d in enumerate(fidots):
c = d.strip().capitalize()
dc = d.replace(c.lower(), c)
fidots[i] = dc
#Result
dfile = open("fixed.txt", "w")
result = ".".join(fidots)
dfile.write(result)
dfile.close()
Upvotes: 0