Reputation: 157
I would like to count the number of occurrences of specific words (conjunctions: "also", "although", "and", "as", "because", "before", "but", "for", "if", "nor", "of", "or", "since", "that", "though", "until", "when", "whenever", "whereas", "which", "while", "yet") and also of punctuation marks in a text file.
This is what I have done:
def count(fname, words_list):
    """Count how often each entry of ``words_list`` occurs in a text file.

    Matching is done with ``str.count`` on each line, i.e. plain
    non-overlapping substring matching. An entry such as "as" is therefore
    also counted inside longer words such as "was".

    Args:
        fname: Path of the file to read. A falsy value skips all work.
        words_list: Strings (words or punctuation marks) to count.

    Returns:
        A dict mapping every entry of ``words_list`` to its total count
        (0 when it never occurs, including for an empty file), or ``None``
        when ``fname`` is falsy or the file cannot be read.
    """
    if not fname:
        return None
    try:
        # The context manager closes the file even if reading fails.
        with open(str(fname), 'r') as handle:
            lines = handle.readlines()
    except (OSError, UnicodeError):
        # Only I/O and decoding problems are reported here; programming
        # errors (e.g. a non-string entry in words_list) are not hidden.
        print('Something really bad just happened!')
        return None
    # Building the dict per key means a duplicated entry in words_list is
    # counted once, and every entry gets a value even for an empty file.
    return {
        word: sum(line.count(word) for line in lines)
        for word in words_list
    }
# Conjunctions followed by the punctuation marks to look for in the sample.
targets = [
    "also", "although", "and", "as", "because", "before", "but", "for",
    "if", "nor", "of", "or", "since", "that", "though", "until", "when",
    "whenever", "whereas", "which", "while", "yet",
    ",", ";", "-", "'",
]
print(count('sample2.txt', targets))
But it also counts the "as" inside "was" as an occurrence of "as". How do I fix this, or is there another way to achieve it? Thanks.
expected output is something like:
{'also': 0, 'although': 0, 'and': 27, 'as': 2, 'because': 0, 'before': 2, 'but': 4, 'for': 2, 'if': 2, 'nor': 0, 'of': 13, 'or': 2, 'since': 0, 'that': 10, 'though': 2, 'until': 0, 'when': 3, 'whenever': 0, 'whereas': 0, 'which': 0, 'while': 0, 'yet': 0, ',': 41, ';': 3, '-': 1, "'": 17, 'words_per_sentence': 25.4286, 'sentences_per_par': 1.75}
Upvotes: 0
Views: 2434
Reputation: 101
def word_count(fname, word_list):
    """Count whole-word occurrences of the given words in a text file.

    The text is lower-cased and split on whitespace; leading and trailing
    ASCII punctuation is removed from each token before it is compared, so
    "that;", "(if)" and "'though'." count as "that", "if" and "though".
    Punctuation inside a token (e.g. the apostrophe in "don't") is kept.

    Args:
        fname: Path of the text file to read.
        word_list: Lower-case words to count.

    Returns:
        A dict mapping every word of ``word_list`` to its count (0 if the
        word never occurs).
    """
    # Local import so this snippet stays self-contained.
    import string

    count_w = dict.fromkeys(word_list, 0)
    with open(fname) as input_text:
        text = input_text.read()
    for word in text.lower().split():
        # Strip every ASCII punctuation mark, not just '.,:-()', so tokens
        # followed by ';', '!', '?' or wrapped in quotes are matched too.
        _word = word.strip(string.punctuation)
        if _word in count_w:
            count_w[_word] += 1
    return count_w
def punctaction_count(fname, punctaction):
    """Count how often each given punctuation character occurs in a file.

    Args:
        fname: Path of the text file to read.
        punctaction: Single-character strings to count.

    Returns:
        A dict mapping every entry of ``punctaction`` to the number of
        characters in the file equal to it (0 if it never occurs).
    """
    tally = dict.fromkeys(punctaction, 0)
    with open(fname) as handle:
        contents = handle.read()
    # Walk the text one character at a time and bump the matching counter.
    for char in contents:
        if char in punctaction:
            tally[char] = tally[char] + 1
    return tally
# Conjunctions to count as whole words.
conjunctions = [
    "also", "although", "and", "as", "because", "before", "but", "for",
    "if", "nor", "of", "or", "since", "that", "though", "until", "when",
    "whenever", "whereas", "which", "while", "yet",
]
# Punctuation marks to count character by character.
marks = [",", ";", "-", "'"]
print(word_count('c_prog.txt', conjunctions))
print(punctaction_count('c_prog.txt', marks))
If you want to do this in one function:
def word_count(fname, word_list, punctaction):
    """Count whole words and punctuation characters of a text file at once.

    Words are counted on the lower-cased, whitespace-split text after
    removing leading and trailing ASCII punctuation from each token, so
    "that;" and "'though'." count as "that" and "though". Punctuation is
    counted character by character on the original text.

    Args:
        fname: Path of the text file to read.
        word_list: Lower-case words to count.
        punctaction: Single-character punctuation strings to count.

    Returns:
        One dict holding the count of every entry of ``word_list`` followed
        by the count of every entry of ``punctaction`` (0 when absent).
    """
    # Local import so this snippet stays self-contained.
    import string

    count_w = dict.fromkeys(word_list, 0)
    count_p = dict.fromkeys(punctaction, 0)
    with open(fname) as input_text:
        text = input_text.read()
    for word in text.lower().split():
        # Strip every ASCII punctuation mark, not just '.,:-()', so tokens
        # followed by ';', '!', '?' or wrapped in quotes are matched too.
        _word = word.strip(string.punctuation)
        if _word in count_w:
            count_w[_word] += 1
    for c in text:
        if c in count_p:
            count_p[c] += 1
    # Merge the punctuation counts into the word counts for a single result.
    count_w.update(count_p)
    return count_w
# Conjunctions to count as whole words.
conjunctions = [
    "also", "although", "and", "as", "because", "before", "but", "for",
    "if", "nor", "of", "or", "since", "that", "though", "until", "when",
    "whenever", "whereas", "which", "while", "yet",
]
# Punctuation marks to count character by character.
marks = [",", ";", "-", "'"]
print(word_count('c_prog.txt', conjunctions, marks))
Upvotes: 0
Reputation: 1404
I would do something simpler like going through the words in your file and checking if each of them is in the list of the words you want to count. In that case add 1 to the counter dictionary at the entry of that word.
# Get all the whitespace-separated words in the file.
# NOTE(review): assumes `file` is an already-open text file object and
# `word_list` holds the target words; neither is defined in this snippet.
word_list_in_text = file.read().split()
count_result = {}
for word in word_list_in_text:
    # Only tally words that are in the target list.
    if word in word_list:
        count_result[word] = count_result.get(word, 0) + 1
print(count_result)
Upvotes: 0
Reputation: 465
The str.count(sub) function counts occurrences of the substring sub, not of whole words. So when you ask it to count "as", it also finds the "as" inside the word "was" and adds that to the number of "as" it has found.
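You could use a regex here to specify that you want "as" as a full word, not as a substring of another word. The \b anchor matches a word boundary (the start or the end of a word), so for example re.findall(r"\bas\b", text) matches the word "as" but not the "as" inside "was".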
Upvotes: 0
Reputation: 514
In Python 2.7 and 3.1+ there is a special dict subclass, collections.Counter, for what you're trying to achieve.
Since you haven't posted any sample output, I would like to give you an approach that you can use. Maintain a list, and append to it each of the words you require as you encounter it. For example, when you encounter the word "also", append it to the list.
>>> l.append("also")
>>> l
['also']
Similarly, you encounter the word "although", list becomes:
>>> l.append("although")
>>> l
['also', 'although']
If you again encounter "also", again append it to the list like above.
The list becomes:
['also', 'although', 'also']
Now use Counter (imported with "from collections import Counter") to count the number of occurrences of the list elements:
>>> l = ['also', 'although', 'also']
>>> result = Counter(l)
>>> l
['also', 'although', 'also']
>>> result
Counter({'also': 2, 'although': 1})
Upvotes: 1