Reputation: 953
I would like to find sequences of consecutive words in a text, where every word in the sequence comes from a given list of words:
# Vocabulary to match against; the bare comma-separated values form a tuple.
word_list = "never", "not", "buy", "here", "again", "more", "hello", "not", "will", "table"
# Sample sentence to scan for runs of vocabulary words.
text = "I do will will not buy more here"
Expected output: will not buy more here
But not:
will will (repeated sequence)
will not (incomplete sequence)
I do (sequence of very short words)
My script:
# Attempt to extract runs of vocabulary words from a sentence.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below is not visible; the comments describe each statement
# on its own and hedge wherever the nesting matters.
# Vocabulary to match against (a tuple of strings).
word_list = "never", "not", "buy", "here", "again", "more", "hello", "not", "will"
text = "I do will will not buy more here"
# `text` is rebound from the sentence to a list of its space-separated words.
text = text.split(" ")
sequences = []
counter = 0
# NOTE(review): the loop variable `words` is never used below; the membership
# test checks each vocabulary word against the whole `text` list, so
# `sequences` collects single words (strings), not multi-word sequences.
for words in text:
for word in word_list:
if word in text:
sequences.append(word)
# NOTE(review): `=+` is plain assignment of `+counter`, not `+=`, so `counter`
# stays 0 and is never read afterwards.
counter =+ counter
# to avoid meaningless sequences like (incomplete sequence): "will not", "I will", "more here"...
# NOTE(review): each `sequence` is a single word here, so `len(sequence)` is a
# character count, not a number of words.
sequences_two_words = []
for sequence in sequences:
if len(sequence) <= 2:
pass
else:
sequences_two_words.append(sequence)
# to avoid sequences like (repeated sequence): "will will"
# NOTE(review): indexing a string compares its first two characters, not two
# adjacent words.
sequences_not_repeat = []
for not_repeat in sequences_two_words:
if not_repeat[0] == not_repeat[1]:
pass
else:
sequences_not_repeat.append(not_repeat)
# to avoid sequences like (sequence with very small words): "I do"
# NOTE(review): `little_len[1]` is one character, so its length is always 1,
# the `<= 2` test is always true, and nothing is ever appended; this is why
# the final list is empty.
sequences_not_little = []
for little_len in sequences_not_repeat:
if len(little_len[1]) <= 2:
pass
else:
sequences_not_little.append(little_len)
# NOTE(review): presumably this print sits inside the outer `for words in text`
# loop, which would explain one `[]` per word of the sentence; confirm against
# the original indentation.
print(sequences_not_little)
My output:
[]
[]
[]
[]
[]
[]
[]
[]
Upvotes: 2
Views: 240
Reputation: 1246
def find_sequences(text, word_list, min_word_length=3, min_sequence_length=3):
    """Return runs of consecutive words of *text* that all occur in *word_list*.

    The text is lower-cased and split on whitespace, so entries of
    *word_list* are expected to be lower-case. A run ends at the first word
    that is not in *word_list*.

    Args:
        text: Sentence to scan.
        word_list: Iterable of allowed words.
        min_word_length: Allowed words shorter than this are skipped
            (drops runs of very small words such as "I do").
        min_sequence_length: Runs with fewer words than this are discarded
            (drops incomplete runs such as "will not").

    Returns:
        A list of tuples, one tuple of words per run, in order of appearance.
    """
    # Build the lookup once: O(1) membership, and duplicates like "not" collapse.
    allowed = set(word_list)
    found = []
    current = []
    prev = None
    # split() without an argument ignores repeated whitespace, so a double
    # space does not produce an empty token that would wrongly end a run.
    for word in text.lower().split():
        if word in allowed:
            # A too-short word or an immediate repetition ("will will") is
            # skipped without ending the current run.
            if len(word) >= min_word_length and word != prev:
                current.append(word)
        else:
            # A word outside the list ends the run; keep it only if long enough.
            if len(current) >= min_sequence_length:
                found.append(tuple(current))
            current = []
        prev = word
    # Flush the run that reaches the end of the text.
    if len(current) >= min_sequence_length:
        found.append(tuple(current))
    return found


word_list = "never", "not", "buy", "here", "again", "more", "hello", "not", "will", "table"
text = "I do will will not buy more here"
sequences = find_sequences(text, word_list)
print(sequences)  # list of the word sequences you want
Upvotes: 1