Jesper Ezra
Jesper Ezra

Reputation: 159

for elem, count in other.items(): AttributeError: 'int' object has no attribute 'items' error

I'm new to Python and need help with NLTK language modeling.

I'm trying to generate a sentence starting with "he said" using a trigram model, but I get the following error:

Traceback (most recent call last):
  File "C:\Users\\PycharmProjects\main.py", line 72, in <module>
    d[a, b] += freq_tri[a, b, c]
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\collections\__init__.py", line 904, in __iadd__
    for elem, count in other.items():
AttributeError: 'int' object has no attribute 'items'

The entire code is here:

# imports
import string
import random
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('reuters')
from nltk.corpus import reuters, stopwords
from collections import defaultdict, Counter
from nltk import FreqDist, ngrams

# input the reuters sentences
sents = reuters.sents()

# Build the list of tokens to filter out: English stopwords, punctuation
# characters, and the tokens 'lt' and 'rt'.
stop_words = set(stopwords.words('english'))
# Typographic quotes and the em dash are not in string.punctuation, so they
# are added explicitly. The result is kept in a local name instead of being
# assigned back to string.punctuation, which would alter the stdlib constant
# for every other module in the process.
extra_punctuation = '“' + '”' + '-' + '‘' + '’' + '—'
punctuation = string.punctuation + extra_punctuation
removal_list = list(stop_words) + list(punctuation) + ['lt', 'rt']

# generate unigrams bigrams trigrams
unigram = []
bigram = []
trigram = []
fourgram = []
tokenized_text = []

for sentence in sents:
    # Lower-case every token and drop full stops in a single pass. Building a
    # new list avoids removing items from the list being iterated, which
    # would skip the token that follows each removed '.'.
    sentence = [word.lower() for word in sentence if word != '.']
    unigram.extend(sentence)

    # The steps below have to run once per sentence, so they belong inside
    # the loop; outside it only the last sentence would be processed.
    tokenized_text.append(sentence)
    bigram.extend(ngrams(sentence, 2, pad_left=True, pad_right=True))
    trigram.extend(ngrams(sentence, 3, pad_left=True, pad_right=True))
    fourgram.extend(ngrams(sentence, 4, pad_left=True, pad_right=True))

# remove the n-grams with removable words
def remove_stopwords(x, removal=None):
    """Drop the n-grams that consist solely of removable tokens.

    An n-gram is kept as soon as at least one of its tokens is not in the
    removal collection. Padding tokens (``None``) therefore count as
    non-removable unless the collection itself contains ``None``.

    Args:
        x: Iterable of n-grams. Each item is either a tuple of tokens or a
            bare string, which is treated as a one-token n-gram.
        removal: Collection of tokens to filter out. Defaults to the
            module-level ``removal_list``.

    Returns:
        A new list holding the surviving items in their original order.
    """
    if removal is None:
        removal = removal_list
    # Hash the tokens once so every membership test below is O(1).
    blocked = frozenset(removal)
    kept = []
    for gram in x:
        # A bare string is one token; iterating over it would test its
        # individual characters instead of the word itself.
        tokens = (gram,) if isinstance(gram, str) else gram
        if any(token not in blocked for token in tokens):
            kept.append(gram)
    return kept


# Filter every n-gram list through remove_stopwords.
unigram = remove_stopwords(unigram)
bigram = remove_stopwords(bigram)
trigram = remove_stopwords(trigram)
fourgram = remove_stopwords(fourgram)

# generate frequency of n-grams
freq_bi = FreqDist(bigram)
freq_tri = FreqDist(trigram)
freq_four = FreqDist(fourgram)

# Maps a two-word prefix (a, b) to a Counter; an unseen prefix starts out as
# an empty Counter.
d = defaultdict(Counter)

# Iterating freq_tri yields its keys, i.e. the (a, b, c) trigram tuples.
for a, b, c in freq_tri:
    # Skip trigrams whose first or second slot is padding (None).
    if (a != None and b != None):
        # NOTE(review): d[a, b] is a Counter while the right-hand side is an
        # int count, so `+=` dispatches to Counter.__iadd__, which expects a
        # mapping -- this is the line the traceback points at. Presumably the
        # count was meant to be stored under key c of d[a, b]; confirm.
        d[a, b] += freq_tri[a, b, c]

# Next word prediction
s = ''


def pick_word(counter):
    """Return one element of *counter*, drawn at random.

    Each key is expanded into as many copies as its count, so a key with a
    higher count is proportionally more likely to be picked.
    """
    pool = list(counter.elements())
    return random.choice(pool)


# Start from the two seed words, then grow the sentence one word at a time,
# printing the text built so far after every step.
prefix = ("he", "said")
s = " ".join(prefix)
print(s)
for _ in range(19):
    # Draw the next word from the Counter stored for the current prefix.
    suffix = pick_word(d[prefix])
    s = f"{s} {suffix}"
    print(s)
    # Slide the two-word window forward by one word.
    prefix = (prefix[1], suffix)

I tried changing the for loop to the following

# NOTE(review): .items() yields (key, count) pairs, so each item holds two
# values -- the trigram tuple and its frequency -- and cannot be unpacked
# straight into a, b, c.
for a, b, c in freq_tri.items():
    if (a != None and b != None):
        d[a, b] += freq_tri[a, b, c]

I expected to get the items from the freq_tri list, but got the following error:

Traceback (most recent call last):
  File "C:\Users\PycharmProjects\main.py", line 70, in <module>
    for a, b, c in freq_tri.items():
ValueError: not enough values to unpack (expected 3, got 2)

To be honest, I don't quite understand why it returns 2 values when using items().

Please point me in the right direction; I don't know what I'm missing.

Upvotes: 0

Views: 443

Answers (0)

Related Questions