AttributeError: 'NoneType' object has no attribute 'lower' python using spacy

Question

Here is the full error message:

AttributeError Traceback (most recent call last) in <module>() 24 25 # train ---> 26 pipe.fit(train1, labelsTrain1) 27 28 # test

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in fit(self, X, y, **fit_params) 246 This estimator 247 """ --> 248 Xt, fit_params = self._fit(X, y, **fit_params) 249 if self._final_estimator is not None: 250 self._final_estimator.fit(Xt, y, **fit_params)

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit(self, X, y, **fit_params) 211 Xt, fitted_transformer = fit_transform_one_cached( 212 cloned_transformer, None, Xt, y, --> 213 **fit_params_steps[name]) 214 # Replace the transformer of the step with the fitted 215 # transformer. This is necessary when loading the transformer

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\externals\joblib\memory.pyc in __call__(self, *args, **kwargs) 360 361 def __call__(self, *args, **kwargs): --> 362 return self.func(*args, **kwargs) 363 364 def call_and_shelve(self, *args, **kwargs):

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\pipeline.pyc in _fit_transform_one(transformer, weight, X, y, **fit_params) 579 **fit_params): 580 if hasattr(transformer, 'fit_transform'): --> 581 res = transformer.fit_transform(X, y, **fit_params) 582 else: 583 res = transformer.fit(X, y, **fit_params).transform(X)

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in fit_transform(self, raw_documents, y) 867 868 vocabulary, X = self._count_vocab(raw_documents, --> 869 self.fixed_vocabulary_) 870 871 if self.binary:

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in _count_vocab(self, raw_documents, fixed_vocab) 790 for doc in raw_documents: 791 feature_counter = {} --> 792 for feature in analyze(doc): 793 try: 794 feature_idx = vocabulary[feature]

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(doc) 264 265 return lambda doc: self._word_ngrams( --> 266 tokenize(preprocess(self.decode(doc))), stop_words) 267 268 else:

C:\Users\mcichonski\AppData\Local\Continuum\anaconda3\envs\py27\lib\site-packages\sklearn\feature_extraction\text.pyc in <lambda>(x) 230 231 if self.lowercase: --> 232 return lambda x: strip_accents(x.lower()) 233 else: 234 return strip_accents

AttributeError: 'NoneType' object has no attribute 'lower'

Here is the code:

def printNMostInformative(vectorizer, clf, N):
    """Print the N lowest- and N highest-weighted features of a linear model.

    Each name from ``vectorizer.get_feature_names()`` is paired with the
    matching entry of ``clf.coef_[0]``. The ``(coefficient, name)`` pairs are
    sorted ascending, so ties on the coefficient fall back to the name.

    Args:
        vectorizer: Object exposing ``get_feature_names()``.
        clf: Object exposing ``coef_``; only its first row is read.
        N: Number of pairs to print from each end of the ranking.

    Returns:
        None. The two headed lists are written to standard output.
    """
    names = vectorizer.get_feature_names()
    ranked = list(zip(clf.coef_[0], names))
    ranked.sort()

    # (heading, pairs): the N smallest coefficients in ascending order, then
    # the N largest walked from the top of the ranking downwards.
    sections = (
        ("Class 1 best: ", ranked[:N]),
        ("Class 2 best: ", ranked[:-(N + 1):-1]),
    )
    for heading, pairs in sections:
        print(heading)
        for pair in pairs:
            print(pair)

# Script: train a text classifier on the 'Title' column, report accuracy and
# the top features, then re-vectorize the training titles on their own.
# CountVectorizer, LinearSVC, Pipeline, accuracy_score, tokenizeText,
# CleanTextTransformer, train and test are all defined outside this snippet.

# Unigram counts only (ngram_range=(1,1)), tokenized by the custom
# tokenizeText callable.
vectorizer = CountVectorizer(tokenizer=tokenizeText, ngram_range=(1,1))
clf = LinearSVC()

# Three steps in order: clean the raw text, vectorize it, classify it.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer), ('clf', clf)])

# data: inputs come from the 'Title' column, labels from 'Conference'.
# presumably train/test are pandas DataFrames -- TODO confirm.
train1 = train['Title'].tolist()
labelsTrain1 = train['Conference'].tolist()

test1 = test['Title'].tolist()
labelsTest1 = test['Conference'].tolist()

# train
# NOTE(review): the reported AttributeError is raised from this call, inside
# the vectorizer's lowercasing step (x.lower()), i.e. a document reaching the
# vectorizer was None. Check whether train1 contains None and whether
# CleanTextTransformer (not shown) returns its cleaned text -- verify.
pipe.fit(train1, labelsTrain1)

# test: predict on the held-out titles and compare against their labels.
preds = pipe.predict(test1)
print("accuracy:", accuracy_score(labelsTest1, preds))
print("Top 10 features used to predict: ")

printNMostInformative(vectorizer, clf, 10)
# Rebind `pipe` to a classifier-free pipeline. It reuses the same
# `vectorizer` object, paired with a fresh CleanTextTransformer.
pipe = Pipeline([('cleanText', CleanTextTransformer()), ('vectorizer', vectorizer)])
transform = pipe.fit_transform(train1, labelsTrain1)

vocab = vectorizer.get_feature_names()
# Walk the matrix one training document at a time via indptr/indices/data.
# presumably `transform` is a scipy CSR sparse matrix -- TODO confirm.
for i in range(len(train1)):
    s = ""
    # Entries indptr[i]:indptr[i+1] belong to row i: vocabulary indices in
    # `indices`, their stored values in `data`.
    indexIntoVocab = transform.indices[transform.indptr[i]:transform.indptr[i+1]]
    numOccurences = transform.data[transform.indptr[i]:transform.indptr[i+1]]
    for idx, num in zip(indexIntoVocab, numOccurences):
        # Append the "(term, value)" tuple text for this document.
        # NOTE(review): `s` is reset each iteration and is not used anywhere
        # in the visible code.
        s += str((vocab[idx], num))

Looks like it has something to do with the train1 data. Not sure how to fix this.

This is after cleaning the data and now trying to use this function to print out the most important features, i.e. the features that have the highest coefficients:

AttributeError: 'NoneType' object has no attribute 'lower' python using spacy

Answers (1)

Related Questions

AttributeError: 'NoneType' object has no attribute 'lower' python using spacy

Answers (1)

Related Questions

AttributeError: 'NoneType' object has no attribute 'lower' python using spacy