Edit: I have changed the code from mlb to TfidfVectorizer(), but I am still facing a problem. Please see my code below.
from sklearn.externals import joblib
from sklearn.preprocessing import MultiLabelBinarizer
# A fresh binarizer is created here; it is never fitted anywhere in this snippet.
mlb = MultiLabelBinarizer()
# Load the previously pickled model from disk.
model = joblib.load('D:/Testing -Python/model_mlb.pkl')
new_input = 'How can I pay my Library Fees'
# NOTE(review): `TfIdfVectorizer` is neither imported nor defined in this snippet, and
# `transform` is called on the name itself rather than on a fitted vectorizer instance.
pred = model.predict(TfIdfVectorizer.transform([new_input]))
# NOTE(review): `mlb` was not fitted above; presumably the binarizer fitted during
# training is the one needed to decode the prediction -- confirm.
pred = mlb.inverse_transform(pred)
My model is as follows.
OneVsRestClassifier(estimator=SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
eta0=0.0, fit_intercept=True, l1_ratio=0.15,
learning_rate='optimal', loss='hinge', max_iter=5, n_iter=None,
n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
tol=None, verbose=0, warm_start=False),
When I run this, I get the following error:
ValueError: X has 6 features per sample; expecting 1543
For reference, the training data has these shapes:
X_Train.shape = [555, 1543]
Y_Train.shape = [555, 57]
What is going wrong? Please help
Further Edit (With Full Code): To train the model I am using a dataset whose sample is as follows
How to resent my Password ['Pass','ResetPass']
Where to See the next Road ['Direction','NaN']
What is my next topic ['Topic','Class']
Can I move without pass ['Pass','MovePass']
The above dataset is stored in a pd.DataFrame.
Below is my code snippet
# Features are the raw text column; targets are the label lists binarized by `mlb`.
X = dataset['X']
Y = mlb.fit_transform(dataset['test_final'])
X_Train,X_Test,Y_Train,y_test = train_test_split(X,Y, random_state=0, test_size=0.33, shuffle=True)
# TF-IDF features feeding a one-vs-rest SGD classifier (hinge loss).
text_clf = Pipeline([('vect', TfidfVectorizer()),('clf', OneVsRestClassifier(SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, random_state=42, max_iter=5, tol=None)))])
# Hyper-parameter grid; every entry targets the 'vect' (TfidfVectorizer) step.
parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
              'vect__max_df': [0.25, 0.5, 0.75, 1.0],
              'vect__smooth_idf': (True, False),
              'vect__sublinear_tf' : (True,False)}
grid = GridSearchCV(text_clf, parameters, n_jobs=-1)
# Run the grid search on the training split.
fit = grid.fit(X_Train,Y_Train)
predict = grid.predict(X_Test)
# Map the binary indicator predictions back to label tuples.
predict_label = mlb.inverse_transform(predict)
# NOTE(review): this pickles `text_clf`, not the fitted search object (`grid` / `fit`).
joblib.dump(text_clf,'D:/Testing -Python/model_mlb.pkl')
Then I apply the following code to a new X and try to retrieve Y.
# Reload the pickled object from the same path the training snippet dumped to.
model= joblib.load('D:/Testing -Python/model_mlb.pkl')
new_input = 'How can I pay my Library Fees'
# Predict on a one-element list of raw text and keep only the first row of the result.
pred = model.predict([new_input])[0]
# NOTE(review): `pred` is now the single row selected by `[0]` above, not the full
# prediction output -- confirm this is the shape `inverse_transform` expects.
pred = mlb.inverse_transform(pred)
Running the above, I am now getting the following error:
AttributeError: 'list' object has no attribute 'shape'
Please help!!
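The issue is that you are not saving a fitted model to your path: you dump `text_clf`, but only the grid search was fitted (GridSearchCV fits clones of the estimator, so `text_clf` itself stays unfitted). Let's set the grid search aside for now: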
import pandas as pd

try:
    # Standalone joblib; `sklearn.externals.joblib` was removed in scikit-learn 0.23.
    import joblib
except ImportError:
    # Older scikit-learn releases ship a bundled copy here.
    from sklearn.externals import joblib
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.multiclass import OneVsRestClassifier

# Toy dataset: the four sample rows from the question, repeated 10 times.
dataset = pd.DataFrame({'X': ['How to resent my Password',
                              'Where to See the next Road',
                              'What is my next topic',
                              'Can I move without pass']*10,
                        'Y': [['Pass','ResetPass'], ['Direction','NaN'], ['Topic','Class'], ['Pass','MovePass']]*10})

# Binarize the label lists; the fitted `mlb` is needed later to decode predictions.
mlb = MultiLabelBinarizer()
X, Y = dataset['X'], mlb.fit_transform(dataset['Y'])
X_Train, X_Test, Y_Train, y_test = train_test_split(X, Y, random_state=0, test_size=0.33, shuffle=True)

clf = SGDClassifier(loss='hinge', penalty='l2',
                    alpha=1e-3, random_state=42,
                    max_iter=5, tol=None)
# The pipeline vectorizes raw text itself, so callers pass strings, not TF-IDF matrices.
text_clf = Pipeline([('vect', TfidfVectorizer()),
                     ('clf', OneVsRestClassifier(clf))])
# Fit the pipeline itself so that the object pickled below is a trained model.
# (This fits on all of X, Y; use X_Train, Y_Train if X_Test must stay unseen.)
text_clf.fit(X, Y) ### new line here
predict = text_clf.predict(X_Test)
predict_label = mlb.inverse_transform(predict)

# Persist both the fitted pipeline and the fitted binarizer.
joblib.dump(text_clf, 'PATHTO/model_mlb.pkl') #save the good model
joblib.dump(mlb, 'PATHTO/mlb.pkl') # save the MLB

# Later (e.g. in another process): reload both objects and predict on raw text.
model = joblib.load('PATHTO/model_mlb.pkl')
mlb = joblib.load('PATHTO/mlb.pkl') # load the MLB
new_input = 'How to resent my Password'
pred = model.predict([new_input]) ## tfidf in your pipeline
pred = mlb.inverse_transform(pred)
And this returns
[('Pass', 'ResetPass')]
which matches the labels of that sentence in your training data.
And if you want to save your grid search, just save the fitted object, i.e. the result of `grid.fit(...)`, instead of the unfitted pipeline.
