Reputation: 15
I'm trying to wrap a pytorch model, the model expects some text embeddings, For my wrapped model I want it to do the embeddings internally for a future purpose. Here is my code :
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
import skorch
class FastTextEmbedding(BaseEstimator, TransformerMixin):
def __init__(self, ftext_model):
self.ftext_model = ftext_model
self.label_encoder = LabelEncoder()
def fit(self, X, y=None):
self.label_encoder.fit(y)
return self
def transform(self, X, y=None):
embeddings = [self.ftext_model[text] for text in X]
if y is not None:
y_encoded = self.label_encoder.transform(y)
return torch.tensor(embeddings,dtype=torch.float32), torch.tensor(y_encoded, dtype=torch.float32)
else:
return torch.tensor(embeddings,dtype=torch.float32)
pipeline = Pipeline([
('embedding', FastTextEmbedding(ftext)),
('nn', skorch.NeuralNetClassifier(
BiLSTMClassifier,
optimizer=torch.optim.Adam,
lr=best_config['lr'],
max_epochs=n_epochs
))
])
pipeline.fit(train_texts, train_labels)
I get the following error, although I embed my X data, some internal code just tries to work on the X data and not the embeddings returned by transform method I have overread, what am I missing here? any help would be highly appreciated.
TypeError Traceback (most recent call last)
<ipython-input-112-57f21e6419ed> in <cell line: 2>()
1 get_ipython().run_line_magic('time', '')
----> 2 pipeline.fit(train_texts, train_labels)
3 #y_proba = pipeline.predict_proba(X)
17 frames
/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
403 if self._final_estimator != "passthrough":
404 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
--> 405 self._final_estimator.fit(Xt, y, **fit_params_last_step)
406
407 return self
/usr/local/lib/python3.10/dist-packages/skorch/classifier.py in fit(self, X, y, **fit_params)
163 # this is actually a pylint bug:
164 # https://github.com/PyCQA/pylint/issues/1085
--> 165 return super(NeuralNetClassifier, self).fit(X, y, **fit_params)
166
167 def predict_proba(self, X):
/usr/local/lib/python3.10/dist-packages/skorch/net.py in fit(self, X, y, **fit_params)
1317 self.initialize()
1318
-> 1319 self.partial_fit(X, y, **fit_params)
1320 return self
1321
/usr/local/lib/python3.10/dist-packages/skorch/net.py in partial_fit(self, X, y, classes, **fit_params)
1276 self.notify('on_train_begin', X=X, y=y)
1277 try:
-> 1278 self.fit_loop(X, y, **fit_params)
1279 except KeyboardInterrupt:
1280 pass
/usr/local/lib/python3.10/dist-packages/skorch/net.py in fit_loop(self, X, y, epochs, **fit_params)
1188 self.notify('on_epoch_begin', **on_epoch_kwargs)
1189
-> 1190 self.run_single_epoch(iterator_train, training=True, prefix="train",
1191 step_fn=self.train_step, **fit_params)
1192
/usr/local/lib/python3.10/dist-packages/skorch/net.py in run_single_epoch(self, iterator, training, prefix, step_fn, **fit_params)
1224 for batch in iterator:
1225 self.notify("on_batch_begin", batch=batch, training=training)
-> 1226 step = step_fn(batch, **fit_params)
1227 self.history.record_batch(prefix + "_loss", step["loss"].item())
1228 batch_size = (get_len(batch[0]) if isinstance(batch, (tuple, list))
/usr/local/lib/python3.10/dist-packages/skorch/net.py in train_step(self, batch, **fit_params)
1103 return step['loss']
1104
-> 1105 self._step_optimizer(step_fn)
1106 return step_accumulator.get_step()
1107
/usr/local/lib/python3.10/dist-packages/skorch/net.py in _step_optimizer(self, step_fn)
1058 optimizer.step()
1059 else:
-> 1060 optimizer.step(step_fn)
1061
1062 def train_step(self, batch, **fit_params):
/usr/local/lib/python3.10/dist-packages/torch/optim/optimizer.py in wrapper(*args, **kwargs)
383 )
384
--> 385 out = func(*args, **kwargs)
386 self._optimizer_step_code()
387
/usr/local/lib/python3.10/dist-packages/torch/optim/optimizer.py in _use_grad(self, *args, **kwargs)
74 torch.set_grad_enabled(self.defaults['differentiable'])
75 torch._dynamo.graph_break()
---> 76 ret = func(self, *args, **kwargs)
77 finally:
78 torch._dynamo.graph_break()
/usr/local/lib/python3.10/dist-packages/torch/optim/adam.py in step(self, closure)
144 if closure is not None:
145 with torch.enable_grad():
--> 146 loss = closure()
147
148 for group in self.param_groups:
/usr/local/lib/python3.10/dist-packages/skorch/net.py in step_fn()
1092 def step_fn():
1093 self._zero_grad_optimizer()
-> 1094 step = self.train_step_single(batch, **fit_params)
1095 step_accumulator.store_step(step)
1096
/usr/local/lib/python3.10/dist-packages/skorch/net.py in train_step_single(self, batch, **fit_params)
992 Xi, yi = unpack_data(batch)
993 y_pred = self.infer(Xi, **fit_params)
--> 994 loss = self.get_loss(y_pred, yi, X=Xi, training=True)
995 loss.backward()
996 return {
/usr/local/lib/python3.10/dist-packages/skorch/classifier.py in get_loss(self, y_pred, y_true, *args, **kwargs)
148 eps = torch.finfo(y_pred.dtype).eps
149 y_pred = torch.log(y_pred + eps)
--> 150 return super().get_loss(y_pred, y_true, *args, **kwargs)
151
152 # pylint: disable=signature-differs
/usr/local/lib/python3.10/dist-packages/skorch/net.py in get_loss(self, y_pred, y_true, X, training)
1662
1663 """
-> 1664 y_true = to_tensor(y_true, device=self.device)
1665 return self.criterion_(y_pred, y_true)
1666
/usr/local/lib/python3.10/dist-packages/skorch/utils.py in to_tensor(X, device, accept_sparse)
102 return {key: to_tensor_(val) for key, val in X.items()}
103 if isinstance(X, (list, tuple)):
--> 104 return [to_tensor_(x) for x in X]
105 if np.isscalar(X):
106 return torch.as_tensor(X, device=device)
/usr/local/lib/python3.10/dist-packages/skorch/utils.py in <listcomp>(.0)
102 return {key: to_tensor_(val) for key, val in X.items()}
103 if isinstance(X, (list, tuple)):
--> 104 return [to_tensor_(x) for x in X]
105 if np.isscalar(X):
106 return torch.as_tensor(X, device=device)
/usr/local/lib/python3.10/dist-packages/skorch/utils.py in to_tensor(X, device, accept_sparse)
104 return [to_tensor_(x) for x in X]
105 if np.isscalar(X):
--> 106 return torch.as_tensor(X, device=device)
107 if isinstance(X, Sequence):
108 return torch.as_tensor(np.array(X), device=device)
TypeError: new(): invalid data type 'str'
Update
: It is solved in here : https://github.com/skorch-dev/skorch/issues/1052
Upvotes: 1
Views: 37