Reputation: 2978
I want to create a sklearn
pipeline that consists of two steps:
This is my data set (of course, I'm providing a reduced subset to show the data format):
x_train
array([[[0.45977011, 0.16666667, 0.18373494, ..., 0.33333333,
0.71317829, 0.7246617 ],
[0.6091954 , 0.25 , 0.28313253, ..., 0.33333333,
0.66666667, 0.73101353],
[0.25287356, 0.75 , 0.34337349, ..., 0.16666667,
0.62790698, 0.62137531],
...,
[0.6091954 , 0.58333333, 0.20481928, ..., 0.33333333,
0.62015504, 0.65009666],
[0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
0.71317829, 0.76719138],
[0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
0.5503876 , 0.71306269]],
[[0.6091954 , 0.25 , 0.28313253, ..., 0.33333333,
0.66666667, 0.73101353],
[0.25287356, 0.75 , 0.34337349, ..., 0.16666667,
0.62790698, 0.62137531],
[0.54022989, 0.5 , 0.34337349, ..., 0.33333333,
0.57364341, 0.66238608],
...,
[0.41954023, 0.91666667, 0.30722892, ..., 0.33333333,
0.71317829, 0.76719138],
[0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
0.5503876 , 0.71306269],
[0.44252874, 0.75 , 0.48192771, ..., 0.41666667,
0.62015504, 0.65023474]],
[[0.25287356, 0.75 , 0.34337349, ..., 0.16666667,
0.62790698, 0.62137531],
[0.54022989, 0.5 , 0.34337349, ..., 0.33333333,
0.57364341, 0.66238608],
[0.3908046 , 0.33333333, 0.34939759, ..., 0.41666667,
0.58914729, 0.70450152],
...,
[0.31609195, 0.41666667, 0.46987952, ..., 0.33333333,
0.5503876 , 0.71306269],
[0.44252874, 0.75 , 0.48192771, ..., 0.41666667,
0.62015504, 0.65023474],
[0.60344828, 0.41666667, 0.46686747, ..., 0.25 ,
0.66666667, 0.61391881]]]
y_train
array([[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.]], dtype=float32)
And this is my current code:
import numpy as np
from keras.wrappers.scikit_learn import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import *
from tensorflow.keras.callbacks import *
from sklearn.pipeline import Pipeline
# Custom transformer
class Transformer():
def transform(self, x):
x_img = np.apply_along_axis(self.rec_plot, 1, x).astype('float16')
return x_img
def rec_plot(s, eps=0.10, steps=10):
d = pdist(s[:,None])
d = np.floor(d/eps)
d[d>steps] = steps
Z = squareform(d)
return Z
def fit(self, x, y=None):
return x
def create_model():
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(50, 50, 17)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))
#sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
return model
transformer = Transformer()
clf = KerasClassifier(build_fn=create_model, verbose=0)
blackbox_model = Pipeline([('transform', transformer),('clf',clf)])
blackbox_model.fit(x_train, y_train)
When I run this code on my dataset, I get the following error:
AttributeError: 'numpy.ndarray' object has no attribute 'transform'
It seems to be related to the data format (therefore, I shared my data format above). But I'm not sure how to fix this issue.
Upvotes: 0
Views: 8865
Reputation: 19310
The method Transformer().fit()
should return self
.
Because your Transformer
object is stateless, it might be easier to use sklearn.preprocessing.FunctionTransformer
. You can instantiate that class with your transform function. Something like the following (untested):
import sklearn.preprocessing
def _rec_plot(s, eps=0.10, steps=10):
d = pdist(s[:,None])
d = np.floor(d/eps)
d[d>steps] = steps
Z = squareform(d)
return Z
def fun(x, y=None):
return np.apply_along_axis(_rec_plot, 1, x).astype('float16')
transformer = sklearn.preprocessing.FunctionTransformer(func=fun)
I also suggest not using the syntax from module import *
because that can pollute your namespace. When I first read your question, I wondered if the problem was clashing function names because of all of the unnecessary imports.
Upvotes: 2