Brian Brown

Reputation: 4311

RNN, Keras, Python: Min Max Scaler Data normalization ValueError: Found array with dim 3. Estimator expected <= 2

I prepared a simple dataset and a set of labels because I want to learn how to implement a simple RNN in Keras. When I do not use normalization (MinMaxScaler), everything runs without errors.

However, when I try to use the scaler, I get "ValueError: Found array with dim 3. Estimator expected <= 2". This is the code:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN 
from keras.callbacks import ModelCheckpoint
from keras import backend as K

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


import numpy 
import matplotlib.pyplot as plt

def stagger(a, delay):
    # Widen each row with a one-step-delayed copy of the four feature
    # columns (columns 4:8); any remaining columns stay zero for delay > 1.
    num_of_rows = a.shape[0]
    num_of_cols = a.shape[1]
    data_in = numpy.zeros((num_of_rows + delay, num_of_cols * (1 + delay)))
    data_in[0:num_of_rows, 0:4] = a
    data_in[1:(num_of_rows + 1), 4:8] = a
    a = data_in[0:num_of_rows, :]
    return a

dataset = numpy.array([[ 0, 2, 0, 324], [ 1, 2, 0, 324], [ 2, 2, 0, 324], [ 3, 2, 0, 324],
                    [ 4, 2, 0, 324], [ 5, 2, 0, 324], [ 6, 2, 0, 324], [ 7, 2, 0, 324],
                    [ 8, 2, 0, 324], [ 9, 2, 0, 324], [10, 2, 0, 324], [11, 2, 0, 324],
                    [12, 2, 0, 324], [13, 2, 0, 324], [14, 2, 0, 324], [15, 2, 0, 324],
                    [16, 2, 0, 324], [17, 2, 0, 324], [18, 2, 0, 324], [19, 2, 0, 324],
                    [20, 2, 0, 324], [21, 2, 0, 324], [22, 2, 0, 324], [23, 2, 0, 324]])

labels = numpy.array([[0.82174763], [0.62098727], [0.45012733], [1.5912102 ],
                  [0.37570953], [0.2930966 ], [0.34982923], [0.72239097],
                  [1.37881947], [1.79550653], [1.88867237], [1.93567087],
                  [1.9771925 ], [2.10873853], [2.158302  ], [2.11018633],
                  [1.9714166 ], [2.2553416 ], [2.41161887], [2.41161887],
                  [2.30333453], [2.38390613], [2.21882553], [2.0707972 ]])

delay = 2
input_shape = (1, 4*(1+delay))
min_max_scaler = MinMaxScaler(feature_range=(0, 1))

# prepare dataset
dataset = stagger(dataset, delay)

# split dataset
x_train, x_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.2, shuffle=False)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, shuffle=False)

# normalize dataset
x_train = min_max_scaler.fit_transform(x_train)
x_test = min_max_scaler.transform(x_test)
x_val = min_max_scaler.transform(x_val)

# reshape dataset
x_train = numpy.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = numpy.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
x_val = numpy.reshape(x_val, (x_val.shape[0], 1, x_val.shape[1]))
y_train = numpy.reshape(y_train, (y_train.shape[0], 1, y_train.shape[1]))
y_test = numpy.reshape(y_test, (y_test.shape[0], 1, y_test.shape[1]))
y_val = numpy.reshape(y_val, (y_val.shape[0], 1, y_val.shape[1]))

# RNN model
model = Sequential()
model.add(SimpleRNN(64, activation="relu", kernel_initializer='random_uniform', input_shape=input_shape, return_sequences=True))
model.add(Dense(32, activation="relu", kernel_initializer= 'random_uniform')) 
model.add(Dense(1, activation="linear", kernel_initializer= 'random_uniform'))

# train and predict
callback = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy', tf.keras.metrics.MeanSquaredError()])
history = model.fit(x_train, y_train, epochs=100, batch_size=8, validation_data=(x_val, y_val), callbacks=[callback])
results = model.evaluate(x_test, y_test)

# plot
test_predictions = model.predict(x_test)
test_predictions = min_max_scaler.inverse_transform(test_predictions)
y_test = y_test[:,:,0]
test_predictions = test_predictions[:,:,0]
plt.plot(y_test)
plt.plot(test_predictions)
plt.legend(['y_test', 'predictions'], loc='upper left')
plt.show()

Upvotes: 1

Views: 1498

Answers (1)

Marco Cerliani

Reputation: 22031

This is because you are passing a 3D array to MinMaxScaler, which only accepts 2D arrays. What you have to do is reshape your predictions to 2D, apply the inverse transform, and then reshape back to 3D. This can be done in one line:

test_predictions = min_max_scaler.inverse_transform(test_predictions.reshape(-1,1)).reshape(test_predictions.shape)
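
More generally, the same flatten-transform-restore pattern works for any 3D batch, as long as the scaler was fitted on data with the same number of features as the last axis. A minimal sketch (the array shape and variable names here are made up for illustration, not taken from the question):

import numpy
from sklearn.preprocessing import MinMaxScaler

# hypothetical 3D batch of shape (samples, timesteps, features)
data_3d = numpy.random.rand(10, 1, 4)

scaler = MinMaxScaler(feature_range=(0, 1))

# flatten to 2D (samples*timesteps, features), scale, then restore the 3D shape
scaled_3d = scaler.fit_transform(data_3d.reshape(-1, data_3d.shape[-1])).reshape(data_3d.shape)

# the inverse transform follows the same flatten-then-restore pattern
restored_3d = scaler.inverse_transform(scaled_3d.reshape(-1, scaled_3d.shape[-1])).reshape(scaled_3d.shape)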

Upvotes: 1
