model.fit() error message when trying to train 1D CNN

Question

I am trying to train a 1D-CNN on tabular data to then use LRP on it (as it has been done here). I am stuck at the model.fit() part of the implementation. Up to that point everything seems to work.

Here is the error I get:

model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)

Epoch 1/100
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [60], in <cell line: 1>()
----> 1 model_history = model.fit(X_train_smote, y_train_smote, batch_size=100, epochs=100, validation_split = 0.2)

File D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     65 except Exception as e:  # pylint: disable=broad-except
     66   filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67   raise e.with_traceback(filtered_tb) from None
     68 finally:
     69   del filtered_tb

File ~\AppData\Local\Temp\__autograph_generated_filelswqetod.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
     13 try:
     14     do_return = True
---> 15     retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
     16 except:
     17     do_return = False

ValueError: in user code:

    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 890, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\training.py", line 948, in compute_loss
        return self.compiled_loss(
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 139, in __call__
        losses = call_fn(y_true, y_pred)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 243, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\losses.py", line 1787, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "D:\Programme\Anaconda\envs\LRP_innvestigate\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None,) and (None, 6, 2) are incompatible

For the sake of clarity, here is the rest of my current implementation (adapted from a Kaggle notebook). The model structure is based on the feedback I received on my previous question here on SO.

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import ListedColormap
from sklearn.metrics import plot_confusion_matrix
from scipy.stats import norm, boxcox
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import Counter
from scipy import stats
import tensorflow as tf
import warnings
# Silence every warning: Warning is the base class of all warning categories,
# so this "ignore" filter matches all of them.
warnings.simplefilter(action='ignore', category=Warning)

# Read the raw customer table from disk.
dataset = pd.read_csv('F:/Programmieren/this_one/data/Churn_Modelling.csv')

# Feature matrix: columns 3 up to (not including) the last one.
# Target vector: the last column.
feature_columns = dataset.iloc[:, 3:-1]
target_column = dataset.iloc[:, -1]
X = feature_columns.values
y = target_column.values

# Replace the labels in feature column 2 (gender) with integer codes.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
gender_codes = le.fit_transform(X[:, 2])
X[:, 2] = gender_codes

# One-hot encode feature column 1 (the country, as explained in Ullah et al.);
# every other column is passed through unchanged.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
one_hot_step = ('encoder', OneHotEncoder(), [1])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

# Hold out 20% of the samples as the test set.
from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, test_size=.2)
X_train, X_test, y_train, y_test = split_parts

# Upsample the minority class of the training set with SMOTE.
import imblearn
from imblearn.over_sampling import SMOTENC

# Class counts of the full target vector, before any resampling.
counter = Counter(y)
print(counter)

# SMOTENC is used instead of SMOTE because there are multiple categorical
# features in the dataset; their column indices are listed here.
categorical_columns = [0, 1, 2, 4, 9, 10]
oversample = SMOTENC(categorical_features=categorical_columns)
X_train_smote, y_train_smote = oversample.fit_resample(X_train, y_train)

# Class counts of the resampled training target.
counter = Counter(y_train_smote)
print(counter)

# Scale every feature to the range 0-1, as explained in Ullah et al.
# The scaler is fitted on the resampled training data only and then reused
# for the test data.
from sklearn.preprocessing import MinMaxScaler
mms = MinMaxScaler()
mms.fit(X_train_smote)
X_train_smote = mms.transform(X_train_smote)
X_test = mms.transform(X_test)

# Record-wise (per-row) L1 normalization for relative value comparison, as
# stated by one of the authors I was in contact with.
from sklearn.preprocessing import normalize

row_l1 = dict(axis=1, norm='l1')
X_train_smote = normalize(X_train_smote, **row_l1)
X_test = normalize(X_test, **row_l1)

# Reshape the 2-D feature matrix into the 3-D layout used by the CNN:
# (samples, steps, channels), with every feature treated as one step.
sample_size = X_train_smote.shape[0]  # number of samples in train set
time_steps = X_train_smote.shape[1]   # number of features in train set
input_dimension = 1                   # one channel per step

train_data_reshaped = X_train_smote.reshape(sample_size, time_steps, input_dimension)
# The "\n" escapes must stay inside the string literals; a literal line break
# inside a single-quoted string is a SyntaxError.
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n", train_data_reshaped[0].shape)
print("An example sample:\n", train_data_reshaped[0])

# Reshape the test data the same way.
test_data_reshaped = X_test.reshape(X_test.shape[0], X_test.shape[1], input_dimension)

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv1D

# Create the model as explained in the paper: three stacked 1D convolutions
# followed by three fully connected layers.
model = Sequential()
# Input is 12 steps with 1 channel each. Each size-3 convolution shortens the
# sequence by 2 steps (12 -> 10 -> 8 -> 6, as the model summary shows).
model.add(Conv1D(filters=25, kernel_size=3, activation='relu', input_shape=(12,1)))
model.add(Conv1D(50, 3))
model.add(Conv1D(100, 3))
# NOTE(review): there is no Flatten (or pooling) layer between the last Conv1D
# and the first Dense, so every Dense is applied per step and the model output
# stays 3-D: (None, 6, 2) in the model summary.
model.add(Dense(2200, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))

# NOTE(review): 'categorical_crossentropy' compares the targets against that
# (None, 6, 2) output, while the reported error shows targets of shape (None,).
# Presumably the labels are plain integer class ids - confirm whether a
# flattened output plus 'sparse_categorical_crossentropy' (or one-hot encoded
# targets) is what is intended.
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.summary()

#output of  model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv1d (Conv1D)             (None, 10, 25)            100       
                                                                 
 conv1d_1 (Conv1D)           (None, 8, 50)             3800      
                                                                 
 conv1d_2 (Conv1D)           (None, 6, 100)            15100     
                                                                 
 dense (Dense)               (None, 6, 2200)           222200    
                                                                 
 dense_1 (Dense)             (None, 6, 2)              4402      
                                                                 
 dense_2 (Dense)             (None, 6, 2)              6         
                                                                 
=================================================================
Total params: 245,608
Trainable params: 245,608
Non-trainable params: 0
_________________________________________________________________

Are there any major flaws in my current approach that I am not aware of?

model.fit() error message when trying to train 1D CNN

Answers (1)

Related Questions