Reputation: 39
I am training a model in keras consisting of 6 custom blocks of CNN. I am using tensorflow 2 with keras and using categorical cross entropy with adam optimizer. The dataset consist of 55000 images. I am using 20 epochs for training.
The model generation function:
def generateModel(class_count):
return keras.Sequential([
keras.layers.experimental.preprocessing.Rescaling(1./255),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_a"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_b"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_c"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_a"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_b"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_c"),
keras.layers.MaxPooling2D((1,1)),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_c"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_c"),
keras.layers.MaxPooling2D((1,1)),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_c"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_c"),
keras.layers.MaxPooling2D((2,2)),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(512,activation=keras.activations.tanh),
keras.layers.Dense(256,activation=keras.activations.tanh),
keras.layers.Dense(128,activation=keras.activations.tanh),
keras.layers.Dense(class_count,activation=keras.activations.softmax)
])
My main problem is that the accuracy begins with value between 0.09 and 0.1 and it also ends with that range. The model accuracy not increasing and you can call it almost stable. I also tried to reduce the number of filter channels setting them to 32 in each block but still the same happens. I do not know what is happening.
Update:
The dataset loading function. The path variable accepts the path to the root location of the stored images like /home/username/PLantDiseaseDataset/* where * can be any class name folder consisting of the images:
def getDataset(path):
dataset={}
dataset['training']=keras.preprocessing.image_dataset_from_directory(dataset_location,label_mode='categorical',batch_size=32,image_size=(256,256),seed=1,validation_split=0.1,subset='training')
dataset['validation']=keras.preprocessing.image_dataset_from_directory(dataset_location,label_mode='categorical',batch_size=32,image_size=(256,256),seed=1,validation_split=0.1,subset='validation')
return dataset
The function that generates the model is
def generateModel(class_count):
return keras.Sequential([
keras.layers.experimental.preprocessing.Rescaling(1./255),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_a"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_b"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_1_c"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_a"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_b"),
keras.layers.Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_2_c"),
keras.layers.MaxPooling2D((1,1)),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_3_c"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_4_c"),
keras.layers.MaxPooling2D((1,1)),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_5_c"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_a"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_b"),
keras.layers.Conv2D(32,kernel_size=(2,2),strides=(1,1),padding='valid',activation=keras.activations.relu,name="Conv_6_c"),
keras.layers.MaxPooling2D((2,2)),
keras.layers.Flatten(),
keras.layers.Dropout(0.3),
keras.layers.Dense(512,activation=keras.activations.tanh),
keras.layers.Dense(256,activation=keras.activations.tanh),
keras.layers.Dense(128,activation=keras.activations.tanh),
keras.layers.Dense(class_count,activation=keras.activations.softmax)
])
The function that trains the model is
def train_model(model,dataset):
model.compile(loss='categorical_crossentropy',optimizer=keras.optimizers.Adam(learning_rate=0.001),metrics=['categorical_accuracy'])
history=model.fit(dataset['validation'],epochs=10)
return history
The calling of all functions and execution statements are
dataset_location=<Root location of the images i.e. the path variable value>
dataset=getDataset(dataset_location)
model=generateModel(len(dataset['training'].class_names))
history=train_model(model,dataset)
I have trained the model on 10% of dataset with 10 epochs so this is the result is
Epoch 1/10
2/174 [..............................] - ETA: 2:37 - loss: 3.6052 - categorical_accuracy: 0.0625WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.2761s vs `on_train_batch_end` time: 0.7887s). Check your callbacks.
174/174 [==============================] - 180s 1s/step - loss: 3.4654 - categorical_accuracy: 0.0931
Epoch 2/10
174/174 [==============================] - 176s 1s/step - loss: 3.4134 - categorical_accuracy: 0.0979
Epoch 3/10
174/174 [==============================] - 177s 1s/step - loss: 3.4111 - categorical_accuracy: 0.0936
Epoch 4/10
174/174 [==============================] - 177s 1s/step - loss: 3.4097 - categorical_accuracy: 0.0974
Epoch 5/10
174/174 [==============================] - 176s 1s/step - loss: 3.4127 - categorical_accuracy: 0.0936
Epoch 6/10
174/174 [==============================] - 176s 1s/step - loss: 3.4091 - categorical_accuracy: 0.0925
Epoch 7/10
174/174 [==============================] - 176s 1s/step - loss: 3.4102 - categorical_accuracy: 0.0952
Epoch 8/10
174/174 [==============================] - 176s 1s/step - loss: 3.4084 - categorical_accuracy: 0.0913
Epoch 9/10
174/174 [==============================] - 176s 1s/step - loss: 3.4109 - categorical_accuracy: 0.0924
Epoch 10/10
174/174 [==============================] - 176s 1s/step - loss: 3.4107 - categorical_accuracy: 0.0940
Hope this might also help
Upvotes: 1
Views: 406
Reputation: 8092
To help isolate the cause of the problem I suggest you use a model that is known to work. I suggest Mobilenet. If the mobilenet model trains well then the problem is in your model. If it does not train well then the problem is with the data or the way the data is feed to the model. Code to use Mobilenet is shown below. Mobilenet needs the images to be 224 X 224 so change that in your data set.
img_shape=(224,224)
base_model=tf.keras.applications.mobilenet.MobileNet( include_top=False, input_shape=img_shape, pooling='max', weights='imagenet',dropout=.4)
x=base_model.output
x = Dense(64, kernel_regularizer = regularizers.l2(l = 0.016),activity_regularizer=regularizers.l1(0.006),
bias_regularizer=regularizers.l1(0.006) ,activation='relu')(x)
x=Dropout(rate=.3 seed=123)(x)
output=Dense(class_count, activation='softmax',)(x)
model=Model(inputs=base_model.input, outputs=output)
model.compile(Adamax(lr=.001 ), loss='categorical_crossentropy', metrics='accuracy')
Upvotes: 1