Reputation: 11
The model does not predict the right thing even after having a high training accuracy , the reason for that is really an issue, it does not predict well it is suppose to classify malaria images into infected and non infected but it seems not to do that, this model was developed using the cnn transfer learning
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten,Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import tensorflow as tf
print(tf.__version__)
# re-size all the images to this
IMAGE_SIZE = [224, 224 , 3]
# Import the Vgg 16 library as shown below and add preprocessing layer to the front of VGG
# Here we will be using imagenet weights
mobilnet = VGG19(input_shape=IMAGE_SIZE , weights='imagenet', include_top=False)
from google.colab import drive
drive.mount('/content/drive')
# don't train existing weights
for layer in mobilnet.layers:
layer.trainable = False
# useful for getting number of output classes
# folders = glob('Dataset/Train/*')
enter code here
folders = glob('/content/drive/MyDrive/Dataset/Train/*')
folders
# our layers - you can add more if you want
x = Flatten()(mobilnet.output)
prediction = Dense(len(folders), activation='softmax')(x)
# create a model object
model = Model(inputs=mobilnet.input, outputs=prediction)
# view the structure of the model
model.summary()
from tensorflow.keras.layers import MaxPooling2D
# tell the model what cost and optimization method to use
model.compile(
loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
# Use the Image Data Generator to import the images from the dataset
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
# # Make sure you provide the same target size as initialied for the image size
training_set = train_datagen.flow_from_directory(directory='/content/drive/MyDrive/Dataset/Train',
target_size = (224, 224),
batch_size = 32,
class_mode = 'categorical')
training_set
test_set = test_datagen.flow_from_directory('/content/drive/MyDrive/Dataset/Test',
target_size = (224, 224),
batch_size = 32,
class_mode = 'categorical')
test_set
from keras.callbacks import ModelCheckpoint, EarlyStopping
# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
# fit the model
# Run the cell. It will take some time to execute
r = model.fit_generator(
training_set,
validation_data=test_set,
epochs= 20,
steps_per_epoch=len(training_set),
validation_steps=len(test_set),
# callbacks=[es]
)
# plot the loss
plt.plot(r.history['loss'], label='train loss')
plt.plot(r.history['val_loss'], label='val loss')
plt.legend()
plt.show()
plt.savefig('LossVal_loss')
# plot the accuracy
plt.plot(r.history['accuracy'], label='train acc')
plt.plot(r.history['val_accuracy'], label='val acc')
plt.legend()
plt.show()
plt.savefig('AccVal_acc')
# save it as a h5 file
from tensorflow.keras.models import load_model
model.save('model_vgg19.h5')
y_pred = model.predict(test_set)
y_pred
import numpy as np
y_pred = np.argmax(y_pred, axis=1)
y_pred
print(y_pred.shape)
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
model=load_model('model_vgg19.h5')
img=image.load_img('/content/drive/MyDrive/Dataset/Train/Parasite/C111.png',target_size=(224,224))
x=image.img_to_array(img)
x
x.shape
x=x/255
x=np.expand_dims(x,axis=0)
image_data = preprocess_input(x)
print(image_data.shape)
model.predict(image_data)
a=np.argmax(model.predict(image_data), axis=1)
print(a)
if(a==1):
print("Not infected")
else:
print("Infected")
Upvotes: 0
Views: 616
Reputation: 8112
Be advised that modelfit_generator
is being depreciated. model.fit
can now handle generators. Steps per epoch really should be len(training_set)/batch_size
and same for validation steps. In the test_set
flow_from_directory
set shuffle=False
.
Do you know the balance values for your data set? That is the ratio in the training set between samples infected samples/ not infected samples. If this is <.5 your model will tend to predict not infected. For example, if you have 100 samples of infected and 1000 samples of not infected your model will be 90% accurate just by always predicting non-infected. I suggest you modify your early stopping callback as below
es=tf.keras.callbacks.EarlyStopping( monitor="val_loss", min_delta=0,
patience=4, verbose=1, mode="auto",
baseline=None, restore_best_weights=True)
Increase your number of epochs to say 50 so early stopping gets activated. With restore_best_weights=True it will load the model with the weights for the epoch with the lowest validation loss. I also suggest you use an adjustable learning rate using Keras callback ReduceLROnPlateau
. This will automatically reduce the learning rate by parameter factor if the validation loss fails to improve for patience number of epochs. My suggested code is given below:
tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=1,
verbose=1, mode="auto", min_delta=0.0001,
cooldown=0, min_lr=0)
set callbacks=[es, rlronp] in model.fit
you are not training the Vgg model which is fine but you might want to fine-tune the model. After you complete training you might do
mobilnet.trainable=True
fine_tune_epochs=10
total_epochs=epochs + fine_tune_epochs
r=model.fit(training_set, validation_data=test_set, epochs= total_epochs,
steps_per_epoch=len(training_set)/batch_size,
validation_steps=len(test_set)/batch_siz,
callbacks=[es, rlronp], initial_epoch=epoch)
To see if your model is really performing well you should create a confusion matrix. get a set of predictions on the test-set as below
classes=['infected', 'normal']
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.metrics import confusion_matrix, classification_report
y_pred=[]
preds=model.predict(train_set, batch_size=batch_size)
for p in preds:
y_pred.append(np.argmax(p))
y_true=test_set.labels # this is why you set shuffle=False for the train_set
y_true= np.array(labels)
y_pred=np.array(y_pred)
cm = confusion_matrix(y_true, y_pred )
clr = classification_report(y_true, y_pred, target_names=classes)
fig_width=8
fig_height=8
plt.figure(figsize=(fig_width, fig_height))
sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
plt.xticks(np.arange(length)+.5, classes, rotation= 90)
plt.yticks(np.arange(length)+.5, classes, rotation=0)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
print("Classification Report:\n----------------------\n", clr)
Upvotes: 1