I'm new to transfer learning and I'm writing the code on my own. However, despite having a lot of data, my model doesn't seem to learn well; in fact, I often get exactly the same test accuracy value across different configurations. Could the source of my mistake be in the code structure, or is there another aspect I might have missed?
What I want to do is this: I have two subfolders under the "cars" folder on my computer, test and train. The test folder contains 63 acceptable car images and 63 unacceptable car images, named accept1, accept2, ..., accept63 and reject1, reject2, ..., reject63. The train folder contains two subfolders, "accept" and "reject". The "accept" folder holds 453 accept images, named accept1, accept2, ..., accept453, and the "reject" folder holds 453 reject images, named reject1, reject2, ..., reject453. I'm trying to build a prediction program with transfer learning, comparing different learning rates and layer-freezing percentages. However, as I mentioned, the test accuracy values don't look very promising.
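For reference, here is a quick sanity check I run (a minimal sketch; the paths are my local ones and I assume all files are .jpg) to confirm the folder layout and image counts match what I describe above:

from pathlib import Path

root = Path("C:/Users/cndmr/Desktop/cars")
for sub in ["train/accept", "train/reject", "test"]:
    count = len(list((root / sub).glob("*.jpg")))
    print(sub, count)  # expecting 453, 453, and 126 (63 accept + 63 reject)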
For example:
Test accuracy for DenseNet121 with lr=0.015 and 0% frozen layers: 0.800000011920929
Training accuracy: 0.8815426826477051
Validation accuracy: 0.7555555701255798
Test accuracy for DenseNet121 with lr=0.015 and 5.0% frozen layers: 0.800000011920929
Training accuracy: 0.8677685856819153
Validation accuracy: 0.7888888716697693
Test accuracy for DenseNet121 with lr=0.015 and 10.0% frozen layers: 0.800000011920929
Training accuracy: 0.8209366202354431
Validation accuracy: 0.7388888597488403
Test accuracy for DenseNet121 with lr=0.014 and 0% frozen layers: 0.800000011920929
Training accuracy: 0.8939393758773804
Validation accuracy: 0.7833333611488342
Can you help me? As I said, I'm new to programming.
Here is my code:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import pandas as pd
import timeit
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Directories where the data is located
train_dir = "C:/Users/cndmr/Desktop/cars/train"
test_dir = "C:/Users/cndmr/Desktop/cars/test"
# Using ImageDataGenerator to load and preprocess the dataset
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.5, 1.5],
    fill_mode='nearest',
    validation_split=0.2  # 80% will be used for training, 20% for validation
)
# Load training and validation datasets
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=64,  # increased batch size
    class_mode='categorical',
    subset='training'  # training subset of the 80/20 split
)
validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=64,  # increased batch size
    class_mode='categorical',
    subset='validation'  # validation subset of the 80/20 split
)
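# Sanity check (my own addition): grab one batch from the training generator;
# shapes assume batch_size=64 and target_size=(224, 224) above (the final batch can be smaller)
x_batch, y_batch = next(train_generator)
print(x_batch.shape, y_batch.shape)  # expected: (64, 224, 224, 3) (64, 2)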
# Build the list of test files for flow_from_dataframe; names follow the
# accept1..accept63 / reject1..reject63 scheme described above
test_data = [('accept', f'accept{i}.jpg') for i in range(1, 64)] + [('reject', f'reject{i}.jpg') for i in range(1, 64)]
# Create a dataframe
test_df = pd.DataFrame(test_data, columns=['class', 'filename'])
# Using ImageDataGenerator to load and preprocess the test dataset
test_datagen = ImageDataGenerator(rescale=1./255) # New datagen for the test dataset
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory=test_dir,
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
    shuffle=False  # keep order so predictions line up with test_generator.classes
)
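# Sanity check (my own addition): the label mapping should be consistent across generators.
# flow_from_directory infers classes from the subfolder names, while flow_from_dataframe
# infers them from the y_col values, so these dicts should all match.
print(train_generator.class_indices)       # e.g. {'accept': 0, 'reject': 1}
print(validation_generator.class_indices)
print(test_generator.class_indices)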
# Train and evaluate each model by gradually increasing the learning rates
# Train and evaluate each model, gradually increasing the learning rate
for lr_ratio in [0.010, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.020, 0.021, 0.022, 0.023, 0.024, 0.025]:
    print(f"\nLearning Rate Ratio: {lr_ratio}")

    # Pretrained architectures to compare
    pretrained_models = [DenseNet121]

    # For each model
    for model_class in pretrained_models:
        model_name = model_class.__name__
        print(f"\n\n{model_name}\n{'=' * len(model_name)}")

        # Set the optimizer proportionally to the learning-rate ratio
        # (note: the effective learning rate passed to Adam is 0.0001 * lr_ratio)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001 * lr_ratio)
        # Load the pretrained model without its top classification layer
        base_model = model_class(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

        # Extend the model with a new classification head
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(256, activation='relu')(x)
        x = Dense(128, activation='relu')(x)
        x = Dense(64, activation='relu')(x)
        x = Dense(32, activation='relu')(x)
        predictions = Dense(len(train_generator.class_indices), activation='softmax')(x)  # softmax instead of sigmoid

        # Create the new extended model
        model = Model(inputs=base_model.input, outputs=predictions)

        # Percentages of base-model layers to freeze
        freeze_percentages = [0.3, 0.2, 0.1, 0.05, 0]
        for freeze_percentage in freeze_percentages:
            print(f"\nFreeze Percentage: {freeze_percentage * 100}%")

            # Freeze the given fraction of the base model's layers
            freeze_layers = int(len(base_model.layers) * freeze_percentage)
            for layer in base_model.layers[:freeze_layers]:
                layer.trainable = False

            # Compile the model
            model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

            # Define callbacks
            checkpoint = ModelCheckpoint(f'best_model_{model_name}_{lr_ratio}_{freeze_percentage}.h5',
                                         save_best_only=True, monitor='val_loss', mode='min')
            early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
            reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, mode='min')
            # Train the model
            start_time = timeit.default_timer()
            history = model.fit(
                train_generator,
                epochs=40,
                validation_data=validation_generator,
                callbacks=[checkpoint, early_stopping, reduce_lr],
                workers=0
            )
            elapsed_time = timeit.default_timer() - start_time
            print(f"Training time: {elapsed_time} seconds")

            # Accuracy values recorded during training
            training_accuracy = history.history['accuracy']
            validation_accuracy = history.history['val_accuracy']

            # Evaluate overall performance for this learning rate / freeze percentage combination
            test_loss, test_acc = model.evaluate(test_generator, steps=len(test_generator), workers=0)
            print(f'Test accuracy for {model_name} with lr={lr_ratio} and {freeze_percentage * 100}% frozen layers: {test_acc}')
            print(f'Training accuracy: {training_accuracy[-1]}')  # training accuracy in the last epoch
            print(f'Validation accuracy: {validation_accuracy[-1]}')  # validation accuracy in the last epoch
            print("=" * (len(model_name) + 60))
            # Compute the confusion matrix on the test set
            y_true = test_generator.classes
            y_pred = np.argmax(model.predict(test_generator, workers=0), axis=1)
            cm = confusion_matrix(y_true, y_pred)

            # Visualize the matrix
            plt.figure(figsize=(8, 6))
            sns.heatmap(cm, annot=True, fmt='g', cmap='Blues',
                        xticklabels=test_generator.class_indices.keys(),
                        yticklabels=test_generator.class_indices.keys())
            plt.title('Confusion Matrix')
            plt.xlabel('Predicted')
            plt.ylabel('True')
            plt.show()
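# Hypothetical bookkeeping sketch (not part of my training logic): if I collected
# (model_name, lr_ratio, freeze_percentage, test_acc) tuples in a `results` list
# inside the freeze loop, pandas could summarize all runs at the end, e.g.:
#   results_df = pd.DataFrame(results, columns=['model', 'lr_ratio', 'freeze_pct', 'test_acc'])
#   print(results_df.sort_values('test_acc', ascending=False))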
I'm using JupyterLab 3.6.3 btw.