Reputation: 11
I am trying to convert an workbook I did some time ago on Colab (using ImageDataGenerator) to one that uses tf.data.dataset as I now have a multi-gpu set up and am trying to learn how to do faster training. The model trains on the age/ gender/ race dataset from Kaggle but in this instance we're interested in just the sex and age prediction. Sex will either be 0 or 1 and the loss function is binarycrossentropy while age is an integer between 0 and 120 and the loss function is mse at it is regression.
import tensorflow as tf
import os
AUTOTUNE = tf.data.AUTOTUNE
batch_size = 64
#Load datasets from directories
train_gen = tf.data.Dataset.list_files(os.listdir(training_dir), shuffle = False)
valid_gen = tf.data.Dataset.list_files(os.listdir(validation_dir), shuffle = False)
def decode_img(img):
#Convert compressed string into a 3D tensor
img = tf.io.decode_jpeg(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.float32)
#Resize the image to the desired size
return tf.image.resize(img, [128,128])
def get_label(file):
gender = get_sex(file) #returns either 0 or 1
age = get_age(file) #returns interger between 0 and about 120
return gender, age
def process_path(file):
file = file.numpy()
file_path = str(bytes.decode(file))
file = file_path.split(' ')[-1].split("\\")[-1]
labels = get_label(file)
# Load data from file as a String
img = tf.io.read_file(file_path)
img = decode_img(img)
img = img / 255.0
return img, labels
def _set_shapes(t1, t2):
t1.set_shape((128,128,3))
t2.set_shape((2,))
return (t1,t2)
train_gen = train_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32]), num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32]), num_parallel_calls=AUTOTUNE)
train_gen = train_gen.map(_set_shapes,num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(_set_shapes, num_parallel_calls=AUTOTUNE)
train_gen = train_gen.batch(batch_size)
valid_gen = valid_gen.batch(batch_size)
train_gen
Output: <BatchDataset shapes: ((None, 128, 128, 3), (None, 2)), types: (tf.float32, tf.int32)>
#configure for performance
def config_for_performance(ds):
ds = ds.cache()
ds = ds.prefetch(buffer_size=AUTOTUNE)
return ds
train_gen = config_for_performance(train_gen)
valid_gen = config_for_performance(valid_gen)
The model itself:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Input, Activation, Flatten, BatchNormalization, PReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import BinaryCrossentropy
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')
gpus = tf.config.list_logical_devices('GPU')
#print(gpus)
strategy = tf.distribute.MirroredStrategy(gpus,cross_device_ops=tf.distribute.ReductionToOneDevice())
with strategy.scope():
#Define the convolution layers
inp = Input(shape=(128,128,3))
cl1 = Conv2D(32,(3,3), padding='same', kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(inp)
bn1 = BatchNormalization()(cl1)
pr1 = PReLU(alpha_initializer='he_uniform')(bn1)
cl2 = Conv2D(32,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(pr1)
bn2 = BatchNormalization()(cl2)
pr2 = PReLU(alpha_initializer='he_uniform')(bn2)
mp1 = MaxPool2D((2,2))(pr2)
cl3 = Conv2D(64,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp1)
bn3 = BatchNormalization()(cl3)
pr3 = PReLU(alpha_initializer='he_uniform')(bn3)
cl4 = Conv2D(64,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(pr3)
bn4 = BatchNormalization()(cl4)
pr4 = PReLU(alpha_initializer='he_uniform')(bn4)
mp2 = MaxPool2D((2,2))(pr4)
cl5 = Conv2D(128,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp2)
bn5 = BatchNormalization()(cl5)
pr5 = PReLU(alpha_initializer='he_uniform')(bn5)
mp3 = MaxPool2D((2,2))(pr5)
cl6 = Conv2D(256,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp3)
bn6 = BatchNormalization()(cl6)
pr6 = PReLU(alpha_initializer='he_uniform')(bn6)
mp4 = MaxPool2D((2,2))(pr6)
cl7 = Conv2D(512,(3,3), padding='same',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(mp4)
bn7 = BatchNormalization()(cl7)
pr7 = PReLU(alpha_initializer='he_uniform')(bn7)
mp5 = MaxPool2D((2,2))(pr7)
flt = Flatten()(mp5)
#This layer predicts age
agelayer = Dense(128, activation='relu',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(flt)
agelayer = BatchNormalization()(agelayer)
agelayer = Dropout(0.6)(agelayer)
agelayer = Dense(1, activation='relu', name='age_output', kernel_initializer='he_uniform', dtype='float32')(agelayer)
#This layer predicts gender
glayer = Dense(128, activation='relu',kernel_regularizer=l2(0.001), kernel_initializer='he_uniform')(flt)
glayer = BatchNormalization()(glayer)
glayer = Dropout(0.5)(glayer)
glayer = Dense(1, activation='sigmoid', name='gender_output', kernel_initializer='he_uniform', dtype='float32')(glayer)
modelA = Model(inputs=inp, outputs=[glayer,agelayer])
model_folder = 'C:/Users/mm/OneDrive/Documents/Age estimation & gender classification/models'
if not os.path.exists(model_folder):
os.mkdir(model_folder)
#Callback to control learning rate during training. Reduces learning rate by 5% after 3 epochs of no improvement on validation loss
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.95, patience=3,min_lr=0.000005)
#Callback to stop training if after 100 epochs of no improvement it stops and restores the best weights
es_callback = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True, min_delta=0.001)
#Compile Model A
modelA.compile(optimizer='Adam', loss={'gender_output': BinaryCrossentropy(), 'age_output': 'mse'}, metrics={'gender_output': 'accuracy', 'age_output':'mae'})
#Training Model A
history = modelA.fit(train_gen, epochs=100, validation_data=valid_gen, callbacks=[es_callback,lr_callback])
The error message:
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1') Epoch 1/100 INFO:tensorflow:Error reported to Coordinator: logits and labels must have the same shape ((None, 1) vs (None, 2)) Traceback (most recent call last): File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\nn_impl.py", line 130, in sigmoid_cross_entropy_with_logits labels.get_shape().assert_is_compatible_with(logits.get_shape()) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\framework\tensor_shape.py", line 1161, in assert_is_compatible_with raise ValueError("Shapes %s and %s are incompatible" % (self, other)) ValueError: Shapes (None, 2) and (None, 1) are incompatible
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\training\coordinator.py", line 297, in stop_on_exception yield File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\distribute\mirrored_run.py", line 346, in run self.main_result = self.main_fn(*self.main_args, **self.main_kwargs) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 692, in wrapper return converted_call(f, args, kwargs, options=options) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 382, in converted_call return _call_unconverted(f, args, kwargs, options) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\autograph\impl\api.py", line 463, in _call_unconverted return f(*args, **kwargs) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 835, in run_step outputs = model.train_step(data) show more (open the raw output data in a text editor) ...
File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\util\dispatch.py", line 206, in wrapper return target(*args, **kwargs) File "C:\Users\mm\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\ops\nn_impl.py", line 132, in sigmoid_cross_entropy_with_logits raise ValueError("logits and labels must have the same shape (%s vs %s)" % ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
Upvotes: 0
Views: 517
Reputation: 11
Managed to resove this with a bit of research and trial and error. Main issues are:
def process_path(file):
file = file.numpy()
file_path = str(bytes.decode(file))
file = file_path.split("\\")[-1]
gender, age = get_label(file)
# Load data from file as a String
img = tf.io.read_file(file_path)
img = decode_img(img)
img = img / 255.0
return img, gender, age
NB: I made a modification to extracting the labels from the filename as it wasn't getting it right all the time:
file = file_path.split("\\")[-1]
train_gen = train_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32, tf.int32]), num_parallel_calls=AUTOTUNE)
valid_gen = valid_gen.map(lambda x: tf.py_function(process_path, [x], [tf.float32, tf.int32, tf.int32]), num_parallel_calls=AUTOTUNE)
def _set_shapes(t1, t2, t3):
t1.set_shape((128,128,3))
t2.set_shape((1,))
t3.set_shape((1,))
t2 = tf.reshape(t2, [-1,1])
t3 = tf.reshape(t3, [-1,1])
return (t1,t2,t3)
Upvotes: 1