Reputation: 137
I'm trying to create a binary classifier with TensorFlow, capable of detecting whether a person is wearing a surgical mask. The thing is, I've created a training dataset containing 2.2k cropped faces of people wearing masks and another 2.2k cropped faces of people NOT wearing masks (my face included). I trained the CNN for 100 epochs (almost 24 hrs) and got an accuracy of around 84.25% (val_acc). But my CNN is still predicting wrong most of the time! I've tried more than a dozen CNN architectures and even transfer learning using MobileNetV2, but my results are still pretty bad. Am I doing something wrong?
Part of my training code is:
# Training hyper-parameters.
IMG_SIZE = 200   # width and height, in pixels, of the images fed to the network
batch_size = 50  # number of samples fed to the network at once
epochs = 100     # number of passes over the training data
training_data = []

### PREPARE THE AUGMENTED DATA THAT FEEDS THE NETWORK ###
# Both generators rescale pixel values by 1/255. Only the training generator
# augments: shear, zoom, rotation up to 45 degrees, small shifts and
# horizontal flips.
_augmentation_options = dict(
    shear_range=0.2,
    zoom_range=0.2,
    rotation_range=45,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0, **_augmentation_options)
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Directory iterators: same label mode, batch size and image size for the
# training and validation directories.
_flow_options = dict(
    class_mode='binary',
    batch_size=batch_size,
    target_size=(IMG_SIZE, IMG_SIZE),
)
train_it = train_datagen.flow_from_directory(directory=TRAIN_DIR, **_flow_options)
test_it = test_datagen.flow_from_directory(directory=VALIDATION_DIR, **_flow_options)
############################################
# Binary classifier: two convolutional stages followed by a dense head that
# ends in a single sigmoid unit.
model = Sequential()

# Stage 1: two 32-filter 3x3 convolutions ('same' padding), 2x2 max-pool, dropout.
model.add(Conv2D(32, (3, 3), padding='same', activation='relu',
                 input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.2))

# Stage 2: two 64-filter 3x3 convolutions (default padding), batch norm,
# 2x2 max-pool, dropout.
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.3))

# Classification head.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
############ COMPILE THE NEW NEURAL NETWORK ############
# Inverse-time decay: lr(step) = initial / (1 + decay_rate * step / decay_steps).
# NOTE(review): an initial learning rate of 1e-7 is very small for Adam and
# may explain the slow training -- confirm it is intentional.
# NOTE(review): decay_steps is derived from total_val; confirm whether
# total_train was intended.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0000001,
    decay_steps=(total_val // batch_size) * 1000,
    decay_rate=1,
    staircase=False,
)
adam = Adam(learning_rate=lr_schedule)

# The network's last layer is Dense(1, activation='sigmoid'), so it outputs
# probabilities, not logits. The loss must therefore use from_logits=False;
# from_logits=True would apply a second sigmoid inside the loss.
model.compile(
    optimizer=adam,
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=['accuracy'],
)
model.summary()
##################################################
############ RUN THE NETWORK TRAINING ############
# Number of whole batches drawn from each iterator per epoch.
_train_steps = total_train // batch_size
_val_steps = total_val // batch_size

# Fit the model; the per-epoch history is kept in `history`.
history = model.fit(
    train_it,
    epochs=epochs,
    steps_per_epoch=_train_steps,
    validation_data=test_it,
    validation_steps=_val_steps,
    callbacks=[checkpoint_callback],
    shuffle=True,
)
And the evaluation code:
####################################################################
#### IMAGE ANALYSIS FUNC. RETURNS RESULT ON VAR result          ####
def img_analysis(x, y, w, h):
    """Run the classifier on the face crops stored in ``WEBCAM_TEST``.

    Every file in the directory is loaded at ``IMG_SIZE`` x ``IMG_SIZE``,
    scaled by 1/255 and passed to the global ``model``. The scaling mirrors
    the ``rescale=1.0 / 255.0`` used by the training generators; feeding raw
    0-255 pixels to a network trained on 0-1 inputs gives meaningless scores.
    NOTE(review): assumes the loaded model was trained with that rescaling --
    confirm for ``best_TL.hdf5``.

    Args:
        x, y, w, h: Bounding box of the detected face. Not used here; kept so
            existing callers keep working.

    Returns:
        ``(result, load_path)`` for the last file processed, where ``result``
        is the raw ``model.predict`` output, or ``(None, None)`` when the
        directory is empty.
    """
    result, load_path = None, None
    for image_name in os.listdir(WEBCAM_TEST):
        load_path = os.path.join(WEBCAM_TEST, image_name)
        img = load_img(load_path, target_size=(IMG_SIZE, IMG_SIZE))
        img = img_to_array(img).astype('float32')
        img /= 255.0  # same normalisation as the training pipeline
        img = img.reshape(1, IMG_SIZE, IMG_SIZE, 3)  # add the batch dimension
        result = model.predict(img)
    return result, load_path


###########################################################################################
#### DRAWS A RECTANGLE ON THE DETECTED FACES WITH THE RESULT                           ####
def _draw_verdict(label, colour, folder, load_path, x, y, w, h):
    """Draw the box and caption on the global ``frame`` and archive the crop."""
    cv2.rectangle(frame, (x, y), (x + w, y + h), colour, 2)
    cv2.putText(frame, label,
                org=(x, y - 10),
                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=0.5,
                color=colour)
    try:
        shutil.copy(load_path, os.path.join(SAVED_IMG, folder))
    except OSError:
        # Archiving is best effort: a missing source file or target folder
        # must not interrupt the live video loop.
        pass


def show_rectangle(result, load_path, x, y, w, h, threshold=0.5):
    """Annotate the current frame with the classifier's verdict for one face.

    Args:
        result: Prediction returned by ``img_analysis`` (a single sigmoid
            score, as an array or a plain number), or ``None``.
        load_path: Path of the analysed crop; it is copied to the ``MaskOn``
            or ``MaskOff`` folder under ``SAVED_IMG``.
        x, y, w, h: Bounding box of the face in ``frame``.
        threshold: Scores at or above this value count as "mask on".

    A sigmoid output is almost never exactly 0 or 1, so the score is
    thresholded instead of compared for equality. Exact 0 and 1 still map to
    the same labels as before (0 -> 'SEM MASCARA', 1 -> 'COM MASCARA').
    """
    import numpy as np

    if result is None:
        return
    scores = np.asarray(result, dtype='float32').ravel()
    if scores.size == 0:
        return

    if scores[0] >= threshold:
        # Mask detected: green box.
        _draw_verdict('COM MASCARA', (0, 255, 0), 'MaskOn', load_path, x, y, w, h)
    else:
        # No mask: red box.
        _draw_verdict('SEM MASCARA', (0, 0, 255), 'MaskOff', load_path, x, y, w, h)
#############################################################################
################ WEBCAM CONFIG. ################
video = cv2.VideoCapture(0)  # capture object for the default webcam
video.set(cv2.CAP_PROP_FRAME_WIDTH, 640)   # requested frame width
video.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # requested frame height
################ FACE DETECTION METHOD ################
face_cascade = cv2.CascadeClassifier(HOME_DIR + 'cascade/haarcascade_frontalface_default.xml')
model = tf.keras.models.load_model(HOME_DIR + 'best_TL.hdf5')

try:
    while True:
        # `conectado` is False when no frame could be grabbed; `frame` is not
        # usable in that case, so stop instead of crashing in cv2.resize.
        conectado, frame = video.read()
        if not conectado:
            break

        frame = cv2.resize(frame, (640, 480), fx=0, fy=0, interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # the cascade runs on the grayscale frame
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=3)

        for (fx, fy, fw, fh) in faces:
            fh = int(fh * 1.2)  # make the box 20% taller than the detection
            roi_face_color = frame[fy:fy + fh, fx:fx + fw]  # colour crop of the face
            # NOTE(review): presumably a consumer of this queue writes the
            # crop into WEBCAM_TEST -- confirm against the rest of the file.
            roi_colorQ.put(roi_face_color)

            # Only classify when there is at least one image waiting.
            img_ready = bool(os.listdir(WEBCAM_TEST))
            if img_ready:
                result, load_path = img_analysis(fx, fy, fw, fh)
                show_rectangle(result, load_path, fx, fy, fw, fh)
                print('The Result is: ', result)

        cv2.imshow('MaskDetector - Thread - Press "q" to quit', frame)
        # waitKey(1) waits 1 ms for a key press; leave the loop on 'q'.
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    ### MEMORY RELEASE AND END OF THE PROGRAM ###
    # Runs on errors too, so the camera is always released.
    video.release()
    cv2.destroyAllWindows()
Upvotes: 0
Views: 684
Reputation: 137
I figured out that my dataset wasn't appropriate. I downloaded thousands of images of people wearing surgical masks and then passed those images through a face-extraction algorithm to build a dataset containing just the faces.
But unfortunately that strategy didn't work very well for me. Even though my accuracy was around 85%, my CNN was still predicting wrong most of the time. The solution was to build a new dataset, composed of 4.4k pictures of myself. My accuracy increased to 98.8% and the predictions were finally OK, as you can see here: https://www.youtube.com/watch?v=QM41tMJSrBE
Upvotes: 1
Reputation: 138
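You don't seem to normalise the data at inference time: the training generators rescale the pixels by 1/255, but the evaluation code passes raw 0–255 values to model.predict. It's recommended that you apply the same normalisation in both the training and the inference process.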
Upvotes: 0