AlexMacabu
AlexMacabu

Reputation: 137

Good accuracy, bad predictions - CNN with TensorFlow Python

I'm trying to create a binary classifier with TensorFlow, capable of detecting whether a person is wearing a surgical mask. I've created a training dataset containing 2.2k cropped faces of people wearing masks and another 2.2k cropped faces of people NOT wearing masks (my face included). I trained the CNN for 100 epochs (almost 24 hrs) and got a validation accuracy (val_acc) of around 84.25%. But my CNN is still predicting wrong most of the time! I've tried more than a dozen CNN architectures and even transfer learning using MobileNetV2, but my results are still pretty bad. Am I doing something wrong?

Part of my training code is:

IMG_SIZE = 200  # Image side length; used for both dimensions of target_size and input_shape
batch_size = 50  # Number of samples fed to the network at once
epochs = 100  # Number of times the training data is passed through the network

# NOTE(review): not referenced anywhere in the visible part of the script.
training_data = []

### PREPARE AUGMENTED DATA TO FEED THE NETWORK ###
# Training generator: rescales pixels by 1/255 and applies random rotation
# (up to 45 degrees), shear, zoom, width/height shifts and horizontal flips.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=45,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation generator: only the 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Directory iterators yielding batches of IMG_SIZE x IMG_SIZE images with binary labels.
train_it = train_datagen.flow_from_directory(
    directory=TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=batch_size,
    class_mode='binary',
)
test_it = test_datagen.flow_from_directory(
    directory=VALIDATION_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=batch_size,
    class_mode='binary',
)
############################################

# CNN: two convolutional stages followed by a dense head with a single
# sigmoid output unit.
model = Sequential()

# Stage 1: two 32-filter 3x3 convolutions ('same' padding), 2x2 max-pool, dropout.
model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
                 input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.2))

# Stage 2: two 64-filter 3x3 convolutions, batch norm, 2x2 max-pool, dropout.
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(Dropout(0.3))

# Classifier head.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))


############   COMPILE THE NEW NEURAL NETWORK    ############
# Inverse-time decay: lr = initial_learning_rate / (1 + decay_rate * step / decay_steps).
# NOTE(review): initial_learning_rate is 1e-7, far below Adam's default of 1e-3,
# and decay_steps is derived from total_val while the steps per epoch passed to
# fit() are derived from total_train -- confirm both are intentional.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.0000001,
    decay_steps=(total_val // batch_size) * 1000,
    decay_rate=1,
    staircase=False,
)

adam = Adam(learning_rate=lr_schedule)

# The network's final layer is Dense(1, activation='sigmoid'), so the loss
# receives probabilities, not logits. from_logits must therefore be False;
# with True the loss would apply a second sigmoid to the model output.
model.compile(optimizer=adam,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

model.summary()
##################################################
############   PERFORMS THE NN TRAINING    ############

# Train on batches from train_it; validation batches come from test_it.
# Step counts are the sample totals divided (floor) by the batch size.
history = model.fit(
    train_it,
    epochs=epochs,
    steps_per_epoch=total_train // batch_size,
    validation_data=test_it,
    validation_steps=total_val // batch_size,
    shuffle=True,
    callbacks=[checkpoint_callback],
)

And the evaluation code:

####################################################################
#### IMAGE ANALYSIS FUNC. RETURNS RESULT ON VAR result ####
def img_analysis(x, y, w, h):
    """Run the classifier on the first image listed in WEBCAM_TEST.

    The image is loaded at IMG_SIZE x IMG_SIZE, converted to a float32 batch
    of one, and scaled to the [0, 1] range before prediction. The scaling
    mirrors the ``rescale=1.0 / 255.0`` used by the training and validation
    generators; feeding raw 0-255 pixels to a network trained on 0-1 inputs
    gives unreliable predictions.
    NOTE(review): assumes the model loaded at inference time was trained with
    that same 1/255 rescaling -- confirm for the transfer-learning checkpoint.

    Args:
        x, y, w, h: Face bounding box. Accepted for call compatibility; not
            used by this function.

    Returns:
        A ``(result, load_path)`` tuple with the raw ``model.predict`` output
        and the path of the analysed image, or ``None`` when WEBCAM_TEST
        contains no entries.
    """
    entries = os.listdir(WEBCAM_TEST)
    if not entries:
        return None

    # Only the first directory entry is analysed per call.
    load_path = os.path.join(WEBCAM_TEST, entries[0])
    img = load_img(load_path, target_size=(IMG_SIZE, IMG_SIZE))
    batch = img_to_array(img).reshape(1, IMG_SIZE, IMG_SIZE, 3)
    batch = batch.astype('float32') / 255.0  # same scaling as the data generators
    result = model.predict(batch)
    return result, load_path
###########################################################################################
#### DRAWS A RECTANGLE ON THE DETECTED FACES WITH THE RESULT ####
def show_rectangle(result, load_path, x, y, w, h):
    """Draw a labelled rectangle for a face on the global ``frame`` and archive the image.

    The classifier ends in a sigmoid, so ``result`` is a score between 0 and 1
    rather than an exact 0 or 1. It is thresholded at 0.5 instead of being
    compared for equality, which would leave most predictions undrawn.
    NOTE(review): keeps the original mapping (1 -> mask on, 0 -> mask off);
    confirm it matches the class indices assigned during training.

    Args:
        result: Prediction for the face; a scalar or a single-element array.
            ``None`` is ignored (nothing is drawn).
        load_path: Path of the analysed image; copied into the 'MaskOn' or
            'MaskOff' sub-folder of SAVED_IMG.
        x, y, w, h: Top-left corner, width and height of the face box.
    """
    import numpy as np  # local import so the function does not rely on a file-level alias

    if result is None:
        return

    score = float(np.asarray(result).ravel()[0])
    if score >= 0.5:
        label, colour, folder = 'COM MASCARA', (0, 255, 0), 'MaskOn'
    else:
        label, colour, folder = 'SEM MASCARA', (0, 0, 255), 'MaskOff'

    # Use the y parameter for the top-left corner (the original mask-on branch
    # read the global fy by mistake).
    cv2.rectangle(frame, (x, y), (x + w, y + h), colour, 2)
    cv2.putText(frame, label,
                org=(x, y - 10),
                fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=0.5,
                color=colour
                )

    try:
        shutil.copy(load_path, os.path.join(SAVED_IMG, folder))
    except OSError:
        # Archiving the image is best-effort; a failed copy must not stop the
        # video loop, but only file-system errors are suppressed.
        pass
#############################################################################

################  WEBCAM CONFIG. ################
video = cv2.VideoCapture(0)  # Create the webcam capture object (device index 0)
video.set(cv2.CAP_PROP_FRAME_WIDTH, 640)  # Set the video width (1366 noted by the author, presumably an alternative value)
video.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # Set the video height (760 noted by the author, presumably an alternative value)

################  FACE DETECTION METHOD  ################
face_cascade = cv2.CascadeClassifier(HOME_DIR + 'cascade/haarcascade_frontalface_default.xml')    # Haar cascade file for frontal faces, located under HOME_DIR
# Load the saved Keras model used for prediction; this rebinds the module-level name `model`.
model = tf.keras.models.load_model(HOME_DIR + 'best_TL.hdf5')

# Main capture loop: read a frame, detect faces, classify, annotate, display.
# The try/finally guarantees the camera and window are released even when an
# exception interrupts the loop.
try:
    while True:
        # conectado is the success flag; frame is the captured image.
        conectado, frame = video.read()
        if not conectado:
            # A failed read yields no usable frame; stop instead of passing
            # it on to cv2.resize.
            break

        frame = cv2.resize(frame, (640, 480), fx=0, fy=0, interpolation=cv2.INTER_CUBIC)  # Resize the camera image
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # Grayscale copy used for face detection
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=3)  # Detect faces

        for (fx, fy, fw, fh) in faces:
            fh = int(fh * 1.2)  # Extend the box height by 20%
            roi_face_color = frame[fy:fy + fh, fx:fx + fw]  # Region of interest for the face
            roi_colorQ.put(roi_face_color)  # Put the face crop on the queue
            img_ready = bool(os.listdir(WEBCAM_TEST))  # True when there are images to analyse
            if img_ready:
                result, load_path = img_analysis(fx, fy, fw, fh)  # Classify the stored image
                show_rectangle(result, load_path, fx, fy, fw, fh)  # Draw the labelled rectangle
                print('The Result is: ', result)

        cv2.imshow('MaskDetector - Thread - Press "q" to quit', frame)  # Show the annotated frame
        if cv2.waitKey(1) == ord('q'):  # Wait 1 ms for a key press; 'q' quits
            break
finally:
    ### MEMORY RELEASE AND END OF THE PROGRAM ###
    video.release()  # Release the capture device
    cv2.destroyAllWindows()  # Close the window and free its memory

Upvotes: 0

Views: 684

Answers (2)

AlexMacabu
AlexMacabu

Reputation: 137

I figured out that my dataset wasn't appropriate. I had downloaded thousands of images of people wearing surgical masks and then passed those images through a face-extraction algorithm to build a dataset containing just the faces.

But unfortunately that strategy didn't work very well for me. Even though my accuracy was around 85%, my CNN was still predicting wrong most of the time. The solution was to build a new dataset, composed of 4.4k pictures of myself. My accuracy increased to 98.8% and the predictions were finally OK, as you can see here: https://www.youtube.com/watch?v=QM41tMJSrBE

Upvotes: 1

Vlad Sirbu
Vlad Sirbu

Reputation: 138

You don't seem to normalise the data. It's recommended you do that in the training and inference process.

Upvotes: 0

Related Questions