Cmark
Cmark

Reputation: 41

Pytorch Model always outputs 0.5 for an unkown reason

I have a pytorch model I'm trying to use to do facial recognition. I am using the same model structure, loss, and optimizer as a working code, but it seems like the backprop won't do anything, any output of the NN is just 0.5. Here is the code, any help/suggestions is/are appreciated.

import cv2
import numpy as np
import PIL
import torchvision.transforms as transforms
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F

devicet = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(devicet)
if devicet == 'cpu':
  print ('Using CPU')
else:
  print ('Using GPU')
cuda0 = torch.device('cuda:0')

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.step1 = nn.Linear(10000, 200)
        self.step2 = nn.Linear(200, 200)
        self.step3 = nn.Linear(200, 1)
    def forward(self, x):
      x = F.relu(x)
      x = self.step1(x)
      x = F.relu(x)
      x = self.step2(x)
      x = F.relu(x)
      x = self.step3(x)
      x = F.relu(x)
      x = torch.sigmoid(x)
      return (x)
net = Net()

transformer = transforms.ToTensor()
original_image = cv2.imread('group.jpg', cv2.IMREAD_GRAYSCALE)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
detected_faces = face_cascade.detectMultiScale(image=original_image, scaleFactor=1.05, minNeighbors=2)
tensor_collection_faces = []
tensor_collection_output = []
for (x, y, width, height) in detected_faces:
    im_pil = PIL.Image.fromarray(original_image)
    im_pil = im_pil.crop((x,y,x+width,y+height))
    im_pil = im_pil.resize((100,100))
    im_pil = PIL.ImageOps.grayscale(im_pil)
    curr_image_tens = (transformer(im_pil)).reshape((10000)).numpy()
    tensor_collection_faces.append(curr_image_tens)
    display(im_pil)
    tensor_collection_output.append([float(input('Expected Result: '))])
input_tensor = torch.tensor(tensor_collection_faces)
output_tensor = torch.tensor(tensor_collection_output)
loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
for i in tqdm(range(500), desc='Training'):
    y_pred = net(input_tensor)
    loss = loss_fn(y_pred, output_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
print (net(input_tensor))

The output of the code:

Training: 100%|██████████| 500/500 [00:11<00:00, 44.40it/s]tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward>)

Upvotes: 1

Views: 898

Answers (1)

Jay Mody
Jay Mody

Reputation: 4033

You applied both relu and sigmoid to your final output. In this case, you want to apply only sigmoid.

def forward(self, x):
    x = F.relu(x)
    x = self.step1(x)
    x = F.relu(x)
    x = self.step2(x)
    x = F.relu(x)
    x = self.step3(x)
    x = F.relu(x) # <---- delete this line
    x = torch.sigmoid(x)
    return (x)

What's happening is your network is outputting negative values in the last layer (before relu or sigmoid are applied), which when passed to relu go to 0. sigmoid(0) = 0.5, which is why you are seeing 0.5.

    x = self.step3(x)    # x = some negative value
    x = F.relu(x)        # relu(negative) = 0
    x = torch.sigmoid(x) # sigmoid(0) = 0.5

enter image description here

enter image description here

There might be other issues with your code, but it's hard to say without having access to the data/labels (or even toy data).

Upvotes: 2

Related Questions