Arul

Reputation: 303

PyTorch network produces constant output

I am trying to train a simple MLP to approximate y = f(a, b, c). My code is below.

import math

import torch
import torch.nn as nn
from torch.autograd import Variable

# hyperparameters
input_size = 3
output_size = 1
num_epochs = 50
learning_rate = 0.001
    
# Network definition
class FeedForwardNet(nn.Module):
    def __init__(self, l1_size, l2_size):
        super(FeedForwardNet, self).__init__()
        self.fc1 = nn.Linear(input_size, l1_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(l1_size, l2_size)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(l2_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

model = FeedForwardNet(5, 3)
    
# sgd optimizer
optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=0.9)

for epoch in range(11):
    print('Epoch ', epoch)
    for i in range(trainX_light.shape[0]):
        X = Variable(torch.from_numpy(trainX_light[i]).view(-1, 3))
        Y = Variable(torch.from_numpy(trainY_light[i]).view(-1, 1))

        # forward pass
        optimizer.zero_grad()
        output = model(X)
        loss = (Y - output).pow(2).sum()
        print(output.data[0, 0])

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        # monitor the total gradient norm across all parameters
        totalnorm = 0
        for p in model.parameters():
            modulenorm = p.grad.data.norm()
            totalnorm += modulenorm ** 2
        totalnorm = math.sqrt(totalnorm)
        print(totalnorm)
    
    # validation code
    if (epoch + 1) % 5 == 0:
        print(' test points', testX_light.shape[0])
        total_loss = 0
        for t in range(testX_light.shape[0]):
            X = Variable(torch.from_numpy(testX_light[t]).view(-1, 3))
            Y = Variable(torch.from_numpy(testY_light[t]).view(-1, 1))
            output = model(X)
            loss = (Y - output).pow(2).sum()
            print(output.data[0, 0])
            total_loss += loss
        print('epoch ', epoch, 'avg_loss ', total_loss.data[0] / testX_light.shape[0])

print('Done')

The problem I have now is that the validation code

output = model(X)

always produces exactly the same output value (I guess this value is some sort of garbage). I am not sure what mistake I am making in this part. Could someone help me figure out the mistake in my code?

Upvotes: 0

Views: 1181

Answers (1)

Arul

Reputation: 303

The reason that the network produced random values (and later inf) was the exploding gradient problem. Clipping the gradients with torch.nn.utils.clip_grad_norm(model.parameters(), 0.1) helped.
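For context, a minimal sketch of where the clipping call would sit in the inner training loop from the question; the 0.1 threshold is the value quoted above, and newer PyTorch releases rename the function to torch.nn.utils.clip_grad_norm_:

# inside the inner training loop
optimizer.zero_grad()
output = model(X)
loss = (Y - output).pow(2).sum()
loss.backward()
# rescale the gradients in place so their total norm is at most 0.1
torch.nn.utils.clip_grad_norm(model.parameters(), 0.1)
optimizer.step()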

Upvotes: 1
