Both RNN regression models (LSTM and GRU) predict the same value during evaluation but predict normally during training

Question

I am trying to train a regression RNN to predict the emotional valence (float between 1 and 5) from audio samples. Both LSTM and GRU models make normal predictions during the training loop, but when being evaluated, they always predict the same value.

I have tried changing the loss function (MSE, RMSE, MAE), the batch size, and the optimizer, but there wasn't any noticeable difference. The model does optimize: the loss decreases. I also tried calling model.train() in the eval loop, and the behavior is the same.

Models:

class GRUModel(nn.Module):
    """GRU regressor that maps a sequence to one scalar per sample.

    The output of the top GRU layer at the last time step is fed through a
    linear head with a single output feature.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability between stacked GRU layers. Forced to
            0 when ``num_layers`` is 1, because ``nn.GRU`` only applies
            dropout between layers and warns otherwise.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout=0.5):
        super().__init__()
        # With a single layer there is no "between layers" position, so a
        # non-zero value would have no effect and only trigger a UserWarning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        # Kept as nn.Sequential so existing checkpoints (keys "fc.0.*") still load.
        self.fc = nn.Sequential(nn.Linear(in_features=hidden_size, out_features=1))

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor for ``x`` of shape ``(batch, seq_len, input_size)``."""
        out, _ = self.gru(x)
        # Only the last time step is used for the prediction.
        return self.fc(out[:, -1, :])

class LSTMModel(nn.Module):
    """LSTM regressor that maps a sequence to ``output_size`` values per sample.

    The output of the top LSTM layer at the last time step is fed through a
    linear head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden and cell state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sample.
        dropout: Dropout probability between stacked LSTM layers. Forced to
            0 when ``num_layers`` is 1, because ``nn.LSTM`` only applies
            dropout between layers and warns otherwise.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)

        self.init_weights()

    def init_weights(self):
        """Initialise the LSTM and the linear head.

        NOTE(review): the constructor called this method but no definition
        was present, so building the model raised AttributeError. The scheme
        below (Xavier for input weights, orthogonal for recurrent weights,
        zero biases) is a common default -- confirm it matches what was
        originally intended.
        """
        for name, param in self.lstm.named_parameters():
            if "weight_ih" in name:
                nn.init.xavier_uniform_(param)
            elif "weight_hh" in name:
                nn.init.orthogonal_(param)
            elif "bias" in name:
                nn.init.zeros_(param)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, x):
        """Return ``(batch, output_size)`` for ``x`` of shape ``(batch, seq_len, input_size)``."""
        # nn.LSTM starts from zero hidden/cell states when none are passed,
        # and creates them with the input's dtype and device. Building them
        # by hand with torch.zeros() pinned them to float32 and failed for
        # double/half inputs.
        out, _ = self.lstm(x)
        # Only the last time step is used for the prediction.
        return self.fc(out[:, -1, :])

Train and eval loops:

def train_one_epoch():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``train_loader``, ``optimizer`` and
    ``criterion``. Each batch is expected to be a mapping with
    ``'audio_data'`` and ``'valence'`` entries.

    Returns:
        The mean over batches of ``sqrt(criterion(pred, target))`` (the
        RMSE when ``criterion`` is an MSE loss).
    """
    batch_losses = []
    model.train()
    # Follow the model's device instead of hard-coding CUDA, so the loop
    # also runs on CPU-only machines.
    device = next(model.parameters()).device
    with tqdm(train_loader, total=len(train_loader), desc="Training") as tqdm_train:
        for batch in tqdm_train:
            optimizer.zero_grad()

            data = batch['audio_data'].unsqueeze(-1).to(device)
            # squeeze(-1) drops only the trailing feature axis; a bare
            # squeeze() would also drop the batch axis for a batch of one.
            preds = model(data).squeeze(-1)

            # Force the target to the prediction's shape so a (B, 1) target
            # cannot silently broadcast against (B,) predictions into a
            # (B, B) loss. reshape() raises if the element counts differ.
            target = batch['valence'].to(device).reshape(preds.shape)

            # RMSE
            loss = torch.sqrt(criterion(preds, target))

            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())
            tqdm_train.set_postfix(loss=mean(batch_losses))
    return mean(batch_losses)

def test():
    """Evaluate the model on ``val_loader`` without tracking gradients.

    Uses the module-level ``model``, ``val_loader`` and ``criterion``. Each
    batch is expected to be a mapping with ``'audio_data'`` and
    ``'valence'`` entries.

    Returns:
        A ``(mean_loss, predictions)`` tuple. ``mean_loss`` is the mean over
        batches of ``sqrt(criterion(pred, target))``, the same quantity the
        training loop reports, so the two numbers are comparable.
        ``predictions`` is a flat list of every model output rounded to
        four decimals.
    """
    batch_losses = []
    predictions = []
    model.eval()
    # Follow the model's device instead of hard-coding CUDA.
    device = next(model.parameters()).device
    with torch.no_grad():
        with tqdm(val_loader, total=len(val_loader), desc="Testing") as tqdm_test:
            for batch in tqdm_test:
                data = batch['audio_data'].unsqueeze(-1).to(device)
                # squeeze(-1) drops only the trailing feature axis; a bare
                # squeeze() would also drop the batch axis for a batch of one.
                preds = model(data).squeeze(-1)

                predictions.extend(round(value, 4) for value in preds.flatten().tolist())

                # Force the target to the prediction's shape so mismatched
                # (B, 1) vs (B,) shapes cannot silently broadcast.
                target = batch['valence'].to(device).reshape(preds.shape)
                # Same square-root transform as the training loss.
                loss = torch.sqrt(criterion(preds, target)).item()

                batch_losses.append(loss)
    return mean(batch_losses), predictions

Any feedback is appreciated!

Both RNN regression models (LSTM and GRU) predict the same value during evaluation but predict normally during training

Answers (0)

Related Questions