Reputation: 121
I am trying to train a regression RNN to predict the emotional valence (float between 1 and 5) from audio samples. Both LSTM and GRU models make normal predictions during the training loop, but when being evaluated, they always predict the same value.
I have tried different loss functions (MSE, RMSE, MAE), batch sizes, and optimizers, with no noticeable difference. The model does optimize: the training loss decreases. I also tried calling model.train() instead of model.eval() in the eval loop, and the predictions were still constant.
Models:
class GRUModel(nn.Module):
    """GRU regressor that maps a sequence to a single scalar per sample.

    The recurrent stack is built with ``batch_first=True``, so ``forward``
    expects input laid out as ``(batch, seq_len, input_size)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each GRU layer's hidden state.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        # nn.GRU only applies dropout *between* stacked layers; with a single
        # layer it has no effect and PyTorch emits a UserWarning, so only
        # request it when there is more than one layer.
        inter_layer_dropout = 0.5 if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        # Kept as a one-element Sequential so parameter names stay
        # ``fc.0.weight`` / ``fc.0.bias`` for existing checkpoints.
        self.fc = nn.Sequential(nn.Linear(in_features=hidden_size, out_features=1))

    def forward(self, x):
        """Return a ``(batch, 1)`` prediction from the last time step."""
        out, _ = self.gru(x)
        # Regress from the top layer's output at the final time step only.
        out = self.fc(out[:, -1, :])
        return out
class LSTMModel(nn.Module):
    """LSTM regressor that predicts ``output_size`` values per sequence.

    The recurrent stack is built with ``batch_first=True``, so ``forward``
    expects input laid out as ``(batch, seq_len, input_size)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM only applies dropout *between* stacked layers; with a single
        # layer it has no effect and PyTorch emits a UserWarning, so only
        # request it when there is more than one layer.
        inter_layer_dropout = 0.5 if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Initialise the LSTM and the output layer.

        The constructor calls this method, so it must exist for the model to
        be instantiable. Input-to-hidden weights use Xavier-uniform,
        hidden-to-hidden weights use an orthogonal init, and all biases start
        at zero.
        """
        for name, param in self.lstm.named_parameters():
            if "weight_ih" in name:
                nn.init.xavier_uniform_(param)
            elif "weight_hh" in name:
                nn.init.orthogonal_(param)
            elif "bias" in name:
                nn.init.zeros_(param)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, x):
        """Return a ``(batch, output_size)`` prediction from the last time step."""
        # nn.LSTM defaults the initial hidden and cell states to zeros on the
        # input's device and dtype, so no explicit (h0, c0) is needed; building
        # them by hand as float32 broke half/double precision inputs.
        out, _ = self.lstm(x)
        # Regress from the top layer's output at the final time step only.
        out = self.fc(out[:, -1, :])
        return out
Train and eval loops:
def train_one_epoch():
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``train_loader``. The per-batch loss is the square root of ``criterion``
    (RMSE when ``criterion`` is an MSE loss).

    Returns:
        ``mean`` of the per-batch loss values for the epoch.
    """
    model.train()
    # Follow the model's own device instead of hard-coding CUDA, so the loop
    # also runs on CPU-only machines.
    device = next(model.parameters()).device
    all_loss = []
    running_total = 0.0
    with tqdm(train_loader, total=len(train_loader), desc="Training") as tqdm_train:
        for batch in tqdm_train:
            optimizer.zero_grad()
            # Add a trailing feature axis so each sample becomes a sequence of
            # one-dimensional steps.
            data = batch['audio_data'].unsqueeze(-1).to(device)
            target = batch['valence'].to(device)
            outputs = model(data)
            # RMSE
            # squeeze(-1) drops only the output axis; a bare squeeze() would
            # also drop the batch axis when the batch holds a single sample.
            loss = torch.sqrt(criterion(outputs.squeeze(-1), target))
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            all_loss.append(batch_loss)
            # Running sum avoids re-averaging the full history every step.
            running_total += batch_loss
            tqdm_train.set_postfix(loss=running_total / len(all_loss))
    return mean(all_loss)
def test():
    """Evaluate the model on ``val_loader`` without updating weights.

    Uses the module-level ``model``, ``criterion`` and ``val_loader``.

    Returns:
        A tuple ``(mean_loss, predictions)`` where ``mean_loss`` is ``mean`` of
        the per-batch ``criterion`` values and ``predictions`` is a flat list
        of every model output rounded to four decimals.
    """
    model.eval()
    # Follow the model's own device instead of hard-coding CUDA, so the loop
    # also runs on CPU-only machines.
    device = next(model.parameters()).device
    all_loss = []
    predictions = []
    with torch.no_grad():
        with tqdm(val_loader, total=len(val_loader), desc="Testing") as tqdm_test:
            for batch in tqdm_test:
                # Add a trailing feature axis so each sample becomes a
                # sequence of one-dimensional steps.
                data = batch['audio_data'].unsqueeze(-1).to(device)
                target = batch['valence'].to(device)
                # squeeze(-1) drops only the output axis; a bare squeeze()
                # would also drop the batch axis for a single-sample batch.
                outputs = model(data).squeeze(-1)
                # One host transfer per batch instead of one .item() per value.
                predictions.extend(round(value, 4) for value in outputs.tolist())
                # NOTE(review): the training loop reports sqrt(criterion) while
                # this reports criterion directly, so the two numbers are not
                # on the same scale - confirm which metric is intended.
                all_loss.append(criterion(outputs, target).item())
    return mean(all_loss), predictions
Any feedback is appreciated!
Upvotes: 0
Views: 25