Reputation: 14067
I am trying to concatenate an embedding layer with other features. It doesn't give me any error, but it doesn't do any training either. Is anything wrong with this model definition, and how can I debug it?
Note: the last column (feature) in my X is a word index (word2ix) for a single word. Note: the net works fine without the embedding feature/layer.
Originally posted on the PyTorch forum.
import torch
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, n_features, h_sizes, num_words, embed_dim, out_size, dropout=None):
        super().__init__()
        self.num_layers = len(h_sizes)  # hidden + input
        self.embedding = torch.nn.Embedding(num_words, embed_dim)
        self.hidden = torch.nn.ModuleList()
        self.bnorm = torch.nn.ModuleList()
        if dropout is not None:
            self.dropout = torch.nn.ModuleList()
        else:
            self.dropout = None
        for k in range(len(h_sizes)):
            if k == 0:
                self.hidden.append(torch.nn.Linear(n_features, h_sizes[0]))
                self.bnorm.append(torch.nn.BatchNorm1d(h_sizes[0]))
                if self.dropout is not None:
                    self.dropout.append(torch.nn.Dropout(p=dropout))
            else:
                if k == 1:
                    # the first hidden layer also receives the embedding output
                    input_dim = h_sizes[0] + embed_dim
                else:
                    input_dim = h_sizes[k - 1]
                self.hidden.append(torch.nn.Linear(input_dim, h_sizes[k]))
                self.bnorm.append(torch.nn.BatchNorm1d(h_sizes[k]))
                if self.dropout is not None:
                    self.dropout.append(torch.nn.Dropout(p=dropout))
        # Output layer
        self.out = torch.nn.Linear(h_sizes[-1], out_size)

    def forward(self, inputs):
        # Feedforward
        for l in range(self.num_layers):
            if l == 0:
                x = self.hidden[l](inputs[:, :-1])
                x = self.bnorm[l](x)
                if self.dropout is not None:
                    x = self.dropout[l](x)
                embeds = self.embedding(inputs[:, -1])  # .view((1, -1)
                x = torch.cat((embeds, x), dim=1)
            else:
                x = self.hidden[l](x)
                x = self.bnorm[l](x)
                if self.dropout is not None:
                    x = self.dropout[l](x)
            x = F.relu(x)
        output = self.out(x)
        return output
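One quick way to check whether any learning is happening at all is to look at parameter dtypes and gradients after a backward pass (a generic debugging sketch, assuming net has already been run through one loss.backward() call):

for name, p in net.named_parameters():
    grad_norm = None if p.grad is None else p.grad.norm().item()
    # a None or zero gradient points at a broken graph or a frozen layer
    print(name, p.dtype, grad_norm)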
Upvotes: 2
Views: 10752
Reputation: 14067
There were a few issues. The key one was the data type: I mixed float features and integer indices in a single float tensor.
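To illustrate why the dtype matters (my example, not from the original post): torch.nn.Embedding expects integer (long) indices, so a word index that has been cast to float together with the other features cannot be used for the lookup.

import torch

embedding = torch.nn.Embedding(5, 2)
print(embedding(torch.tensor([3])).shape)   # long indices work: torch.Size([1, 2])
# embedding(torch.tensor([3.0]))            # float indices raise a RuntimeError about the index dtype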
Sample data and training loop before the fix:
import numpy as np
import torch

# assumed setup; `device` is used below but was defined elsewhere in the original
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

NUM_TARGETS = 4
NUM_FEATURES = 3
NUM_TEXT_FEATURES = 1

x = np.random.rand(5, NUM_FEATURES)
y = np.random.rand(5, NUM_TARGETS)
word_ix = np.arange(5).reshape(-1, 1).astype(int)

x_train = np.append(x, word_ix, axis=1)
# the word-index column is cast to float along with everything else -- this is the problem
x_train = torch.from_numpy(x_train).float().to(device)
y_train = torch.from_numpy(y).float().to(device)

h_sizes = [2, 2]
net = Net(x_train.shape[1], h_sizes=h_sizes, num_words=5, embed_dim=2,
          out_size=y_train.shape[1], dropout=.01)  # define the network
print(net)  # net architecture
net = net.float()
net.to(device)

optimizer = torch.optim.Adam(net.parameters(), lr=0.0001, weight_decay=.01)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss

# one training loop
prediction = net(x_train)              # input x and predict based on x
loss = loss_func(prediction, y_train)  # must be (1. nn output, 2. target)
optimizer.zero_grad()                  # clear gradients for next train
loss.backward()                        # backpropagation, compute gradients
optimizer.step()                       # apply gradients
# train_losses.append(loss.detach().to('cpu').numpy())
To resolve this, I separated the word-index feature from x and also removed net.float(). The dtype conversions changed to:
x_train = torch.from_numpy(x).float().to(device)
y_train = torch.from_numpy(y).float().to(device)
# NOTE: word index needs to be long
word_ix = torch.from_numpy(word_ix).to(torch.long).to(device)
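A quick sanity check on the resulting dtypes (illustrative, not from the original post):

print(x_train.dtype, y_train.dtype, word_ix.dtype)  # expected: torch.float32 torch.float32 torch.int64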
and the forward method changed to:
def forward(self, inputs, word_ix):
    # Feedforward
    for l in range(self.num_layers):
        if l == 0:
            x = self.hidden[l](inputs)
            x = self.bnorm[l](x)
            if self.dropout is not None:
                x = self.dropout[l](x)
            embeds = self.embedding(word_ix)
            # NOTE:
            # embeds has a shape of (batch_size, 1, embed_dim);
            # in order to concatenate it with x, reshape it to
            # (batch_size, embed_dim)
            embeds = embeds.view(embeds.shape[0], embeds.shape[2])
            x = torch.cat((x, embeds), dim=1)
        else:
            x = self.hidden[l](x)
            x = self.bnorm[l](x)
            if self.dropout is not None:
                x = self.dropout[l](x)
        x = F.relu(x)
    output = self.out(x)
    return output
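With this signature, the float features and the long word indices are passed as separate arguments. A sketch of the adjusted training step, assuming the same optimizer and loss as above:

prediction = net(x_train, word_ix)     # float features and long indices passed separately
loss = loss_func(prediction, y_train)
optimizer.zero_grad()
loss.backward()
optimizer.step()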
Upvotes: 5