Wassim Jaoui

Reputation: 95

Machine Translation FFN: Dimension problem due to window size

This is my first time building an FFN; I'm training it to translate French to English via word prediction. The input is two arrays: one of size 2 * window_size + 1 from the source language, and one of size window_size from the target language. The label has size 1.

For example, for window_size = 2:

["je","mange", "la", "pomme","avec"] 

and

 ["I", "eat"]

So the inputs have sizes [5] and [2], which after concatenation gives 7.

Label: "the" (refering to "la" in French) The label is changed to one-hot-encoding before comparing with yHat

I use a unique index for each word (1 to len(vocab)) and train on the indices, not the words. The output of the FFN is a probability distribution of the size of the target-language vocabulary.
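To make the setup concrete, here is a minimal sketch of the window/label construction (the helper below is illustrative only, not my actual preprocessing code):

def make_example(src_tokens, tgt_tokens, center, window_size=2):
    # 2 * window_size + 1 source words around the aligned position
    src_window = src_tokens[center - window_size : center + window_size + 1]
    # the window_size target words already translated
    tgt_window = tgt_tokens[center - window_size : center]
    # the label is the next target word
    label = tgt_tokens[center]
    return src_window, tgt_window, label

src = ["je", "mange", "la", "pomme", "avec"]
tgt = ["I", "eat", "the", "apple", "with"]
print(make_example(src, tgt, 2))
# (['je', 'mange', 'la', 'pomme', 'avec'], ['I', 'eat'], 'the')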

The problem is that the FFN doesn't learn and the accuracy stays at 0. When I print the sizes of final_y (the target probabilities) and yHat (the model hypothesis), they have different numbers of dimensions:

yHat.size() = [512, 7, 10212]

where 512 is the batch size, 7 is the concatenated input size, and 10212 is the target vocab size, while

final_y.size() = [512, 10212]

Across the forward method I get these sizes:

torch.Size([512, 5, 32])
torch.Size([512, 5, 64])
torch.Size([512, 5, 64])
torch.Size([512, 2, 256])
torch.Size([512, 2, 32])
torch.Size([512, 2, 64])
torch.Size([512, 2, 64])
torch.Size([512, 7, 64])
torch.Size([512, 7, 128])
torch.Size([512, 7, 10212])

Since the accuracy only increases when yHat == final_y, I suspect this is never the case because they don't even have the same number of dimensions (3D vs. 2D). Is this the problem? Please refer to the code below, and tell me if you need any other info.
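For reference, here is a minimal standalone snippet (separate from my code below) showing where the extra dimension seems to come from: nn.Linear only transforms the last dimension of a 3D tensor, so the sequence length of 7 is carried through to the output. It also shows the [N, C] logits / [N] integer-target shapes that nn.CrossEntropyLoss expects; the numbers are the ones from my setup.

import torch
import torch.nn as nn

batch, seq_len, vocab = 512, 7, 10212

# nn.Linear transforms only the last dimension, so a 3D input stays 3D:
x = torch.randn(batch, seq_len, 128)   # like xCat after fc2
out = nn.Linear(128, vocab)(x)
print(out.shape)                       # torch.Size([512, 7, 10212])

# nn.CrossEntropyLoss expects [N, C] logits and [N] integer class targets
# (no one-hot encoding needed):
logits = torch.randn(batch, vocab)
targets = torch.randint(0, vocab, (batch,))
print(nn.CrossEntropyLoss()(logits, targets))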

The code itself runs without errors.

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

trainingData = TensorDataset(encoded_source_windows, encoded_target_windows, encoded_labels)
# print(trainingData)
batchsize = 512

trainingLoader = DataLoader(trainingData, batch_size=batchsize, drop_last=True)

def ffnModel(vocabSize1,vocabSize2, learningRate=0.01):
    class ffNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.embeds_src = nn.Embedding(vocabSize1, 256)
            self.embeds_target = nn.Embedding(vocabSize2, 256)
            # input layer
            self.inputSource = nn.Linear(256, 32)
            self.inputTarget = nn.Linear(256, 32)
            # hidden layer 1
            self.fc1 = nn.Linear(32, 64)
            self.bnormS = nn.BatchNorm1d(5)
            self.bnormT = nn.BatchNorm1d(2)
            # Layer(s) after Concatenation:
            self.fc2 = nn.Linear(64,128)
            self.output = nn.Linear(128, vocabSize2)
            self.softmaaax = nn.Softmax(dim=0)
        # forward pass
        def forward(self, xSource, xTarget):
            xSource = self.embeds_src(xSource)           # [batch, 5, 256]
            xSource = F.relu(self.inputSource(xSource))  # [batch, 5, 32]
            xSource = F.relu(self.fc1(xSource))          # [batch, 5, 64]
            xSource = self.bnormS(xSource)               # [batch, 5, 64]
            xTarget = self.embeds_target(xTarget)        # [batch, 2, 256]
            xTarget = F.relu(self.inputTarget(xTarget))  # [batch, 2, 32]
            xTarget = F.relu(self.fc1(xTarget))          # [batch, 2, 64]
            xTarget = self.bnormT(xTarget)               # [batch, 2, 64]
            xCat = torch.cat((xSource, xTarget), dim=1)  # [batch, 7, 64]  dim=128 or 1 ?
            xCat = F.relu(self.fc2(xCat))                # [batch, 7, 128]
            print(xCat.size())
            xCat = self.softmaaax(self.output(xCat))     # [batch, 7, vocabSize2]
            return xCat
    # creating instance of the class
    net = ffNetwork()
    # loss function
    lossfun = nn.CrossEntropyLoss()
    # lossfun = nn.NLLLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)
    return net, lossfun, optimizer

def trainModel(vocabSize1,vocabSize2, learningRate):
    # number of epochs
    numepochs = 64
    # create a new Model instance
    net, lossfun, optimizer = ffnModel(vocabSize1,vocabSize2, learningRate)
    # initialize losses
    losses = torch.zeros(numepochs)
    trainAcc = []
    # loop over training data batches
    batchAcc = []
    batchLoss = []
    for epochi in range(numepochs):
        #Switching on training mode
        net.train()
        # loop over training data batches
        batchAcc = []
        batchLoss = []
        for A, B, y in tqdm(trainingLoader):
            # forward pass and loss
            # build a one-hot row of length target_vocab_length for each label
            final_y = []
            for i in range(y.size(dim=0)):
                yy = [0] * target_vocab_length
                yy[y[i]] = 1
                final_y.append(yy)
            final_y = torch.tensor(final_y)
            yHat = net(A, B)
            loss = lossfun(yHat, final_y)
            ################
            print("\n yHat.size()")
            print(yHat.size())
            print("final_y.size()")
            print(final_y.size())
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # loss from this batch
            batchLoss.append(loss.item())
            print(f'batchLoss: {loss.item()}')
            #Accuracy calculator:
            matches = torch.argmax(yHat) == final_y  # booleans (false/true)                                
            matchesNumeric = matches.float()  # convert to numbers (0/1)
            accuracyPct = 100 * torch.mean(matchesNumeric)  # average and x100
            batchAcc.append(accuracyPct)  # add to list of accuracies
            print(f'accuracyPct: {accuracyPct}')

        trainAcc.append(np.mean(batchAcc))

        losses[epochi] = np.mean(batchLoss)
    return trainAcc,losses,net

trainAcc,losses,net = trainModel(len(source_vocab),len(target_vocab), 0.01)
print(trainAcc)

Upvotes: 1

Views: 83

Answers (0)
