user11140802

TypeError: forward() missing 1 required positional argument: 'hidden'

I'm trying to visualize my GRU model using PyTorchViz, but every time I run this code it gives me an error.

I want something like the example graph shown in the picture.

import torch
from torch import nn
from torchviz import make_dot, make_dot_from_trace

model = IC_V6(f.tokens)
x = torch.randn(1,8)

make_dot(model(x), params=dict(model.named_parameters()))

Here is my class for holding the data:

class Flickr8KImageCaptionDataset:

    def __init__(self):

        all_data = json.load(open('caption_datasets/dataset_flickr8k.json', 'r'))
        all_data=all_data['images']

        self.training_data = []
        self.test_data = []
        self.w2i = {ENDWORD: 0, STARTWORD: 1}
        self.word_frequency = {ENDWORD: 0, STARTWORD: 0}
        self.i2w = {0: ENDWORD, 1: STARTWORD}
        self.tokens = 2 #ENDWORD and STARTWORD are defaults
        self.batch_index = 0

        for data in all_data:
            if(data['split']=='train'):
                self.training_data.append(data)
            else:
                self.test_data.append(data)

            for sentence in data['sentences']:
                for token in sentence['tokens']:
                    if(token not in self.w2i.keys()):
                        self.w2i[token] = self.tokens
                        self.i2w[self.tokens] = token
                        self.tokens +=1
                        self.word_frequency[token] = 1
                    else:
                        self.word_frequency[token] += 1

    def image_to_tensor(self,filename):
        image = Image.open(filename)
        image = TF.resize(img=image, size=(HEIGHT,WIDTH))

        image = TF.to_tensor(pic=image)
        image = TF.normalize(image, mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

        return torch.unsqueeze(image,0)


    def return_train_batch(self): #size of 1 always

        #np.random.shuffle(self.training_data)
        for index in range(len(self.training_data)):

        #index = np.random.randint(len(self.training_data))
            sentence_index = np.random.randint(len(self.training_data[index]['sentences']))
            output_sentence_tokens = deepcopy(self.training_data[index]['sentences'][sentence_index]['tokens'])
            output_sentence_tokens.append(ENDWORD) #corresponds to end word

            image = self.image_to_tensor('/home/vincent/Documents/Final Code/Flicker8k_Dataset/'+self.training_data[index]['filename'])


            yield image, list(map(lambda x: self.w2i[x], output_sentence_tokens)), output_sentence_tokens, index

    def convert_tensor_to_word(self, output_tensor):
        output = F.log_softmax(output_tensor.detach().squeeze(), dim=0).numpy()
        return self.i2w[np.argmax(output)]

    def convert_sentence_to_tokens(self, sentence):

        tokens = sentence.split(" ")
        converted_tokens= list(map(lambda x: self.w2i[x], tokens))
        converted_tokens.append(self.w2i[ENDWORD])
        return converted_tokens

    def caption_image_greedy(self, net, image_filename, max_words=15): #non beam search, no temperature implemented
        net.eval()
        inception.eval()
        image_tensor = self.image_to_tensor(image_filename)
        hidden=None
        embedding=None
        words = []

        input_token = STARTWORD
        input_tensor = torch.tensor(self.w2i[input_token]).type(torch.LongTensor)


        for i in range(max_words):

            if(i==0):
                out, hidden=net(input_tensor, hidden=image_tensor, process_image=True)
            else:
                out, hidden=net(input_tensor, hidden)

            word = self.convert_tensor_to_word(out)

            input_token = self.w2i[word]
            input_tensor = torch.tensor(input_token).type(torch.LongTensor)


            if(word==ENDWORD):
                break
            else:
                words.append(word)

        return ' '.join(words)


    def forward_beam(self, net, hidden, process_image, partial_sentences, sentences, topn_words=5, max_sentences=10):

        max_words = 50

        hidden_index = {}

        while(sentences<max_sentences):

            #print("Sentences: ",sentences)

            new_partial_sentences = []
            new_partial_sentences_logp = []
            new_partial_avg_logp= []

            if(len(partial_sentences[-1][0])>max_words):
                break



            for partial_sentence in partial_sentences:

                input_token = partial_sentence[0][-1]

                input_tensor = torch.tensor(self.w2i[input_token]).type(torch.FloatTensor)

                if(partial_sentence[0][-1]==STARTWORD):
                    out, hidden=net(input_tensor, hidden, process_image=True)
                else:
                    out, hidden=net(input_tensor, torch.tensor(hidden_index[input_token]))

                #take first topn words and add as children to root

                out = F.log_softmax(out.detach().squeeze(), dim=0).numpy()
                out_indexes = np.argsort(out)[::-1][:topn_words]


                for out_index in out_indexes:

                    if(self.i2w[out_index]==ENDWORD):
                        sentences=sentences+1

                    else:

                        total_logp = float(out[out_index]) + partial_sentence[1]
                        new_partial_sentences_logp.append(total_logp)
                        new_partial_sentences.append([np.concatenate((partial_sentence[0], [self.i2w[out_index]])),total_logp])

                        len_words = len(new_partial_sentences[-1][0])



                        new_partial_avg_logp.append(total_logp/len_words)

                        #print(self.i2w[out_index])

                        hidden_index[self.i2w[out_index]] = deepcopy(hidden.detach().numpy())




            #select topn partial sentences

            top_indexes = np.argsort(new_partial_sentences_logp)[::-1][:topn_words]


            new_partial_sentences = np.array(new_partial_sentences)[top_indexes]

            #print("New partial sentences (topn):", new_partial_sentences)

            partial_sentences = new_partial_sentences


        return partial_sentences

    def caption_image_beam_search(self, net, image_filename, topn_words=10, max_sentences=10):

        net.eval()
        inception.eval()
        image_tensor = self.image_to_tensor(image_filename)
        hidden=None
        embedding=None
        words = []


        sentences = 0
        partial_sentences = [[[STARTWORD], 0.0]]
        #root_id = hash(input_token) #for start word
        #nodes = {}

        #nodes[root_id] = Node(root_id, [STARTWORD, 0], None)

        partial_sentences = self.forward_beam(net, image_tensor, True, partial_sentences, sentences,  topn_words, max_sentences)

        logp = []

        joined_sentences = []

        for partial_sentence in partial_sentences:


            joined_sentences.append([' '.join(partial_sentence[0][1:]),partial_sentence[1]])



        return joined_sentences

    def print_beam_caption(self, net, train_filename, num_captions=0):
        beam_sentences = self.caption_image_beam_search(net, train_filename) #use self rather than the global f
        if(num_captions==0):
            num_captions=len(beam_sentences)
        for sentence in beam_sentences[:num_captions]:
            print(sentence[0]+" [",sentence[1], "]")

And here is my GRU model:

class IC_V6(nn.Module):

    #V2: Fed image vector directly as hidden and fed words generated as inputs back to LSTM
    #V3: Added an embedding layer between words input and GRU/LSTM

    def __init__(self, token_dict_size):
        super(IC_V6, self).__init__()

        #Input is an image of height 500, and width 500


        self.embedding_size = INPUT_EMBEDDING
        self.hidden_state_size = HIDDEN_SIZE
        self.token_dict_size = token_dict_size
        self.output_size = OUTPUT_EMBEDDING

        self.batchnorm = nn.BatchNorm1d(self.embedding_size)

        self.input_embedding = nn.Embedding(self.token_dict_size, self.embedding_size)

        self.embedding_dropout = nn.Dropout(p=0.22)
        self.gru_layers = 3

        self.gru = nn.GRU(input_size=self.embedding_size, hidden_size=self.hidden_state_size, num_layers=self.gru_layers, dropout=0.22)
        self.linear = nn.Linear(self.hidden_state_size, self.output_size)
        self.out = nn.Linear(self.output_size, token_dict_size)

    def forward(self, input_tokens, hidden, process_image=False, use_inception=True):



        if(USE_GPU):
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        if(process_image):
            if(use_inception):
                inp=self.embedding_dropout(inception(hidden))
            else:
                inp=hidden
            #inp=self.batchnorm(inp)
            hidden=torch.zeros((self.gru_layers,1, self.hidden_state_size))
        else:
            inp=self.embedding_dropout(self.input_embedding(input_tokens.view(1).type(torch.LongTensor).to(device)))
            #inp=self.batchnorm(inp)


        hidden = hidden.view(self.gru_layers,1,-1)
        inp = inp.view(1,1,-1)

        out, hidden = self.gru(inp, hidden)
        out = self.out(self.linear(out))



        return out, hidden

This is how I instantiated them:

f = Flickr8KImageCaptionDataset()
net = IC_V6(f.tokens)

The error is:

TypeError Traceback (most recent call last)
<ipython-input-42-7993fc1a032f> in <module>
      6 x = torch.randn(1,8)
      7 
----> 8 make_dot(model(x), params=dict(model.named_parameters()))

~/anaconda3/envs/Thesis/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

TypeError: forward() missing 1 required positional argument: 'hidden'

What should I do to solve this problem? Any help will be much appreciated.

Upvotes: 4

Views: 65303

Answers (1)

MBT

Reputation: 24169

I think the error message is pretty straightforward: your forward() takes two positional arguments, input_tokens and hidden.

Python complains that one of them (hidden) is missing when you call your forward() function.

Looking at your code, you call your forward like this:

model(x)

So x is mapped to input_tokens, but you also need to hand over the second argument, hidden.

So you need to call it like this, providing your hidden state:

model(x, hidden)
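For example, here is a minimal sketch of how that could look for your model (assuming USE_GPU is False and that the HIDDEN_SIZE, INPUT_EMBEDDING and OUTPUT_EMBEDDING globals used by IC_V6 are defined; the token index 1 is just the STARTWORD entry from your dataset). Note that torch.randn(1,8) would not work as input_tokens anyway, because your forward() pushes it through an embedding layer that expects a single LongTensor index, and your forward() returns a tuple (out, hidden), so only the output tensor should go to make_dot:

import torch
from torchviz import make_dot

f = Flickr8KImageCaptionDataset()
model = IC_V6(f.tokens)
model.eval()

# a single token index (e.g. the STARTWORD index 1) as a LongTensor
x = torch.tensor([1], dtype=torch.long)

# zero hidden state shaped (num_layers, batch, hidden_size), as expected by the GRU
hidden = torch.zeros(model.gru_layers, 1, model.hidden_state_size)

# forward() returns (out, hidden); hand only the output tensor to make_dot
out, hidden = model(x, hidden)
make_dot(out, params=dict(model.named_parameters()))

If you want to trace the image path instead, the same idea applies: pass your image tensor as hidden together with process_image=True, just like you do in caption_image_greedy.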

Upvotes: 6
