Leon Wang

Reputation: 188

Self-defined tensorflow decoder TypeError: __call__() missing 1 required positional argument: 'inputs'

I am using TensorFlow 2.0 to train my own attention model; however, I ran into one big issue when building my decoder class, like this:

TypeError                                 Traceback (most recent call last)
<ipython-input-19-3042369c4295> in <module>
      9     enc_hidden_h=fw_sample_state_h,
     10     enc_hidden_c=fw_sample_state_c,
---> 11     enc_output=sample_output)
     12 
     13 print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

TypeError: __call__() missing 1 required positional argument: 'inputs'

And my encoder-decoder attention model is as below. Encoder: a self-defined pBLSTM

import tensorflow as tf

class Encoder(tf.keras.Model):
    def __init__(self, lstm_units, final_units, batch_sz, conv_filters, mfcc_dims):
        super(Encoder, self).__init__()
        self.lstm_units = lstm_units
        self.final_units = final_units
        self.batch_sz = batch_sz
        self.conv_filters = conv_filters
        self.mfcc_dims = mfcc_dims

        # Convolution layer to extract feature after MFCC
        self.conv_feat = tf.keras.layers.Conv1D(filters=self.conv_filters, 
                                                kernel_size=self.mfcc_dims, 
                                                padding='valid', 
                                                activation='relu', 
                                                strides=self.mfcc_dims)

    def call(self, x):
        '''
        build a pyramidal LSTM neural network encoder
        '''
        # Convolution Feature Extraction
        x = self.conv_feat(x)

        # initialize states for forward and backward
        initial_state_fw = None
        initial_state_bw = None

        counter = 0
        while(x.shape[1] > self.final_units):
            counter += 1
            # forward LSTM
            fw_output, fw_state_h, fw_state_c = self.build_lstm(True)(x, initial_state=initial_state_fw)

            # backward LSTM
            bw_output, bw_state_h, bw_state_c = self.build_lstm(False)(x, initial_state=initial_state_bw)

            x = tf.concat([fw_output, bw_output], -1)
            x = self.reshape_pyramidal(x)

            initial_state_fw = [fw_state_h, fw_state_c]
            initial_state_bw = [bw_state_h, bw_state_c]

        print(f"Encoder pyramid layer number: {counter}\n")
        return x, (fw_state_h, fw_state_c), (bw_state_h, bw_state_c)

    def build_lstm(self, back=True):
        '''
        build LSTM layer for forward and backward
        '''
        return tf.keras.layers.LSTM(units=self.lstm_units, 
                                    return_sequences=True, 
                                    return_state=True, 
                                    go_backwards=back)

    def reshape_pyramidal(self, outputs):
        '''
        After concatenating forward and backward outputs
        return the reshaped output
        '''
        batch_size, time_steps, num_units = outputs.shape

        return tf.reshape(outputs, (batch_size, -1, num_units * 2))
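For reference, this is roughly how I sanity-check the encoder. The hyperparameters and shapes below are made-up values for a quick smoke test; I flatten the MFCC frames into a single channel so that the Conv1D with kernel_size=strides=mfcc_dims slices out one frame per step:

MFCC_DIMS = 13  # made-up value
encoder = Encoder(lstm_units=64, final_units=32, batch_sz=4,
                  conv_filters=64, mfcc_dims=MFCC_DIMS)

# (batch, frames * mfcc_dims, 1): 256 MFCC frames flattened into one channel
dummy_mfcc = tf.random.normal((4, 256 * MFCC_DIMS, 1))
enc_out, (fw_h, fw_c), (bw_h, bw_c) = encoder(dummy_mfcc)
print(enc_out.shape)  # (4, 32, 256) after three pyramid reductions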

Attention Model: built following this paper: https://arxiv.org/abs/1508.04025v5

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query hidden state shape == (batch_size, hidden size)
        # query_with_time_axis shape == (batch_size, 1, hidden size)
        # values shape == (batch_size, max_len, hidden size)
        # we are doing this to broadcast addition along the time axis to calculate the score
        query_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        # the shape of the tensor before applying self.V is (batch_size, max_length, units)
        score = self.V(tf.nn.tanh(
            self.W1(query_with_time_axis) + self.W2(values)))

        # attention_weights shape == (batch_size, max_length, 1)
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
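For reference, a quick shape check of the attention layer with dummy tensors (all numbers are made up for illustration):

attention_layer = BahdanauAttention(units=10)
sample_query = tf.random.normal((4, 64))       # (batch_size, hidden size)
sample_values = tf.random.normal((4, 32, 64))  # (batch_size, max_len, hidden size)
context_vector, attention_weights = attention_layer(sample_query, sample_values)
print(context_vector.shape)     # (4, 64)
print(attention_weights.shape)  # (4, 32, 1)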

Decoder: A 1-layer LSTM decoder

class Decoder(tf.keras.Model):
    def __init__(self, target_sz, embedding_dim, decoder_units, batch_sz, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.batch_sz = batch_sz
        self.decoder_units = decoder_units
        self.embedding = tf.keras.layers.Embedding(target_sz, embedding_dim)
        self.attention = BahdanauAttention(self.decoder_units)
        self.lstm = tf.keras.layers.LSTM(units=self.decoder_units, return_sequences=True, return_state=True)
        self.fc = tf.keras.layers.Dense(target_sz)


    def call(self, x, enc_hidden_h, enc_hidden_c, enc_output):
        '''
        build LSTM decoder
        '''
        # enc_output shape == (batch_size, max_length, hidden_size)
        context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)

        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        # passing the concatenated vector to the LSTM
        output, state_h, state_c = self.lstm(x)

        # output shape == (batch_size * 1, hidden_size)
        output = tf.reshape(output, (-1, output.shape[-1]))

        # output shape == (batch_size, vocab)
        x = self.fc(output)

        return x, (state_h, state_c), attention_weights

I ran into that error when testing with my example input, as below:

example_input_batch, example_target_batch = next(iter(dataset))
sample_output, (fw_sample_state_h, fw_sample_state_c), bw_sample_state = encoder(example_input_batch)
decoder = Decoder(target_sz=PHONEME_SIZE, 
                  embedding_dim=EMBEDDING_DIM, 
                  decoder_units=LSTM_UNITS, 
                  batch_sz=BATCH_SIZE)

sample_target_size = tf.random.uniform((BATCH_SIZE, 1))
sample_decoder_output, sample_decoder_hidden, attention_weights = decoder(
    x=sample_target_size, 
    enc_hidden_h=fw_sample_state_h, 
    enc_hidden_c=fw_sample_state_c, 
    enc_output=sample_output)

Upvotes: 2

Views: 7496

Answers (1)

Anwarvic

Reputation: 12992

As discussed in the comments, the problem was that the poster was inheriting from tf.keras.Model when creating the Decoder() class, and this superclass expects an inputs argument in its __call__() method.
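To illustrate the mechanism, here is a minimal sketch (the Toy model below is made up purely for illustration): in TensorFlow 2.0, tf.keras.Model.__call__ declares its first positional argument as inputs, so calling a subclassed model with keyword arguments only never fills that slot:

import tensorflow as tf

class Toy(tf.keras.Model):
    def __init__(self):
        super(Toy, self).__init__()
        self.dense = tf.keras.layers.Dense(4)

    # first argument of call() is named `x`, not `inputs`
    def call(self, x, extra):
        return self.dense(x) + extra

toy = Toy()
a = tf.ones((2, 3))
b = tf.zeros((2, 4))

toy(a, extra=b)    # OK: `a` fills __call__'s positional `inputs` argument
toy(x=a, extra=b)  # TypeError: __call__() missing 1 required positional argument: 'inputs'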

So, this error can be resolved by changing x to inputs in the Decoder.call() method, like so:

def call(self, inputs, enc_hidden_h, enc_hidden_c, enc_output):
    '''
    build LSTM decoder
    '''
    # enc_output shape == (batch_size, max_length, hidden_size)
    context_vector, attention_weights = self.attention(enc_hidden_h, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(inputs)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the LSTM
    output, state_h, state_c = self.lstm(x)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[-1]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, (state_h, state_c), attention_weights
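With that rename, the test snippet from the question should work once the keyword x= is changed to inputs= (or the target batch is simply passed positionally), for example:

sample_target = tf.random.uniform((BATCH_SIZE, 1))
sample_decoder_output, sample_decoder_hidden, attention_weights = decoder(
    inputs=sample_target,
    enc_hidden_h=fw_sample_state_h,
    enc_hidden_c=fw_sample_state_c,
    enc_output=sample_output)

print('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))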

Upvotes: 2
