TKT

Reputation: 31

Calling a basic LSTM cell within a custom TensorFlow cell

I'm trying to implement the Match-LSTM from this paper: https://arxiv.org/pdf/1608.07905.pdf

I'm using TensorFlow. One part of the architecture is an RNN that uses the input and the previous state to compute an attention vector, applies that vector to a context, concatenates the result with the inputs, and sends them into an LSTM. To build the first part of this RNN, I wrote a custom cell for TensorFlow to call, but I'm not sure how to send the results into an LSTM. Is it possible to call the basic LSTM cell within the custom cell I'm writing? I tried this a few different ways but kept getting the error "'module' object has no attribute 'rnn_cell'" at the line where the LSTM cell is called. Any help would be much appreciated!
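For concreteness, one step of the cell I'm trying to build would look roughly like this (a sketch; match_step and attention_fn are just illustrative names, not from the paper):

    import tensorflow as tf

    def match_step(inputs, state, context, lstm_cell, attention_fn):
        # attention_fn stands in for the attention computation; assume it
        # returns weights of shape [batch, 1, context_len]
        alpha = attention_fn(inputs, state, context)
        weighted = tf.squeeze(tf.matmul(alpha, context), axis=1)  # apply attention to the context
        z = tf.concat([inputs, weighted], axis=1)                 # concatenate with the inputs
        return lstm_cell(z, state)                                # send the result into the LSTM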

EDIT to add code:

import numpy as np
import tensorflow as tf

class MatchLSTMCell(tf.contrib.rnn.RNNCell):

    def __init__(self, state_size, question_tensor, encoded_questions, batch_size):
        self._state_size = state_size
        self.question_tensor = question_tensor
        self.encoded_questions = encoded_questions
        self.batch_size = batch_size

    @property
    def state_size(self):
        return self._state_size

    @property
    def output_size(self):
        return self._state_size

    def __call__(self, inputs, state, scope=None):
        scope = scope or type(self).__name__

        with tf.variable_scope(scope):

            W_p = tf.get_variable("W_p", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
            W_r = tf.get_variable("W_r", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
            b_p = tf.get_variable("b_p", dtype=tf.float64, shape=[self.state_size])
            w = tf.get_variable("w", dtype=tf.float64, shape=[1, self.state_size])
            b = tf.get_variable("b", dtype=tf.float64, shape=[])

            # G combines the question tensor with the current input and state,
            # broadcasting the (batch, state_size) term over the question length
            G = tf.nn.tanh(
                tf.transpose(tf.transpose(self.question_tensor, perm=[1, 0, 2]) +
                             (tf.matmul(inputs, W_p) + tf.matmul(state, W_r) + b_p), perm=[1, 0, 2])
            )

            # one attention score per question token, per batch element
            attention_list = []
            for i in range(self.batch_size):
                attention_matrix = tf.matmul(G[i, :, :], tf.transpose(w))
                attention_list.append(attention_matrix)
            attention_scores = tf.stack(attention_list)
            a = tf.nn.softmax(attention_scores + b)
            a = tf.reshape(a, [self.batch_size, -1])

            # weight the encoded questions by the attention distribution
            weighted_question_list = []
            for i in range(self.batch_size):
                attention_vector = tf.matmul(tf.reshape(a[i], [1, -1]), self.encoded_questions[i])
                weighted_question_list.append(attention_vector)
            weighted_questions = tf.stack(weighted_question_list)
            weighted_questions = tf.reshape(weighted_questions, [self.batch_size, -1])  # was hardcoded as [32, -1]

            z = tf.concat([inputs, weighted_questions], 1)
            # this is the line that raises "'module' object has no attribute 'rnn_cell'"
            lstm_cell = tf.nn.rnn_cell.LSTMCell(self.state_size)
            output, new_state = lstm_cell(z, state)

        return output, new_state

Upvotes: 0

Views: 1187

Answers (1)

DanielT

Reputation: 11

I'm also trying to reimplement Match-LSTM on SQuAD as an experiment. I used MurtyShikhar's implementation as a reference, and it works! However, he had to customize AttentionWrapper and used the existing BasicLSTMCell.
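For reference, the AttentionWrapper route looks roughly like this (a minimal sketch assuming the TF 1.x contrib API; num_units and the two placeholders are illustrative, not from his code):

    import tensorflow as tf

    num_units = 150  # hidden size; illustrative value
    encoded_questions = tf.placeholder(tf.float32, [None, None, num_units])
    encoded_passage = tf.placeholder(tf.float32, [None, None, num_units])

    # wrap an existing LSTM cell with an attention mechanism over the question;
    # AttentionWrapper's default cell_input_fn concatenates the attention
    # with the inputs before they reach the inner cell
    cell = tf.contrib.rnn.BasicLSTMCell(num_units)
    attention = tf.contrib.seq2seq.BahdanauAttention(num_units, memory=encoded_questions)
    match_cell = tf.contrib.seq2seq.AttentionWrapper(cell, attention)
    outputs, final_state = tf.nn.dynamic_rnn(match_cell, encoded_passage, dtype=tf.float32)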

I also tried to create a Match_LSTM cell by feeding z and state as the (inputs, state) pair into a basic LSTM:

    # module-level imports this snippet relies on (TF 1.x internals;
    # _linear's home module moved around across 1.x releases)
    from tensorflow.python.ops import variable_scope as vs
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import array_ops
    from tensorflow.python.ops.rnn_cell_impl import _linear

    def __call__(self, inputs, state):
        # c is not an output; it is the LSTM's "memory" (cell state), so it
        # must be updated and passed along as new_c
        c, h = state

        # ...calculate your z:
        # ...inputs is each token of the context (passage) in turn
        # ...calculate alpha_Q
        z = tf.concat([inputs, alpha_Q], axis=1)

        # this part reimplements BasicLSTMCell's core update
        with vs.variable_scope("LSTM_core"):
            sigmoid = math_ops.sigmoid
            # "dimension" is the cell's hidden size (e.g. self._num_units)
            concat = _linear([z, h], dimension * 4, bias=True)
            i, j, f, o = array_ops.split(concat, num_or_size_splits=4, axis=1)
            new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))

            new_h = self._activation(new_c) * sigmoid(o)
            # wrapping these in tf.contrib.rnn.LSTMStateTuple is the usual convention
            new_state = (new_c, new_h)
        return new_h, new_state
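Alternatively, to answer the original question directly: you can hold a BasicLSTMCell inside the custom cell and call it, instead of re-deriving the update equations. A minimal sketch (assuming the TF 1.x contrib API; the attention computation is elided):

    import tensorflow as tf

    class MatchLSTMCellSketch(tf.contrib.rnn.RNNCell):
        """Delegates the LSTM update to an inner cell."""

        def __init__(self, num_units):
            super(MatchLSTMCellSketch, self).__init__()
            # created once, so every timestep reuses the same LSTM variables
            self._cell = tf.contrib.rnn.BasicLSTMCell(num_units)

        @property
        def state_size(self):
            return self._cell.state_size

        @property
        def output_size(self):
            return self._cell.output_size

        def __call__(self, inputs, state, scope=None):
            with tf.variable_scope(scope or type(self).__name__):
                # ...compute the attention-weighted question and build z here...
                z = inputs  # stand-in for tf.concat([inputs, weighted_question], 1)
                return self._cell(z, state)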

Upvotes: 1
