Reputation: 31
I'm trying to implement the MATCH LSTM from this paper: https://arxiv.org/pdf/1608.07905.pdf
I'm using TensorFlow. One part of the architecture is an RNN that uses the input and the previous state to compute an attention vector, applies it to a context, concatenates the result with the inputs, and sends them into an LSTM. To build the first part of this RNN, I wrote a custom cell for TensorFlow to call, but I'm not sure how to send the results into an LSTM. Is it possible to call the basic LSTM cell within the custom cell I'm writing? I tried this a few different ways but kept getting the error "'module' object has no attribute 'rnn_cell'" at the line where the LSTM cell is called. Any help would be much appreciated!
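To make the intent concrete, this is roughly the composition I'm aiming for (a stripped-down sketch, not my real code; the class name, the attention placeholder, and the sizes are made up):

import tensorflow as tf

class AttnThenLSTMCell(tf.contrib.rnn.RNNCell):  # hypothetical name
    """Sketch: attend over a context, concatenate with the input, feed an inner LSTM."""
    def __init__(self, num_units):
        super(AttnThenLSTMCell, self).__init__()
        # Create the inner cell once so its variables are shared across time steps.
        self._inner = tf.contrib.rnn.BasicLSTMCell(num_units)

    @property
    def state_size(self):
        # Delegate to the inner LSTM, whose state is an LSTMStateTuple (c, h).
        return self._inner.state_size

    @property
    def output_size(self):
        return self._inner.output_size

    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            weighted_context = inputs                  # stand-in for the attention step
            z = tf.concat([inputs, weighted_context], 1)
            return self._inner(z, state)               # inner LSTM consumes the concat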
EDIT to add code:
import numpy as np
import tensorflow as tf
class MatchLSTMCell(tf.contrib.rnn.RNNCell):
    def __init__(self, state_size, question_tensor, encoded_questions, batch_size):
        self._state_size = state_size
        self.question_tensor = question_tensor
        self.encoded_questions = encoded_questions
        self.batch_size = batch_size

    @property
    def state_size(self):
        return self._state_size

    @property
    def output_size(self):
        return self._state_size

    def __call__(self, inputs, state, scope=None):
        scope = scope or type(self).__name__
        with tf.variable_scope(scope):
            W_p = tf.get_variable("W_p", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
            W_r = tf.get_variable("W_r", dtype=tf.float64, shape=[self.state_size, self.state_size], initializer=tf.contrib.layers.xavier_initializer())
            b_p = tf.get_variable("b_p", dtype=tf.float64, shape=[self.state_size])
            w = tf.get_variable("w", dtype=tf.float64, shape=[1, self.state_size])
            b = tf.get_variable("b", dtype=tf.float64, shape=[])

            #print 'question tensor', np.shape(self.question_tensor)
            #print 'inputs', np.shape(inputs)
            #print 'insides', np.shape(tf.matmul(inputs, W_p) + tf.matmul(state, W_r) + b_p)
            G = tf.nn.tanh(
                tf.transpose(tf.transpose(self.question_tensor, perm=[1,0,2]) +
                             (tf.matmul(inputs, W_p) + tf.matmul(state, W_r) + b_p), perm=[1,0,2])
            )
            #print 'big G', np.shape(G)

            attention_list = []
            for i in range(self.batch_size):
                attention_matrix = tf.matmul(G[i,:,:], tf.transpose(w))
                attention_list.append(attention_matrix)
            attention_scores = tf.stack(attention_list)
            a = tf.nn.softmax(attention_scores + b)
            a = tf.reshape(a, [self.batch_size, -1])
            #print 'a shape is', np.shape(a)

            weighted_question_list = []
            for i in range(self.batch_size):
                attention_vector = tf.matmul(tf.reshape(a[i], [1,-1]), self.encoded_questions[i])
                weighted_question_list.append(attention_vector)
            weighted_questions = tf.stack(weighted_question_list)
            weighted_questions = tf.reshape(weighted_questions, [32, -1])
            #print 'weighted questions', np.shape(weighted_questions)

            z = tf.concat([inputs, weighted_questions], 1)

            # This next line is where the "'module' object has no attribute 'rnn_cell'" error is raised.
            lstm_cell = tf.nn.rnn_cell.LSTMCell(self.state_size)
            output, new_state = lstm_cell.__call__(z, state)
            return output, new_state
Upvotes: 0
Views: 1187
Reputation: 11
I'm also trying to reimplement Match-LSTM on SQuAD as an experiment. I used MurtyShikhar's implementation as a reference, and it works! However, he had to customize AttentionWrapper and use the existing BasicLSTM cell.
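In case it helps, that route looks roughly like this. This is a minimal sketch with the stock wrapper, not his actual code; it assumes a TF 1.x that has tf.contrib.seq2seq.AttentionWrapper, and encoded_questions, encoded_passage, and hidden_size are placeholders. As said above, the real Match-LSTM attention needs a customized AttentionWrapper:

import tensorflow as tf

# encoded_questions: [batch, question_len, hidden_size] from the question encoder
# encoded_passage:   [batch, passage_len, hidden_size] from the passage encoder
hidden_size = 64  # placeholder

attention = tf.contrib.seq2seq.BahdanauAttention(
    num_units=hidden_size, memory=encoded_questions)

# By default, AttentionWrapper concatenates the attention vector with each input
# before feeding the wrapped cell: the same attend, concat, LSTM pattern.
match_cell = tf.contrib.seq2seq.AttentionWrapper(
    tf.contrib.rnn.BasicLSTMCell(hidden_size),
    attention,
    output_attention=False)  # return the LSTM output h, not the attention vector

outputs, final_state = tf.nn.dynamic_rnn(
    match_cell, encoded_passage, dtype=tf.float32)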
I also tried to create a Match_LSTM_cell by putting z and state in as the (inputs, state) pair of a Basic_LSTM:
# Imports used below; these are TensorFlow's internal ops modules (the same ones
# BasicLSTMCell uses). The exact location of _linear varies between TF 1.x versions.
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.rnn_cell_impl import _linear

def __call__(self, inputs, state):
    # c is not an output; c is the "memory keeper".
    # It is necessary to update new_c and pass it through the LSTM.
    c, h = state
    # ...calculate your z
    # ...inputs will be each token in the context (passage), respectively
    # ...calculate alpha_Q (the attention-weighted question)
    z = tf.concat([inputs, alpha_Q], axis=1)

    ######## This part is a reimplementation of Basic_LSTM ########
    # dimension is the hidden size; _forget_bias and _activation are set
    # in __init__ (e.g. 1.0 and tanh, as in BasicLSTMCell).
    with vs.variable_scope("LSTM_core"):
        sigmoid = math_ops.sigmoid
        concat = _linear([z, h], dimension * 4, bias=True)
        i, j, f, o = array_ops.split(concat, num_or_size_splits=4, axis=1)
        new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)
    new_state = (new_c, new_h)
    return new_h, new_state
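To run it over the passage, I just pass the cell to tf.nn.dynamic_rnn. A rough usage sketch (the cell name, constructor arguments, and encoded_passage are placeholders; the cell also needs state_size and output_size properties for the (c, h) pair so that zero_state works):

match_cell = MyMatchLSTMCell(dimension, encoded_questions)  # placeholder constructor
outputs, final_state = tf.nn.dynamic_rnn(
    match_cell, encoded_passage, dtype=tf.float32)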
Upvotes: 1