huda

Reputation: 51

TensorFlow: reuse variables in different name scopes

I have a problem reusing a variable across different name scopes. The code below keeps the source embedding and the target embedding in two separate spaces. What I want to do is put source and target in the same space, reusing the variables in the lookup table.

''' Applying bidirectional encoding for source-side inputs and first-word decoding.
'''
def decode_first_word(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Source_Side'):
            source_embedding_tensor = self._src_lookup_table(source_vocab_id_tensor)
    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(\
            source_embedding_tensor, source_mask_tensor)
    with tf.name_scope('Decoding_Layer_First'):
        rvals = self.decode_next_word(source_concated_hidden_tensor, source_mask_tensor, \
            None, None, None, scope, reuse)
    return rvals + [source_concated_hidden_tensor]


''' Applying one-step decoding.
'''
def decode_next_word(self, enc_concat_hidden, src_mask, cur_dec_hidden, \
                            cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
                            src_side_pre_act=None):
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Target_Side'):
            cur_trg_wemb = None
            if cur_trg_wid is not None:
                cur_trg_wemb = self._trg_lookup_table(cur_trg_wid)

I want to change them as follows, so that there is only one embedding node in the whole graph:

def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Bi_Side'):
            source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)
    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(\
            source_embedding_tensor, source_mask_tensor)
    with tf.name_scope('Decoding_Layer_First'):
        rvals = self.decode_next_word_shared_embedding(source_concated_hidden_tensor, source_mask_tensor, \
            None, None, None, scope, reuse)
    return rvals + [source_concated_hidden_tensor]

def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden, \
                            cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
                            src_side_pre_act=None):
    with tf.name_scope('Word_Embedding_Layer'):
        cur_trg_wemb = None
        if cur_trg_wid is not None:
            with tf.variable_scope('Bi_Side'):
                cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)

How can I achieve this?

Upvotes: 2

Views: 616

Answers (2)

Xiong-Hui Chen

Reputation: 410

One solution is to save the variable_scope instance and reuse it:


def decode_first_word_shared_embedding(self, source_vocab_id_tensor, source_mask_tensor, scope, reuse):
    with tf.name_scope('Word_Embedding_Layer'):
        with tf.variable_scope('Bi_Side'):
            source_embedding_tensor = self._bi_lookup_table(source_vocab_id_tensor)
            shared_variable_scope = tf.get_variable_scope()  # save the scope object so it can be re-entered later

    with tf.name_scope('Encoding_Layer'):
        source_concated_hidden_tensor = self._encoder.get_biencoded_tensor(\
            source_embedding_tensor, source_mask_tensor)
    with tf.name_scope('Decoding_Layer_First'):
        rvals = self.decode_next_word_shared_embedding(source_concated_hidden_tensor, source_mask_tensor, \
            None, shared_variable_scope, None, None, scope, reuse)
    return rvals + [source_concated_hidden_tensor]

def decode_next_word_shared_embedding(self, enc_concat_hidden, src_mask, cur_dec_hidden, shared_variable_scope, \
                            cur_trg_wid, trg_mask=None, scope=None, reuse=False, \
                            src_side_pre_act=None):
    with tf.variable_scope('Target_Side'):
        cur_trg_wemb = None
        if cur_trg_wid is not None:
            # Re-enter the saved scope with reuse=True to fetch the same variables.
            with tf.variable_scope(shared_variable_scope, reuse=True):
                cur_trg_wemb = self._bi_lookup_table(cur_trg_wid)

And this is my demo code:

with tf.variable_scope('Word_Embedding_Layer'):
    with tf.variable_scope('Bi_Side'):
        v = tf.get_variable('bi_var', [1], dtype=tf.float32)
        reuse_scope = tf.get_variable_scope()
with tf.variable_scope('Target_side'):
    # some other codes.
    with tf.variable_scope(reuse_scope, reuse=True):
        w = tf.get_variable('bi_var', [1], dtype=tf.float32)
print(v.name)
print(w.name)
assert v == w

Output:
Word_Embedding_Layer/Bi_Side/bi_var:0
Word_Embedding_Layer/Bi_Side/bi_var:0
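
This works because entering a saved VariableScope object restores its full original path (Word_Embedding_Layer/Bi_Side here), no matter which scope it is entered from. As an aside, on TensorFlow 1.4+ the same sharing can also be written with reuse=tf.AUTO_REUSE; a minimal sketch, where bi_side_lookup, vocab_size and emb_dim are placeholder names rather than anything from the code above:

    import tensorflow as tf

    def bi_side_lookup(word_ids, vocab_size, emb_dim):
        # The first call creates Word_Embedding_Layer/Bi_Side/embedding;
        # every later call returns the same variable, regardless of the
        # name_scope it is called under.
        with tf.variable_scope('Word_Embedding_Layer'):
            with tf.variable_scope('Bi_Side', reuse=tf.AUTO_REUSE):
                emb = tf.get_variable('embedding', [vocab_size, emb_dim], dtype=tf.float32)
        return tf.nn.embedding_lookup(emb, word_ids)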

Upvotes: 1

huda

Reputation: 51

I solved it by using a dictionary to save the weight matrix of the embedding, based on a hint from https://www.tensorflow.org/versions/r0.12/how_tos/variable_scope/
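
For concreteness, here is a minimal sketch of that dictionary approach under TF 1.x; the class and names (SharedEmbedding, _weights_dict, lookup) are hypothetical, not taken from my actual code:

    import tensorflow as tf

    class SharedEmbedding(object):
        def __init__(self, vocab_size, emb_dim):
            self._vocab_size = vocab_size
            self._emb_dim = emb_dim
            self._weights_dict = {}  # name -> embedding weight matrix

        def _get_or_create(self, name):
            # Create the weight matrix only once; later calls return the
            # cached variable, independent of the surrounding name_scope.
            if name not in self._weights_dict:
                with tf.variable_scope('Word_Embedding_Layer'):
                    with tf.variable_scope('Bi_Side'):
                        self._weights_dict[name] = tf.get_variable(
                            name, [self._vocab_size, self._emb_dim], dtype=tf.float32)
            return self._weights_dict[name]

        def lookup(self, word_ids):
            # Source- and target-side lookups both resolve to this one matrix.
            return tf.nn.embedding_lookup(self._get_or_create('bi_embedding'), word_ids)

Both decode_first_word_shared_embedding and decode_next_word_shared_embedding can then call the same lookup, which leaves exactly one embedding node in the graph.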

Upvotes: 3
