Abdullahi Mohammad

Reputation: 41

How to perform multi-task deep neural network training

I am trying to build a multi-task deep neural network with a profile function for complexity tuning. My goal is to train the network with this function and compute three different losses, with their corresponding training accuracies, based on the percentage of channel profiles for three ranges: (0% - 20%), (20% - 40%), and (40% - 100%), as shown in my code below. Is what I'm doing correct, or is there a suggestion for how best I can do it?

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import math
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Network Parameters
n_1 = 100               # 1st layer number of neurons
n_2 = 100               # 2nd layer number of neurons
n_input = 784           # MNIST data input (img shape: 28*28)
n_classes = 10          # MNIST total classes (0-9 digits)
learning_rate = 0.0008
training_epochs = 20
batch_size = 30
display_step = 1


np.random.seed(1)
# tf Graph input
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

# Store layers weight & bias
def initialize_param(n_input, n_1, n_2, n_class):
    tf.set_random_seed(1)
    W1 = tf.get_variable("W1", shape = [n_input, n_1], 
        initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", shape = [n_1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", shape = [n_1, n_2], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", shape = [n_2], initializer = tf.zeros_initializer())
    W3 = tf.get_variable("W3", shape = [n_2, n_class], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b3 = tf.get_variable("b3", shape = [n_class], initializer = tf.zeros_initializer())

    parameters = {"W1": W1,"b1": b1,"W2": W2,"b2": b2,"W3": W3,"b3": b3}
    return parameters
parameters = initialize_param(784, 100, 100, 10)

def linear_func(n):
    return [np.float32(1.0 - 1.0 * i / n) for i in range(1, n + 1)]
L = linear_func(100)

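# linear_profile keeps the first round(lp * n_1) linearly decaying
# coefficients from L and zeroes out the rest, producing a [1, n_1] mask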
def linear_profile(lp, n_1):
    p_L = tf.constant(L, shape = [1, 100])
    L_11 = tf.constant(1.0, shape = [1, int(np.round((lp) * n_1))])
    L_12 = tf.zeros(shape = [1, int(np.round((1 - lp) * n_1))])
    L1 = tf.concat((L_11, L_12), axis = 1)
    p_L1 = tf.multiply(L1, p_L)
    return p_L1

# Creating multiple profiles, one list of masks per percentage range
pc1 = np.linspace(0, 0.2, 100)
pc2 = np.linspace(0.2, 0.4, 100)
pc3 = np.linspace(0.4, 1.0, 100)

profile_1 = [linear_profile(j, 100) for j in pc1]
profile1 = tf.convert_to_tensor(profile_1, dtype=tf.float32)

profile_2 = [linear_profile(j, 100) for j in pc2]
profile2 = tf.convert_to_tensor(profile_2, dtype=tf.float32)

profile_3 = [linear_profile(j, 100) for j in pc3]
profile3 = tf.convert_to_tensor(profile_3, dtype=tf.float32)

def mlp_1(x, profile_type):
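    # Note: this function returns inside the loop, so only profile_type[0]
    # is ever applied; the remaining profiles in profile_type go unused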
    for j in range(len(pc1)):
        Z_ML11 = tf.add(tf.matmul(x, parameters['W1']), parameters['b1'])  
        A_ML11 = tf.nn.relu(Z_ML11)
        P_ML11 = tf.multiply(profile_type[j], A_ML11)
        Z_ML12 = tf.add(tf.matmul(P_ML11, parameters['W2']), parameters['b2'])  
        A_ML12 = tf.nn.relu(Z_ML12)
        P_ML12 = tf.multiply(profile_type[j], A_ML12)
        out_layer = tf.add(tf.matmul(P_ML12, parameters['W3']), parameters['b3'])
        return out_layer

logits_1 = mlp_1(X, profile1)
logits_2 = mlp_1(X, profile2)
logits_3 = mlp_1(X, profile3)

# Define loss and optimizer
loss_op_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_1, labels = Y))
loss_op_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_2, labels = Y))
loss_op_3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits_3, labels = Y))
optimizer_1 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_1)
optimizer_2 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_2)
optimizer_3 = tf.train.MomentumOptimizer(learning_rate = learning_rate, momentum = 0.98).minimize(loss_op_3)

# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Training Loop
    cost_1 = []
    cost_2 = []
    cost_3 = []
    for epoch in range(training_epochs):
        avg_cost1 = 0.
        avg_cost2 = 0.
        avg_cost3 = 0.
        total_batch = int(mnist.train.num_examples/batch_size)

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)

            # Run optimization op (backprop) and cost op (to get loss value)
            _, c_1 = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})
            _, c_2 = sess.run([loss_op_2, optimizer_2], feed_dict = {X: batch_x, Y: batch_y})
            _, c_3 = sess.run([loss_op_3, optimizer_3], feed_dict = {X: batch_x, Y: batch_y})

            # Compute average losses
            avg_cost1 += c_1 / total_batch
            avg_cost2 += c_2 / total_batch
            avg_cost3 += c_3 / total_batch
            cost_1.append(avg_cost1)
            cost_2.append(avg_cost2)
            cost_3.append(avg_cost3)

            logits_list = [logits_1, logits_2, logits_3]
            train_accuracy = []
            for r in logits_list:
                if i % 5000 == 0:
                    pred = tf.nn.softmax(r)  # Apply softmax to logits
                    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
                    train_accuracy_1 = accuracy.eval({X: mnist.train.images, Y: mnist.train.labels})
                    train_accuracy.append(train_accuracy_1)

        # Display logs per epoch step
        avg_cost = [avg_cost1, avg_cost1, avg_cost1]
        for l in avg_cost:
            if epoch % display_step == 0:
                print("Epoch:", '%03d' % (epoch + 1), "cost = {:.9f}".format(avg_cost[l]))
    sess.close()

When the code is run, I get the following error:

TypeError                                 Traceback (most recent call last)
<ipython-input-19-411b2efd4af7> in <module>()
    134 
    135             # Compute average losses
--> 136             avg_cost1 += c_1 / total_batch
    137             avg_cost2 += c_2 / total_batch
    138             avg_cost3 += c_3 / total_batch

TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'


Upvotes: 2

Views: 179

Answers (2)

johnashu

Reputation: 2211

In addition to fixing the actual error, you may want to add some error handling to catch these errors and deal with the possibility of None values occurring in your code, using a try/except block:

try:
    avg_cost1 += c_1 / total_batch
    avg_cost2 += c_2 / total_batch
    avg_cost3 += c_3 / total_batch
    cost_1.append(avg_cost1)
    cost_2.append(avg_cost2)
    cost_3.append(avg_cost3)
except TypeError:
    print('There is nothing here!!')
    #do something else with the None value...

print('continuing script...')
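If you would rather not rely on exception handling, an explicit check is an alternative sketch (using the same variable names as in the question):

if None not in (c_1, c_2, c_3):
    avg_cost1 += c_1 / total_batch
    avg_cost2 += c_2 / total_batch
    avg_cost3 += c_3 / total_batch
else:
    print('There is nothing here!!')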

Upvotes: 0

Peter Szoldan

Reputation: 4868

c_1 is None, so the division cannot be performed. The problem is with this line (I can't see the line number):

_, c_1 = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})

because sess.run returns the fetched values in the same order as the fetch list: with _, c_1 the loss value is assigned to _ and the optimizer op's return value, which is None, is assigned to c_1. To get the loss value back into c_1, swap the unpacking targets:

c_1, _ = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})

This will be the same for c_2 and c_3 analogously.
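Applied to all three, the inner loop becomes:

c_1, _ = sess.run([loss_op_1, optimizer_1], feed_dict = {X: batch_x, Y: batch_y})
c_2, _ = sess.run([loss_op_2, optimizer_2], feed_dict = {X: batch_x, Y: batch_y})
c_3, _ = sess.run([loss_op_3, optimizer_3], feed_dict = {X: batch_x, Y: batch_y})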

This only fixes the listed TypeError; I haven't reviewed your code for any other issues.
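As a quick sanity check of the fetch ordering, here is a minimal standalone sketch (a toy graph, not code from the question) showing that sess.run returns results in the order of the fetch list, and that fetching a training op yields None:

import tensorflow as tf

x = tf.Variable(1.0)
loss = tf.square(x)
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run([loss, train_op])  # fetch the loss tensor, then the op
    print(out)  # [1.0, None] -- the op's fetch result is None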

Upvotes: 1
