機械学習素人
機械学習素人

Reputation: 3

loss value don't change(neural network)

I implemented neural network model with tensorflow(version 2.0) on Python3

I don't know the code works properly because loss value don't almost change.

The code is wrong or The model is too many parameter(this mean that the code is right)?

Please tell me whether the code works properly.

The following is the code.

import tensorflow as tf
import numpy as np

fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()



class Model(object):
 def __init__(self):
        self.var_list   = []
        self.w_layer1 = tf.Variable(tf.random.normal(shape=[28*28, 1000], stddev=0.3,dtype=tf.float64))
        self.b_layer1 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer2 = tf.Variable(tf.random.normal(shape=[1000, 100], stddev=0.3,dtype=tf.float64))
        self.b_layer2 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer3 = tf.Variable(tf.random.normal(shape=[100, 100], stddev=0.3,dtype=tf.float64))
        self.b_layer3 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))
        self.w_layer4 = tf.Variable(tf.random.normal(shape=[100, 10], stddev=0.3,dtype=tf.float64))
        self.b_layer4 = tf.Variable(tf.random.normal(shape=[1,], stddev=0.1,dtype=tf.float64))

        self.var_list.append(self.w_layer1)
        self.var_list.append(self.b_layer1)
        self.var_list.append(self.w_layer2)
        self.var_list.append(self.b_layer2)
        self.var_list.append(self.w_layer3)
        self.var_list.append(self.b_layer3)
        self.var_list.append(self.w_layer4)
        self.var_list.append(self.b_layer4)


 def __call__(self, x):
        return self.w*x+self.b

 def dense_layer(self, inputs, w, b):
        z = tf.matmul(inputs, w) + b
        return tf.nn.relu(z)

 def output_layer(self, inputs, w, b):
        return tf.matmul(inputs, w) + b

 def flattend(self, inputs):
        inputs = tf.cast(inputs, tf.float64)
        return tf.reshape(inputs, [-1, 28*28])

 def loss(self, outputs, targets):
        predicted_y = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = outputs, labels = targets))
        return predicted_y

 def grad(self, x, target_y):
        with tf.GradientTape() as tape:
            tape.watch(self.var_list)
            loss_value = self.loss(self.run(x), target_y)
            return tape.gradient(loss_value, self.var_list)

 def run(self, inputs):
        inputs  = self.flattend(inputs)
        layer1  = self.dense_layer(inputs, self.w_layer1, self.b_layer1)
        layer2  = self.dense_layer(layer1, self.w_layer2, self.b_layer2)
        layer3  = self.dense_layer(layer2, self.w_layer3, self.b_layer3)
        layer4  = self.output_layer(layer3, self.w_layer4, self.b_layer4)

        return layer4

 def optimizer(self):
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)
    return opt

def make_onehot_labels(labels):
    depth = 10
    one_hot_labels = tf.one_hot(labels, depth)
    return one_hot_labels



fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images/255.0
test_images  = test_images/255.0
train_labels = make_onehot_labels(train_labels)
test_labels  = make_onehot_labels(test_labels)

ds_train_x = tf.data.Dataset.from_tensor_slices(train_images)
ds_train_y = tf.data.Dataset.from_tensor_slices(train_labels)
train_dataset = tf.data.Dataset.zip((ds_train_x, ds_train_y)).shuffle(1000).repeat().batch(300)


train_images = tf.convert_to_tensor(train_images)
train_labels   = tf.convert_to_tensor(train_labels)


test_images = tf.convert_to_tensor(test_images)
test_labels   = tf.convert_to_tensor(test_labels)


count = 1
model = Model()
opt = model.optimizer()
print(model.loss(model.run(train_images), train_labels))
for epoch in range(10):

    for data in train_dataset:

        if count%200==0:

            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])  
        opt.apply_gradients(zip(grads, model.var_list))
        count = count+1


the following is the result which the above code executed

tf.Tensor(184.81706096058622, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)
tf.Tensor(1.2104797483683287, shape=(), dtype=float64)

Upvotes: 0

Views: 45

Answers (1)

thushv89
thushv89

Reputation: 11333

The issue is in the following part

for epoch in range(10):

    for data in train_dataset:

        if count%200==0:

            print(model.loss(model.run(train_images), train_labels))
            #print(grads)
            break
        grads = model.grad(data[0], data[1])  
        opt.apply_gradients(zip(grads, model.var_list))
        count = count+1

You have a break within the if condition, meaning you break your training loop (and restart a new epoch) when you hit count%200==0. Remove the break and you'll see the error rate going down.

To elaborate on the issue, as soon as you reach count==200 you break the loop, and the counter does not increase anymore so you're basically not reaching anything beyond that if condition after 200 iterations ( this anything beyond includes your gradient application).

Upvotes: 1

Related Questions