Reputation: 2478
I am trying to do MNIST classification using TensorFlow 2.0.
The architecture of my neural network is as follows:
The input layer has 784 neurons (28 * 28)
The hidden layer has 512 neurons
The output layer has 10 neurons
The hidden layer uses the ReLU activation function and the output layer uses softmax.
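For reference, the same architecture expressed with the Keras Sequential API would look roughly like this (a sketch for comparison only; I am implementing it manually instead):

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(784,)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])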
My code to do so is as follows:
# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist
# type(mnist)
# module
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0
X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)
print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)
# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))
print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)
def relu(x):
    '''
    Function to compute ReLU for
    a given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype=tf.float32)
def relu_derivative(x):
    '''
    Function to compute the derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    return tf.cast(tf.where(x <= 0, 0, 1), dtype=tf.float32)
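# Quick sanity check of the two helpers (illustrative only, not part of
# the original code)-
# x = tf.constant([-2.0, 0.0, 3.0])
# relu(x)              # [0., 0., 3.]
# relu_derivative(x)   # [0., 0., 1.]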
def softmax_stable(z):
    '''
    Function to compute the softmax activation function
    in a numerically stable way
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype=tf.float32)
    # Subtract the largest element along the last axis so that
    # tf.math.exp() cannot overflow-
    largest = tf.math.reduce_max(z, axis=-1, keepdims=True)
    z_exp = tf.math.exp(z - largest)
    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp, axis=-1, keepdims=True)
    return s
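# Sanity check (illustrative only): softmax is invariant to subtracting a
# constant, since exp(z - c) / sum(exp(z - c)) == exp(z) / sum(exp(z)),
# so this should match tf.nn.softmax-
# z = tf.constant([1.0, 2.0, 3.0])
# softmax_stable(z)    # ~ [0.090, 0.245, 0.665]
# tf.nn.softmax(z)     # same values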
def initialize_parameters():
    W1 = tf.random.uniform(shape=(784, 512), minval=0, maxval=1)
    b1 = tf.random.uniform(shape=(1, 512), minval=0, maxval=1)
    W2 = tf.random.uniform(shape=(512, 10), minval=0, maxval=1)
    b2 = tf.random.uniform(shape=(1, 10), minval=0, maxval=1)
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    Z1 = tf.matmul(X, W1) + b1    # (60000, 512)
    A1 = relu(Z1)                 # (60000, 512)
    Z2 = tf.matmul(A1, W2) + b2   # (60000, 10)
    # A2 = softmax_stable(Z2)     # (60000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)        # (60000, 10)
    return A2
def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    # 'y_pred_temp' already holds softmax probabilities,
    # so from_logits must be False-
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    return loss_fn(y_true=Y, y_pred=y_pred_temp)
def train_model(parameters, X, Y, learning_rate):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    with tf.GradientTape(persistent=True) as t:
        current_loss = cost(parameters, X, Y)
    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])
    # Gradient descent update-
    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)
    updated_params = {'W1': W1, 'W2': W2,
                      'b1': b1, 'b2': b2}
    return updated_params, current_loss
params = initialize_parameters()
updated_params, cost_val = train_model(params, X_train, y_train, 0.01)
Now, if I want to use "train_model()" in a loop, updating its parameters as follows:
for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)
then subsequent calls to "train_model()" return "dW2", "dW1", "db2" and "db1" as "NoneType".
What's going wrong?
Thanks!
Upvotes: 1
Views: 515
Reputation: 11333
The problem is with your initialize_parameters() function. You are not creating tf.Variables but tf.Tensors. Your parameters need to be tf.Variables if you want to take derivatives w.r.t. them:
def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape=(1, 512), minval=0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape=(512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape=(1, 10), minval=0, maxval=1))
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
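Note, too, that an update like W2 = W2 - (learning_rate * dW2) rebinds the name to a plain tf.Tensor again, so a dictionary built from those results and handed to the next train_model() call no longer holds Variables; tf.Variable.assign_sub() performs the update in place instead. A minimal sketch of the difference, using a toy variable rather than the original network:

import tensorflow as tf

# A tf.Variable is watched by tf.GradientTape automatically-
W = tf.Variable(tf.random.uniform(shape=(2, 2)))

with tf.GradientTape() as t:
    loss = tf.reduce_sum(W * W)

dW = t.gradient(loss, W)   # a tf.Tensor, because W is a tf.Variable
W.assign_sub(0.01 * dW)    # in-place update; W remains a tf.Variable

# By contrast, 'W = W - (0.01 * dW)' would rebind 'W' to a plain
# tf.Tensor, and taking gradients w.r.t. it on the next iteration
# would return None.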
Upvotes: 3