Reputation: 2478
I am trying to do MNIST classification using TensorFlow 2.0.
The architecture of my neural network is as follows:
The input layer has 784 neurons (28 * 28)
The hidden layer has 512 neurons
The output layer has 10 neurons
The hidden layer uses the ReLU activation function and the output layer uses softmax.
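For reference, the same architecture expressed with the Keras Sequential API would look roughly like this (a sketch for comparison only; I am implementing it manually instead):

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(784,)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])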
My code to do so is as follows:
# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist
# type(mnist)
# module
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0
X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)
print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)
# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))
print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)
def relu(x):
    '''
    Function to compute ReLU for
    a given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype=tf.float32)
def relu_derivative(x):
    '''
    Function to compute the derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    return tf.cast(tf.where(x <= 0, 0, 1), dtype=tf.float32)
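# Quick sanity check of the two helpers (illustrative only, not part of
# the original code)-
# x = tf.constant([-2.0, 0.0, 3.0])
# relu(x)              # [0., 0., 3.]
# relu_derivative(x)   # [0., 0., 1.]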
def softmax_stable(z):
    '''
    Function to compute the softmax activation function
    in a numerically stable way
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype=tf.float32)
    # Subtract the largest element along the last axis so that
    # tf.math.exp() cannot overflow-
    largest = tf.math.reduce_max(z, axis=-1, keepdims=True)
    z_exp = tf.math.exp(z - largest)
    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp, axis=-1, keepdims=True)
    return s
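# Sanity check (illustrative only): softmax is invariant to subtracting a
# constant, since exp(z - c) / sum(exp(z - c)) == exp(z) / sum(exp(z)),
# so this should match tf.nn.softmax-
# z = tf.constant([1.0, 2.0, 3.0])
# softmax_stable(z)    # ~ [0.090, 0.245, 0.665]
# tf.nn.softmax(z)     # same values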
def initialize_parameters():
    W1 = tf.random.uniform(shape=(784, 512), minval=0, maxval=1)
    b1 = tf.random.uniform(shape=(1, 512), minval=0, maxval=1)
    W2 = tf.random.uniform(shape=(512, 10), minval=0, maxval=1)
    b2 = tf.random.uniform(shape=(1, 10), minval=0, maxval=1)
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    Z1 = tf.matmul(X, W1) + b1    # (60000, 512)
    A1 = relu(Z1)                 # (60000, 512)
    Z2 = tf.matmul(A1, W2) + b2   # (60000, 10)
    # A2 = softmax_stable(Z2)     # (60000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)        # (60000, 10)
    return A2
def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    # 'y_pred_temp' already holds softmax probabilities,
    # so from_logits must be False-
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    return loss_fn(y_true=Y, y_pred=y_pred_temp)
def train_model(parameters, X, Y, learning_rate):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']
    with tf.GradientTape(persistent=True) as t:
        current_loss = cost(parameters, X, Y)
    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])
    # Gradient descent update-
    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)
    updated_params = {'W1': W1, 'W2': W2,
                      'b1': b1, 'b2': b2}
    return updated_params, current_loss
params = initialize_parameters()
updated_params, cost_val = train_model(params, X_train, y_train, 0.01)
Now, if I want to use "train_model()" in a loop, updating its parameters as follows:
for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)
then subsequent calls to "train_model()" return "dW2", "dW1", "db2" and "db1" as "NoneType".
What's going wrong?
Thanks!
Upvotes: 1
Views: 515
Reputation: 11333
The problem is with your initialize_parameters() function. You are not creating tf.Variables but tf.Tensors. Your parameters need to be tf.Variables if you want to take derivatives w.r.t. them:
def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape=(1, 512), minval=0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape=(512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape=(1, 10), minval=0, maxval=1))
    return {'W1': W1, 'W2': W2,
            'b1': b1, 'b2': b2}
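Note, too, that an update like W2 = W2 - (learning_rate * dW2) rebinds the name to a plain tf.Tensor again, so a dictionary built from those results and handed to the next train_model() call no longer holds Variables; tf.Variable.assign_sub() performs the update in place instead. A minimal sketch of the difference, using a toy variable rather than the original network:

import tensorflow as tf

# A tf.Variable is watched by tf.GradientTape automatically-
W = tf.Variable(tf.random.uniform(shape=(2, 2)))

with tf.GradientTape() as t:
    loss = tf.reduce_sum(W * W)

dW = t.gradient(loss, W)   # a tf.Tensor, because W is a tf.Variable
W.assign_sub(0.01 * dW)    # in-place update; W remains a tf.Variable

# By contrast, 'W = W - (0.01 * dW)' would rebind 'W' to a plain
# tf.Tensor, and taking gradients w.r.t. it on the next iteration
# would return None.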
Upvotes: 3