Arun

Reputation: 2478

TensorFlow 2.0 GradientTape NoneType error

I am trying to do MNIST classification using TensorFlow 2.0.

The architecture of my neural network is as follows:

The input layer has 784 neurons (28 * 28)

The hidden layer has 512 neurons

The output layer has 10 neurons

The hidden layer uses the ReLU activation function and the output layer uses the softmax activation function.
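
For reference, this architecture would look roughly like the following with the Keras Sequential API (just an illustration, not the code I am using; I build the network manually below):

# Rough Keras equivalent of the architecture described above (illustration only)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)),  # hidden layer: 512 neurons, ReLU
    tf.keras.layers.Dense(10, activation='softmax')                     # output layer: 10 classes, softmax
])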

My code to do this is as follows:

# Load and prepare the MNIST dataset-
mnist = tf.keras.datasets.mnist

# type(mnist)
# module

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# type(X_train), type(y_train), type(X_test), type(y_test)
# (numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)


# Normalize and convert samples from integers to floating-point numbers-
X_train, X_test = X_train / 255.0, X_test / 255.0

X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)
y_train = tf.cast(y_train, dtype=tf.float32)
y_test = tf.cast(y_test, dtype=tf.float32)

print("\nShapes of training and testing sets are:")
print("X_train.shape = {0}, y_train.shape = {1}, X_test.shape = {2} & y_test.shape = {3}\n".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
# Shapes of training and testing sets are:
# X_train.shape = (60000, 28, 28), y_train.shape = (60000,), X_test.shape = (10000, 28, 28) & y_test.shape = (10000,)


# Reshape training and testing sets-
X_train = tf.reshape(X_train, shape=(X_train.shape[0], 784))
X_test = tf.reshape(X_test, shape=(X_test.shape[0], 784))

print("\nDimensions of training and testing sets AFTER reshaping are:")
print("X_train.shape = {0} and X_test.shape = {1}\n".format(X_train.shape, X_test.shape))
# Dimensions of training and testing sets AFTER reshaping are:
# X_train.shape = (60000, 784) and X_test.shape = (10000, 784)


def relu(x):
    '''
    Function to calculate ReLU for
    given 'x'
    '''
    # return np.maximum(x, 0)
    return tf.cast(tf.math.maximum(x, 0), dtype = tf.float32)


def relu_derivative(x):
    '''
    Function to calculate derivative
    of ReLU
    '''
    # return np.where(x <= 0, 0, 1)
    # return tf.where(x <=0, 0, 1)
    return tf.cast(tf.where(x <=0, 0, 1), dtype=tf.float32)


def softmax_stable(z):
    '''
    Function to compute softmax activation function.
    Numerically stable
    '''
    # First cast 'z' to floating type-
    z = tf.cast(z, dtype = tf.float32)

    # Get largest element in 'z'-
    largest = tf.math.reduce_max(z)

    # Raise each value to exp('z - largest')-
    z_exp = tf.math.exp(z - largest)

    # Compute softmax activation values-
    s = z_exp / tf.math.reduce_sum(z_exp)

    return s


def initialize_parameters():
    W1 = tf.random.uniform(shape=(784, 512), minval=0, maxval=1)
    b1 = tf.random.uniform(shape=(1, 512), minval=0, maxval=1)
    W2 = tf.random.uniform(shape=(512, 10), minval=0, maxval=1)
    b2 = tf.random.uniform(shape=(1, 10), minval=0, maxval=1)

    return {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}


def forward_propagation(parameters, X, Y):
    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    Z1 = tf.matmul(X, W1) + b1          # (60000, 512)
    A1 = relu(Z1)                       # (60000, 512)

    Z2 = tf.matmul(A1, W2) + b2         # (60000, 10)
    # A2 = softmax_stable(Z2)           # (60000, 10)
    # OR-
    A2 = tf.nn.softmax(Z2)              # (60000, 10)

    return A2


def cost(parameters, X, Y):
    y_pred_temp = forward_propagation(parameters, X, Y)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    return loss_fn(y_true = Y, y_pred = y_pred_temp)



def train_model(parameters, X, Y, learning_rate):

    W1 = parameters['W1']
    W2 = parameters['W2']
    b1 = parameters['b1']
    b2 = parameters['b2']

    with tf.GradientTape(persistent = True) as t:
        current_loss = cost(parameters, X, Y)

    dW2, dW1, db2, db1 = t.gradient(current_loss, [W2, W1, b2, b1])

    W2 = W2 - (learning_rate * dW2)
    W1 = W1 - (learning_rate * dW1)
    b2 = b2 - (learning_rate * db2)
    b1 = b1 - (learning_rate * db1)

    updated_params = {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}

    return updated_params, current_loss

params = initialize_parameters()

updated_params, cost_val = train_model(params, X_train, y_train, 0.01)

Now, if I want to use "train_model()" in a loop where I keep updating its parameters as follows:

for epoch in range(100):
    updated_params, cost_val = train_model(updated_params, X_train, y_train, 0.01)

Subsequent calls to "train_model()" return "dW2", "dW1", "db2" and "db1" as "NoneType".

What's going wrong?

Thanks!

Upvotes: 1

Views: 515

Answers (1)

thushv89

Reputation: 11333

The problem is with your initialize_parameters() function. You are not creating tf.Variables there, only tf.Tensors. Your parameters need to be tf.Variables if you want to take derivatives with respect to them, because by default GradientTape only watches trainable tf.Variables, not plain tensors.


def initialize_parameters():
    W1 = tf.Variable(tf.random.uniform(shape=(784, 512), minval=0, maxval=1))
    b1 = tf.Variable(tf.random.uniform(shape = (1, 512), minval = 0, maxval=1))
    W2 = tf.Variable(tf.random.uniform(shape = (512, 10), minval=0, maxval=1))
    b2 = tf.Variable(tf.random.uniform(shape = (1, 10), minval=0, maxval=1))

    return {'W1': W1, 'W2': W2,
        'b1': b1, 'b2': b2}
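
To make the difference concrete, here is a minimal sketch (shapes and names are just illustrative) showing that the tape returns None for a plain tensor but a real gradient once the same value is wrapped in a tf.Variable:

import tensorflow as tf

x = tf.random.uniform(shape=(4, 2))

# Plain tensor: not watched by the tape, so the gradient comes back as None-
w_tensor = tf.random.uniform(shape=(2, 1))
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(tf.matmul(x, w_tensor))
print(tape.gradient(loss, w_tensor))    # None

# tf.Variable: watched automatically, so a real gradient is returned-
w_var = tf.Variable(tf.random.uniform(shape=(2, 1)))
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(tf.matmul(x, w_var))
print(tape.gradient(loss, w_var))       # tf.Tensor of shape (2, 1)

Also note that an update like W2 = W2 - (learning_rate * dW2) rebinds W2 to a plain tensor again, which would reproduce the None gradients on the next call; an in-place update such as W2.assign_sub(learning_rate * dW2) keeps the parameters as tf.Variables across iterations.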

Upvotes: 3
