Reputation: 591
I am implementing a capsule neural network on a data frame of shape [200 x 4098], but after one epoch the accuracy and loss become NaN.
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, f1_score
import matplotlib.pyplot as plt
import pandas as pd

def squash(s, axis=-1, epsilon=1e-7, name=None):
    # squash(s) = (||s||^2 / (1 + ||s||^2)) * (s / ||s||); epsilon keeps the
    # norm away from zero so the division cannot produce NaN gradients
    with tf.name_scope(name, default_name="squash"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                     keep_dims=True)
        safe_norm = tf.sqrt(squared_norm + epsilon)
        squash_factor = squared_norm / (1. + squared_norm)
        unit_vector = s / safe_norm
        return squash_factor * unit_vector

def condition(input, counter):
    return tf.less(counter, 100)

def loop_body(input, counter):
    output = tf.add(input, tf.square(counter))
    return output, tf.add(counter, 1)

def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
    with tf.name_scope(name, default_name="safe_norm"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                     keep_dims=keep_dims)
        return tf.sqrt(squared_norm + epsilon)
df=pd.read_csv('/content/gdrive/My Drive/subject3.csv')
tf.reset_default_graph()
np.random.seed(42)
tf.set_random_seed(42)
X = tf.placeholder(shape=[None, 4097, 1], dtype=tf.float32, name="X")
conv1_params = {
    "filters": 32,
    "kernel_size": 4,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu,
}
conv2_params = {
    "filters": 32,
    "kernel_size": 4,
    "strides": 3,
    "padding": "valid",
    "activation": tf.nn.relu,
}
conv1 = tf.layers.conv1d(X, name="conv1", **conv1_params)
conv2 = tf.layers.conv1d(conv1, name="conv2", **conv2_params)
print("conv layer shape ",conv1.shape, conv2.shape)
caps1_n_maps = 16
caps1_n_caps = caps1_n_maps * 8  # 16 * 8 = 128 primary capsules
caps1_n_dims = 4
caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims],
                       name="caps1_raw")
print(caps1_raw.shape)
print(caps1_raw.get_shape().as_list()[0])
caps1_output = squash(caps1_raw, name="caps1_output")
print("output of Caps1 ",caps1_output)
caps2_n_caps = 2
caps2_n_dims = 16
init_sigma = 0.1
W_init = tf.random_normal(
    shape=(1, caps1_n_caps, caps2_n_caps, caps2_n_dims, caps1_n_dims),
    stddev=init_sigma, dtype=tf.float32, name="W_init")
W = tf.Variable(W_init, name="W")
batch_size = tf.shape(X)[0]
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name="W_tiled")
caps1_output_expanded = tf.expand_dims(caps1_output, -1,
                                       name="caps1_output_expanded")
caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2,
                                   name="caps1_output_tile")
caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, caps2_n_caps, 1, 1],
                             name="caps1_output_tiled")
caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled,
                            name="caps2_predicted")
print(W_tiled)
print(caps1_output_tiled)
print(caps2_predicted)
raw_weights = tf.zeros([batch_size, caps1_n_caps, caps2_n_caps, 1, 1],
                       dtype=np.float32, name="raw_weights")
routing_weights = tf.nn.softmax(raw_weights, dim=2, name="routing_weights")
weighted_predictions = tf.multiply(routing_weights, caps2_predicted,
                                   name="weighted_predictions")
weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True,
                             name="weighted_sum")
caps2_output_round_1 = squash(weighted_sum, axis=-2,
                              name="caps2_output_round_1")
caps2_output_round_1_tiled = tf.tile(
    caps2_output_round_1, [1, caps1_n_caps, 1, 1, 1],
    name="caps2_output_round_1_tiled")
agreement = tf.matmul(caps2_predicted, caps2_output_round_1_tiled,
                      transpose_a=True, name="agreement")
raw_weights_round_2 = tf.add(raw_weights, agreement,
                             name="raw_weights_round_2")
routing_weights_round_2 = tf.nn.softmax(raw_weights_round_2,
                                        dim=2,
                                        name="routing_weights_round_2")
weighted_predictions_round_2 = tf.multiply(routing_weights_round_2,
                                           caps2_predicted,
                                           name="weighted_predictions_round_2")
weighted_sum_round_2 = tf.reduce_sum(weighted_predictions_round_2,
                                     axis=1, keep_dims=True,
                                     name="weighted_sum_round_2")
caps2_output_round_2 = squash(weighted_sum_round_2,
                              axis=-2,
                              name="caps2_output_round_2")
caps2_output = caps2_output_round_2
print(caps2_output)
y_proba = safe_norm(caps2_output, axis=-2, name="y_proba")
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba_argmax")
y_pred = tf.squeeze(y_proba_argmax, axis=[1, 2], name="y_pred")
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5
T = tf.one_hot(y, depth=caps2_n_caps, name="T")
with tf.Session():
    print(T.eval(feed_dict={y: np.array([0, 1])}))
caps2_output_norm = safe_norm(caps2_output, axis=-2, keep_dims=True,
                              name="caps2_output_norm")
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm),
                              name="present_error_raw")
present_error = tf.reshape(present_error_raw, shape=(-1, 2),
                           name="present_error")
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus),
                             name="absent_error_raw")
absent_error = tf.reshape(absent_error_raw, shape=(-1, 2),
                          name="absent_error")
L = tf.add(T * present_error, lambda_ * (1.0 - T) * absent_error,
           name="L")
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")
mask_with_labels = tf.placeholder_with_default(False, shape=(),
                                               name="mask_with_labels")
reconstruction_targets = tf.cond(mask_with_labels,  # condition
                                 lambda: y,         # if True
                                 lambda: y_pred,    # if False
                                 name="reconstruction_targets")
reconstruction_mask = tf.one_hot(reconstruction_targets,
                                 depth=caps2_n_caps,
                                 name="reconstruction_mask")
reconstruction_mask_reshaped = tf.reshape(
    reconstruction_mask, [-1, 1, caps2_n_caps, 1, 1],
    name="reconstruction_mask_reshaped")
caps2_output_masked = tf.multiply(
    caps2_output, reconstruction_mask_reshaped,
    name="caps2_output_masked")
decoder_input = tf.reshape(caps2_output_masked,
                           [-1, caps2_n_caps * caps2_n_dims],
                           name="decoder_input")
n_hidden1 = 512
n_hidden2 = 1024
n_output = 4097 * 1
with tf.name_scope("decoder"):
hidden1 = tf.layers.dense(decoder_input, n_hidden1,
activation=tf.nn.relu,
name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2,
activation=tf.nn.relu,
name="hidden2")
decoder_output = tf.layers.dense(hidden2, n_output,
activation=tf.nn.sigmoid,
name="decoder_output")
X_flat = tf.reshape(X, [-1, n_output], name="X_flat")
squared_difference = tf.square(X_flat - decoder_output,
name="squared_difference")
reconstruction_loss = tf.reduce_mean(squared_difference,
name="reconstruction_loss")
alpha = 0.0005
loss = tf.add(margin_loss, alpha * reconstruction_loss, name="loss")
print(loss)
correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss, name="training_op")
init = tf.global_variables_initializer()
saver = tf.train.Saver()
y_res = df[df.columns[-1]].values
x = df[df.columns[0:-1]].values
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y_res, test_size=0.3,
                                                    random_state=42)
len(x_train), len(y_train), len(x_test), len(y_test)
restore_checkpoint = True
batch_size = 1  # must be set before the iteration counts below are computed
n_iterations_per_epoch = len(x_train) // batch_size
n_iterations_validation = len(x_test) // batch_size
best_loss_val = np.infty
start1 = 0
start2 = 0
start = 0
n_epochs = 10
checkpoint_path = "./my_capsule_network"
with tf.Session() as sess:
    if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)
    else:
        init.run()
    for epoch in range(n_epochs):
        for iteration in range(1, n_iterations_per_epoch + 1):
            X_batch = x_train[start1:start1 + batch_size]
            y_batch = y_train[start1:start1 + batch_size]
            start1 += batch_size
            # Run the training operation and measure the loss:
            _, loss_train = sess.run(
                [training_op, loss],
                feed_dict={X: X_batch.reshape([-1, 4097, 1]),
                           y: y_batch,
                           mask_with_labels: True})
            print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
                      iteration, n_iterations_per_epoch,
                      iteration * 100 / n_iterations_per_epoch,
                      loss_train),
                  end="")
        # At the end of each epoch,
        # measure the validation loss and accuracy:
        loss_vals = []
        acc_vals = []
        for iteration in range(1, n_iterations_validation + 1):
            X_batch = x_test[start2:start2 + batch_size]
            y_batch = y_test[start2:start2 + batch_size]
            start2 += batch_size
            loss_val, acc_val = sess.run(
                [loss, accuracy],
                feed_dict={X: X_batch.reshape([-1, 4097, 1]),
                           y: y_batch})
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                      iteration, n_iterations_validation,
                      iteration * 100 / n_iterations_validation),
                  end=" " * 10)
        loss_val = np.mean(loss_vals)
        start += batch_size
        acc_val = np.mean(acc_vals)
        print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}{}".format(
            epoch + 1, acc_val * 100, loss_val,
            " (improved)" if loss_val < best_loss_val else ""))
        # And save the model if it improved:
        if loss_val < best_loss_val:
            save_path = saver.save(sess, checkpoint_path)
            best_loss_val = loss_val
And I am getting this output:
Epoch: 1 Val accuracy: 53.3333% Loss: 0.319774 (improved)
Epoch: 2 Val accuracy: nan% Loss: nan
Epoch: 3 Val accuracy: nan% Loss: nan
Epoch: 4 Val accuracy: nan% Loss: nan
Epoch: 5 Val accuracy: nan% Loss: nan
Epoch: 6 Val accuracy: nan% Loss: nan
Epoch: 7 Val accuracy: nan% Loss: nan
Epoch: 8 Val accuracy: nan% Loss: nan
Epoch: 9 Val accuracy: nan% Loss: nan
Epoch: 10 Val accuracy: nan% Loss: nan
I chose a batch size of 1 because even after trying other batch sizes I did not get a result.
Upvotes: 0
Views: 484
Reputation: 1114
There is no problem with the normalization or the dataset. You just need to reset the start1 and start2 variables to zero at the beginning of each epoch:
...
for epoch in range(n_epochs):
    start1 = 0
    start2 = 0
    for iteration in range(1, n_iterations_per_epoch + 1):
        ...
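Without the reset, start1 keeps growing across epochs, so from the second epoch on the slice x_train[start1:start1+batch_size] runs past the end of the array and comes back empty; averaging the loss and accuracy over empty batches then gives NaN, which is exactly the output you see. A minimal sketch of the effect in plain NumPy (the values here are made up for illustration):

import numpy as np

x_train = np.arange(6)           # pretend training set with 6 samples
start1, batch_size = 6, 1        # index left over from the previous epoch

X_batch = x_train[start1:start1 + batch_size]
print(X_batch)                   # [] -- slicing past the end returns an empty array
print(np.mean(X_batch))          # nan (mean of zero elements), with a RuntimeWarning

The same thing happens inside the graph: tf.reduce_mean over a zero-element batch yields NaN for both the loss and the accuracy.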
Upvotes: 1