Reputation: 99
I'm new to TensorFlow and trying to run a CNN on Twitter embedding matrices (each embedding matrix is 575x300 - words x embedding length) in batches of 100 tweets at a time. I keep getting the error `ValueError: setting an array element with a sequence.` at the following line near the bottom of the script: `sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch})`.
# Script from the question: builds a one-layer text CNN with tf.placeholder /
# tf.Session and trains it in mini-batches. This is the version that raises the
# ValueError described in the question; block indentation was lost in extraction.
filter_size = 2
embedding_size = 300
length_embedding = 575
num_filters = 100
# Filter shape handed to tf.nn.conv2d below: filter_size x embedding_size
# window, 1 input channel, num_filters output channels.
filter_shape = [filter_size, embedding_size, 1, num_filters]
batch_size = 100
n_epochs = 10
# n_inputs is not referenced again in the visible script.
n_inputs = length_embedding*embedding_size
n_outputs = 2 #classify between 2 categories
num_train_examples = 2000
with tf.name_scope("inputs"):
# NOTE(review): the placeholder is rank 2 ([batch_size, length_embedding]), but
# the question text describes every tweet as a 575 x 300 matrix - confirm which
# shape x_batch really has.
input_tweets = tf.placeholder(tf.float32, shape = [batch_size, length_embedding], name="input_tweets")
# expand_dims appends one trailing axis, so this tensor is rank 3.
input_tweets_reshaped = tf.expand_dims(input_tweets, -1)
tweet_labels = tf.placeholder(tf.int32, shape = [batch_size], name="tweet_labels")
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
# NOTE(review): tf.nn.conv2d is given the rank-3 tensor from above together
# with a rank-4 filter; the accompanying answer states conv2d needs rank 4.
conv = tf.nn.conv2d(input_tweets_reshaped, W,
strides = [1,1,1,1], padding="VALID", name="conv")
# conv_bias is computed here, but the pooling below reads `conv`, so the bias
# never reaches the rest of the graph.
conv_bias = tf.nn.bias_add(conv, b)
#pooling
sequence_length=input_tweets_reshaped.shape[1]
with tf.name_scope("pool"):
# The pooling window covers sequence_length - filter_size + 1 positions along
# axis 1 (the length a VALID convolution with this filter height leaves).
pool = tf.nn.max_pool(conv, ksize=[1, sequence_length - filter_size + 1, 1, 1],
strides=[1,1,1,1],
padding="VALID",
name="pool")
# Flatten to one row of num_filters features per example.
pool_flat = tf.reshape(pool, shape=[-1, num_filters])
#fully-connected layer
with tf.name_scope("fc_layer"):
fc_layer = tf.layers.dense(pool_flat, num_filters, activation=tf.nn.relu, name="fc_layer")
#output
with tf.name_scope("output_layer"):
logits = tf.layers.dense(fc_layer, n_outputs, name="output_layer")
# Y_proba is defined but not used by the training or evaluation code below.
Y_proba = tf.nn.softmax(logits, name="Y_proba")
#train
with tf.name_scope("train"):
# Sparse cross-entropy: labels are integer class ids, not one-hot vectors.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tweet_labels)
loss=tf.reduce_mean(xentropy)
optimizer=tf.train.AdamOptimizer()
training_op=optimizer.minimize(loss)
with tf.name_scope("eval"):
# A prediction counts as correct when the true label has the top logit (k=1).
correct = tf.nn.in_top_k(logits, tweet_labels, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.name_scope("init_and_save"):
init = tf.global_variables_initializer()
# saver is created but never used in the visible script.
saver = tf.train.Saver()
#--run model
# x_train, y_train, x_test and y_test are not defined in this snippet; they
# must come from code the question does not show.
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(num_train_examples // batch_size):
print("iteration: "+str(iteration))
# Consecutive, non-overlapping slices of batch_size examples.
x_batch = x_train[iteration*batch_size : (iteration+1)*batch_size]
y_batch = y_train[iteration*batch_size : (iteration+1)*batch_size]
# This is the call the question reports the ValueError on.
sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
acc_train = accuracy.eval(feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
# NOTE(review): both placeholders fix the first dimension to batch_size, so
# x_test / y_test would need exactly batch_size rows - confirm.
acc_test = accuracy.eval(feed_dict={input_tweets: x_test, tweet_labels: y_test})
print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
x_batch is a numpy array of length 100, and each element is a matrix of dimension 575 x 300 (though when I call x_batch.shape, it returns (100, 575)). y_batch is a 1d numpy array of 1's and 0's; y_batch.shape returns (100,). I think the problem is maybe about the dimensions of the inputs - can anyone see clearly what the mismatch is?
Thank you!
Upvotes: 2
Views: 104
Reputation: 8585
There are a few problems with your code:

1. The input to `conv2d` must have rank=4, but you have rank=3.
2. `embedding_size`, which determines the second dimension of your filter, must be less than or equal to the third dimension of your input tensor. Your third dimension is the expanded dimension, which is equal to 1. Therefore, the filter's second dimension cannot be greater than 1!

Two alternatives worth considering:

- `tf.layers.conv2d()`, which will automatically create the variables for the convolution.
- `tf.layers.conv1d()`, which expects a tensor of rank=3 as input.

I'm not sure what you want to achieve with your code, but here's the modified version that works:
import tensorflow as tf
import numpy as np
# Corrected version of the question's script: a one-layer CNN over sequences of
# length `length_embedding`, trained on random data so it runs stand-alone.
# Written against the TensorFlow 1.x graph API (tf.placeholder / tf.Session).

# --- hyperparameters ---------------------------------------------------------
filter_size = 2
embedding_size = 300  # kept from the question; not used by this graph
length_embedding = 575
num_filters = 100
# conv2d filters are [height, width, in_channels, out_channels]. The input
# below has width 1 and one channel, so the filter width must be 1 as well.
filter_shape = [filter_size, 1, 1, num_filters]
batch_size = 100
n_epochs = 10
n_inputs = length_embedding * embedding_size  # kept from the question; unused
n_outputs = 2  # classify between 2 categories
num_train_examples = 2000

# --- graph -------------------------------------------------------------------
with tf.name_scope("inputs"):
    # Leading dimension is None so that both training batches and the full
    # test set can be fed through the same placeholders.
    input_tweets = tf.placeholder(
        tf.float32, shape=[None, length_embedding], name="input_tweets")
    # Append width and channel axes: [batch, length, 1, 1] is the rank-4
    # layout tf.nn.conv2d requires.
    input_tweets_reshaped = input_tweets[..., tf.newaxis, tf.newaxis]
    tweet_labels = tf.placeholder(tf.int32, shape=[None], name="tweet_labels")

W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(0.1 * tf.ones([num_filters]), name="b")
conv = tf.nn.conv2d(input_tweets_reshaped,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
conv_bias = tf.nn.bias_add(conv, b)

# pooling
sequence_length = input_tweets_reshaped.shape[1]
with tf.name_scope("pool"):
    # Pool over the bias-added activations. The earlier version pooled `conv`,
    # which left `b` disconnected from the loss and therefore never trained.
    # The window spans every position a VALID convolution leaves
    # (sequence_length - filter_size + 1), i.e. max-over-time pooling.
    pool = tf.nn.max_pool(conv_bias,
                          ksize=[1, sequence_length - filter_size + 1, 1, 1],
                          strides=[1, 1, 1, 1],
                          padding="VALID",
                          name="pool")
    # One row of num_filters features per example.
    pool_flat = tf.reshape(pool, shape=[-1, num_filters])

# fully-connected layer
with tf.name_scope("fc_layer"):
    fc_layer = tf.layers.dense(
        pool_flat, num_filters, activation=tf.nn.relu, name="fc_layer")

# output
with tf.name_scope("output_layer"):
    logits = tf.layers.dense(fc_layer, n_outputs, name="output_layer")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

# train
with tf.name_scope("train"):
    # Sparse variant: labels are integer class ids rather than one-hot rows.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tweet_labels)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # Correct when the true label has the highest logit (k=1).
    correct = tf.nn.in_top_k(logits, tweet_labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

# --- synthetic data ----------------------------------------------------------
# Size the data from num_train_examples so the batch loop below never slices
# past the end of the arrays (which would silently feed empty batches).
x_train = np.random.normal(size=(num_train_examples, length_embedding))
y_train = np.random.randint(low=0, high=2, size=num_train_examples)
# Placeholder evaluation set: reuses the training data.
x_test = x_train
y_test = y_train

# --- training loop -----------------------------------------------------------
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(num_train_examples // batch_size):
            print("iteration: "+str(iteration))
            x_batch = x_train[iteration*batch_size : (iteration+1)*batch_size]
            y_batch = y_train[iteration*batch_size : (iteration+1)*batch_size]
            sess.run(training_op, feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
        # Report once per epoch: accuracy on the last training batch and on
        # the full evaluation set.
        acc_train = accuracy.eval(feed_dict={input_tweets: x_batch, tweet_labels: y_batch})
        acc_test = accuracy.eval(feed_dict={input_tweets: x_test, tweet_labels: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)
Upvotes: 1