Reputation: 31
I am trying 2 class classification of images with Tensorflow. Although it was able to be executed successfully in the RGB version, I tried to do it in Grayscale in order to improve the calculation speed. After changing the number of channels, I got an error saying.
ValueError: Can not feed value of shape (10, 3072) for Tensor 'Placeholder: 0', which has shape '(?, 1024)'
The codes operated when RGB is shown below
import sys
sys.path.append('/usr/local/opt/opencv3/lib/python3.5.4/site-packages')
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
import os
log_dir = '/tmp/tensorflow/mnist/logs/simple01'
if tf.gfile.Exists(log_dir):
tf.gfile.DeleteRecursively(log_dir)
tf.gfile.MakeDirs(log_dir)
NUM_CLASSES = 2
IMAGE_SIZE = 32
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('train', 'train2.txt', 'File name of train data')
flags.DEFINE_string('test', 'test2.txt', 'File name of train data')
flags.DEFINE_string('image_dir', 'data2', 'Directory of images')
flags.DEFINE_string('train_dir', '/tmp/data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 200, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size'
'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-5, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
"""
images_placeholder:imaging placeholder
keep_prob: dropout rate place_holder
y_conv:
"""
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
x_image = tf.reshape(images_placeholder, [-1, 32, 32, 3])
with tf.name_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu6(conv2d(x_image, W_conv1) + b_conv1)
tf.summary.histogram("wc1", W_conv1)
with tf.name_scope('pool1') as scope:
h_pool1 = max_pool_2x2(h_conv1)
with tf.name_scope('conv2') as scope:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu6(conv2d(h_pool1, W_conv2) + b_conv2)
tf.summary.histogram("wc2", W_conv2)
with tf.name_scope('pool2') as scope:
h_pool2 = max_pool_2x2(h_conv2)
with tf.name_scope('fc1') as scope:
W_fc1 = weight_variable([8*8*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 8*8*64])
h_fc1 = tf.nn.relu6(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
with tf.name_scope('fc2') as scope:
W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
with tf.name_scope('softmax') as scope:
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
return y_conv
def loss(logits, labels):
"""
logits: tensor, float - [batch_size, NUM_CLASSES]
labels: tensor, int32 - [batch_size, NUM_CLASSES]
cross_entropy: tensor, float
"""
cross_entropy = -tf.reduce_sum(labels*tf.log(logits))
tf.summary.scalar("cross_entropy", cross_entropy)
return cross_entropy
def training(loss, learning_rate):
"""
loss: tensor, loss()
learning_rate:
train_step:
"""
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return train_step
def accuracy(logits, labels):
"""(accuracy)
logits: inference()
labels: tensor, int32 - [batch_size, NUM_CLASSES]
accuracy: (float)
"""
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
tf.summary.scalar("accuracy", accuracy)
return accuracy
if __name__ == '__main__':
f = open(FLAGS.train, 'r')
train_image = []
train_label = []
for line in f:
line = line.rstrip()
l = line.split()
img = cv2.imread(FLAGS.image_dir + '/' + l[0])
img = cv2.resize(img, (32, 32))
train_image.append(img.flatten().astype(np.float32)/255.0)
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
train_label.append(tmp)
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
f.close()
f = open(FLAGS.test, 'r')
test_image = []
test_label = []
for line in f:
line = line.rstrip()
l = line.split()
img = cv2.imread(FLAGS.image_dir + '/' + l[0])
img = cv2.resize(img, (32, 32))
test_image.append(img.flatten().astype(np.float32)/255.0)
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
test_label.append(tmp)
test_image = np.asarray(test_image)
test_label = np.asarray(test_label)
f.close()
with tf.Graph().as_default():
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
keep_prob = tf.placeholder("float")
logits = inference(images_placeholder, keep_prob)
loss_value = loss(logits, labels_placeholder)
train_op = training(loss_value, FLAGS.learning_rate)
acc = accuracy(logits, labels_placeholder)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
for step in range(FLAGS.max_steps):
for i in range(int(len(train_image)/FLAGS.batch_size)):
batch = FLAGS.batch_size*i
sess.run(train_op, feed_dict={
images_placeholder: train_image[batch:batch+FLAGS.batch_size],
labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
keep_prob: 0.5})
train_accuracy = sess.run(acc, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
print("step %d, training accuracy %g"%(step, train_accuracy))
summary_str = sess.run(summary_op, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
summary_writer.add_summary(summary_str, step)
print("test accuracy %g"%sess.run(acc, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0}))
save_path = saver.save(sess, os.getcwd() + "\\model.ckpt")
I changed line 20 to work with Grayscale
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*1
and changed line 62 to 70
x_image = tf.reshape(images_placeholder, [-1, 32, 32, 1])
with tf.name_scope('conv1') as scope:
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
tf.summary.histogram("wc1", W_conv1)
I got an error. The image used is also converted to Gray color information, but it seems that RGB is still in the form of (10, 3072) and 1024 × 3.
Please tell me if there is a solution. Please.
Upvotes: 0
Views: 249
Reputation: 4542
Your problem comes from the fact that even though you updated Tensorflow placeholders to have only one channel per pixel, you're still loading your images with OpenCV in RGB and trying to feed them directly to the network.
You should convert them to grayscale using cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
before using flatten
.
Upvotes: 1