how to create batches from TFrecord for training network in tensorflow?

Question

I have data saved to a tfrecord file. it has 1000 samples and 2 features (one is input the other output). the input is shape [1,20] and output [1,10]. they were both created from flattened numpy arrays. I am trying to create batches from them so i can use them to train my network but i am unable to figure out how.

this is my code for training the network

learning_rate = 0.01
epochs = 2
batch_size = 200 #total 5 batches
dataSize = 1000

dataset = rd.getData()

x = tf.placeholder(shape=(None,20), dtype=tf.float32)
y = tf.placeholder(shape=(None,10), dtype=tf.float32)

w1 = tf.Variable(tf.random_normal([20, 20], stddev=0.03))
w2 = tf.Variable(tf.random_normal([20, 20], stddev=0.03))
w3 = tf.Variable(tf.random_normal([20, 20], stddev=0.03))
w4 = tf.Variable(tf.random_normal([20, 20], stddev=0.03))
w5 = tf.Variable(tf.random_normal([20, 10], stddev=0.03))

b1 = tf.Variable(tf.random_normal([20]))
b2 = tf.Variable(tf.random_normal([20]))
b3 = tf.Variable(tf.random_normal([20]))
b4 = tf.Variable(tf.random_normal([20]))
b5 = tf.Variable(tf.random_normal([10]))

out1 = tf.add(tf.matmul(x, w1), b1)
out1 = tf.tanh(out1)

out2 = tf.add(tf.matmul(out1, w2), b2)
out2 = tf.tanh(out2)

out3 = tf.add(tf.matmul(out2, w3), b3)
out3 = tf.tanh(out3)

out4 = tf.add(tf.matmul(out3, w4), b4)
out4 = tf.tanh(out4)

out5 = tf.add(tf.matmul(out4, w5), b5)
finalOut = tf.tanh(out5)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=finalOut))


optimiser = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

# finally setup the initialisation operator
init_op = tf.global_variables_initializer()


with tf.Session() as sess:
   # initialise the variables
  sess.run(init_op)
  total_batch = int(dataSize / batch_size)
  for epoch in range(epochs):

    iterator = dataset.make_one_shot_iterator()
    avg_cost = 0

    for i in range(total_batch):

      #create batch
      batch_y = []
      batch_x = []
      for counter in range(0,batch_size):
        uv, z = iterator.get_next()
        batch_x.append(uv)
        batch_y.append(z)

      _, c = sess.run([optimiser, cost], 
                     feed_dict={x: batch_x, y: batch_y})
      avg_cost += c / total_batch
      print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost))

this is the file where i am getting the data from.

def decode(serialized_example):

  features = tf.parse_single_example(
  serialized_example,
  features={'uv': tf.FixedLenFeature([1,20], tf.float32),
            'z': tf.FixedLenFeature([1,10], tf.float32)}) 

  return features['uv'], features['z']


def getData():

  filename = ["train.tfrecords"]
  dataset = tf.data.TFRecordDataset(filename).map(decode)
  return dataset

error:

Traceback (most recent call last):
 File "network.py", line 102, in 
feed_dict={x: batch_x, y: batch_y})
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages	ensorflow\python\client\session.py", line 889, in run
run_metadata_ptr)
  File "C:\Users\User\AppData\Local\Programs\Python\Python36\lib\site-packages	ensorflow\python\client\session.py", line 1089, in _run
np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages
umpy\core
umeric.py", line 531, in asarray
return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.

after looking at other questions, i was thinking maybe my batch should be a ndarray or something? but i cant work out how to get my dataset into that form. I cant even work out how to use my data without an iterator. any guidance would be great! thanks

how to create batches from TFrecord for training network in tensorflow?

Answers (1)

Related Questions