Reputation: 1529
I am migrating from the older queue-based data pipeline to the newer tf.data API. Suppose I have code like the following; how can I explicitly set different batch sizes for my training and validation iterators?
filenames = tf.placeholder(tf.string, shape=[None])
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(...)  # Parse the record into tensors.
dataset = dataset.repeat()  # Repeat the input indefinitely.
dataset = dataset.batch(32)
iterator = dataset.make_initializable_iterator()

# Initialize `iterator` with training data.
training_filenames = ["/var/data/file1.tfrecord",
                      "/var/data/file2.tfrecord"]
sess.run(iterator.initializer, feed_dict={filenames: training_filenames})

# Initialize `iterator` with validation data.
validation_filenames = ["/var/data/validation1.tfrecord", ...]
sess.run(iterator.initializer, feed_dict={filenames: validation_filenames})
EDIT:
Thank you. Based on the reply, my implementation is as follows, but I'm not able to figure out why I'm getting this error:
import glob
import tensorflow as tf

def _parse(filename, label):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string)
    image_resized = tf.image.resize_images(image_decoded, [224, 224])
    image_resized.set_shape([224, 224, 3])
    return image_resized, label

def input_pipeline(imglist, labellist, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((imglist, labellist))
    dataset = dataset.map(_parse)       # Parse each filename into an image tensor.
    dataset = dataset.repeat()          # Repeat the input indefinitely.
    dataset = dataset.batch(batch_size)
    return dataset
imglist = glob.glob('/var/temp/*.jpg')
train_imgs = imglist[0:100]
train_labels = [i for i in range(100)]
val_imgs = imglist[200:250]
val_labels = [i for i in range(50)]

training_batch_size = 4
validation_batch_size = 1

training_ds = input_pipeline(train_imgs, train_labels, training_batch_size)
validation_ds = input_pipeline(val_imgs, val_labels, validation_batch_size)

handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_ds.output_types, training_ds.output_shapes)
input_batch = iterator.get_next()

train_iter = training_ds.make_initializable_iterator()
val_iter = validation_ds.make_initializable_iterator()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Define training and validation handles
    training_handle = sess.run(train_iter.string_handle())
    validation_handle = sess.run(val_iter.string_handle())

    # Initialize training and validation datasets
    sess.run(train_iter)
    sess.run(val_iter)

    # If we use training_handle, the input_batch tensor comes from the training data
    training_batch = sess.run(input_batch, feed_dict={handle: training_handle})

    # If we use validation_handle, the input_batch tensor comes from the validation data
    validation_batch = sess.run(input_batch, feed_dict={handle: validation_handle})
But I end up getting the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
281 self._unique_fetches.append(ops.get_default_graph().as_graph_element(
--> 282 fetch, allow_tensor=True, allow_operation=True))
283 except TypeError as e:
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in as_graph_element(self, obj, allow_tensor, allow_operation)
3589 with self._lock:
-> 3590 return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
3591
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _as_graph_element_locked(self, obj, allow_tensor, allow_operation)
3678 raise TypeError("Can not convert a %s into a %s." % (type(obj).__name__,
-> 3679 types_str))
3680
TypeError: Can not convert a Iterator into a Tensor or Operation.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-31-50c4f3464d03> in <module>()
47
48 # Initialize training and validation dataset
---> 49 sess.run(train_iter)
50 sess.run(val_iter)
51
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
--> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1118 # Create a fetch handler to take care of the structure of fetches.
1119 fetch_handler = _FetchHandler(
-> 1120 self._graph, fetches, feed_dict_tensor, feed_handles=feed_handles)
1121
1122 # Run request and get response.
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, graph, fetches, feeds, feed_handles)
425 """
426 with graph.as_default():
--> 427 self._fetch_mapper = _FetchMapper.for_fetch(fetches)
428 self._fetches = []
429 self._targets = []
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in for_fetch(fetch)
251 if isinstance(fetch, tensor_type):
252 fetches, contraction_fn = fetch_fn(fetch)
--> 253 return _ElementFetchMapper(fetches, contraction_fn)
254 # Did not find anything.
255 raise TypeError('Fetch argument %r has invalid type %r' % (fetch,
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, fetches, contraction_fn)
284 raise TypeError('Fetch argument %r has invalid type %r, '
285 'must be a string or Tensor. (%s)' %
--> 286 (fetch, type(fetch), str(e)))
287 except ValueError as e:
288 raise ValueError('Fetch argument %r cannot be interpreted as a '
TypeError: Fetch argument <tensorflow.python.data.ops.iterator_ops.Iterator object at 0x7fa2c0697c88> has invalid type <class 'tensorflow.python.data.ops.iterator_ops.Iterator'>, must be a string or Tensor. (Can not convert a Iterator into a Tensor or Operation.)
Upvotes: 4
Views: 5682
Reputation: 636
I would create two tf.data.Dataset pipelines, one for the training subset and one for the validation subset. Once both pipelines are defined (this is where you can set two different batch sizes), you can join them in the graph by creating a single tf.data.Iterator driven by a handle (in my case, the tf.placeholder called handle).
import tensorflow as tf

def input_pipeline(filenames, batch_size):
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(...)  # Parse the record into tensors.
    dataset = dataset.repeat()  # Repeat the input indefinitely.
    dataset = dataset.batch(batch_size)
    return dataset
training_filenames = ["/var/data/file1.tfrecord",
                      "/var/data/file2.tfrecord"]
training_batch_size = 32

validation_filenames = ["/var/data/validation1.tfrecord",
                        "/var/data/validation2.tfrecord"]
validation_batch_size = 16

training_ds = input_pipeline(training_filenames, training_batch_size)
validation_ds = input_pipeline(validation_filenames, validation_batch_size)

handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_ds.output_types, training_ds.output_shapes)
input_batch = iterator.get_next()
Before requesting batches from either dataset, get the corresponding handle for each one with string_handle(). After that, whenever you run input_batch, you decide whether the batch comes from the training or the validation dataset by what you feed to the handle placeholder.
train_iter = training_ds.make_initializable_iterator()
val_iter = validation_ds.make_initializable_iterator()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Get training and validation handles
    training_handle = sess.run(train_iter.string_handle())
    validation_handle = sess.run(val_iter.string_handle())

    # Initialize training and validation datasets
    sess.run(train_iter.initializer)
    sess.run(val_iter.initializer)

    # If we use training_handle, the input_batch tensor comes from the training tfrecords
    training_batch = sess.run(input_batch, feed_dict={handle: training_handle})

    # If we use validation_handle, the input_batch tensor comes from the validation tfrecords
    validation_batch = sess.run(input_batch, feed_dict={handle: validation_handle})
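For completeness, here is a minimal sketch of how I'd drive this pattern in a full training loop. The step counts are arbitrary and the actual model/optimizer ops are omitted; it also assumes you build the validation pipeline without .repeat(), so that tf.errors.OutOfRangeError marks the end of a validation pass. This would run inside the same tf.Session block as above:

    # Arbitrary step counts; replace the "..." comments with your model ops.
    num_train_steps = 1000
    validate_every = 100

    sess.run(train_iter.initializer)
    for step in range(num_train_steps):
        # The handle routes input_batch to the training dataset.
        batch = sess.run(input_batch, feed_dict={handle: training_handle})
        # ... run your train_op on this batch here ...

        if (step + 1) % validate_every == 0:
            # Fresh pass over the validation set each time.
            sess.run(val_iter.initializer)
            while True:
                try:
                    val_batch = sess.run(input_batch,
                                         feed_dict={handle: validation_handle})
                    # ... accumulate validation metrics here ...
                except tf.errors.OutOfRangeError:
                    break  # Validation set exhausted; back to training.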
Hope it helps!
EDIT:
Your current error comes from calling sess.run() directly on a tf.data.Iterator object. Replace sess.run(train_iter) with sess.run(train_iter.initializer) (and do the same for the validation iterator). train_iter.initializer is the tf.Operation that actually initializes the train_iter iterator. Everything should work after that.
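In code, the two initialization lines become:

# Run the initializer ops, not the Iterator objects themselves.
sess.run(train_iter.initializer)
sess.run(val_iter.initializer)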
Upvotes: 7
Reputation: 1529
A slight modification was needed to get this working:
import glob
import tensorflow as tf

imglist = glob.glob('/var/temp/*.jpg')
train_imgs = imglist[0:100]
train_labels = [i for i in range(100)]
val_imgs = imglist[200:250]
val_labels = [i for i in range(50)]

training_ds = tf.data.Dataset.from_tensor_slices((train_imgs, train_labels)).batch(4)
validation_ds = tf.data.Dataset.from_tensor_slices((val_imgs, val_labels)).batch(1)

handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, training_ds.output_types, training_ds.output_shapes)
input_batch = iterator.get_next()

train_iter = training_ds.make_initializable_iterator()
val_iter = validation_ds.make_initializable_iterator()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Get training and validation handles
    training_handle = sess.run(train_iter.string_handle())
    validation_handle = sess.run(val_iter.string_handle())

    sess.run(train_iter.initializer)
    # If we use training_handle, the input_batch tensor comes from the training dataset
    training_batch = sess.run(input_batch, feed_dict={handle: training_handle})
    print("Training...")
    print(training_batch)

    sess.run(val_iter.initializer)
    # If we use validation_handle, the input_batch tensor comes from the validation dataset
    print("Validation")
    validation_batch = sess.run(input_batch, feed_dict={handle: validation_handle})
    print(validation_batch)
Upvotes: 0
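Note that this version batches the raw filename strings and integer labels. If you want decoded image tensors as in my earlier attempt, the _parse map can be added back into each pipeline, along these lines:

# Sketch: reuse the _parse function defined in the question above, so that
# batches contain decoded, resized images rather than raw filename strings.
training_ds = (tf.data.Dataset.from_tensor_slices((train_imgs, train_labels))
               .map(_parse)
               .batch(4))
validation_ds = (tf.data.Dataset.from_tensor_slices((val_imgs, val_labels))
                 .map(_parse)
                 .batch(1))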