auroua

Reputation: 153

tf.contrib.data.Dataset batch size can only be set to 1

I converted the Pascal VOC dataset to TFRecords with create_pascal_tf_record.py and used tf.contrib.data.Dataset to read the data. My code is as follows:

import tensorflow as tf
from tensorflow.contrib.data import Iterator

slim_example_decoder = tf.contrib.slim.tfexample_decoder

flags = tf.app.flags
flags.DEFINE_string('data_dir', '/home/aurora/workspaces/data/tfrecords_data/voc_dataset/trainval.tfrecords',
                'tfrecords file output path')
flags.DEFINE_integer('batch_size', 1, 'training batch size')
flags.DEFINE_integer('capacity', 10000, 'shuffle buffer capacity')
FLAGS = flags.FLAGS

features = {"image/height": tf.FixedLenFeature((), tf.int64, default_value=1),
        "image/width": tf.FixedLenFeature((), tf.int64, default_value=1),
        "image/filename": tf.FixedLenFeature((), tf.string, default_value=""),
        "image/source_id": tf.FixedLenFeature((), tf.string, default_value=""),
        "image/key/sha256": tf.FixedLenFeature((), tf.string, default_value=""),
        "image/encoded": tf.FixedLenFeature((), tf.string, default_value=""),
        "image/format": tf.FixedLenFeature((), tf.string, default_value="jpeg"),
        "image/object/object_number": tf.FixedLenFeature((), tf.int64, default_value=1),
        "image/object/bbox/xmin": tf.VarLenFeature(tf.float32),
        "image/object/bbox/xmax": tf.VarLenFeature(tf.float32),
        "image/object/bbox/ymin": tf.VarLenFeature(tf.float32),
        "image/object/bbox/ymax": tf.VarLenFeature(tf.float32),
        "image/object/class/text": tf.VarLenFeature(tf.string),
        "image/object/class/label": tf.VarLenFeature(tf.int64),
        "image/object/difficult": tf.VarLenFeature(tf.int64),
        "image/object/truncated": tf.VarLenFeature(tf.int64),
        "image/object/view": tf.VarLenFeature(tf.string),
      }

items_to_handlers = {
    'image': slim_example_decoder.Image(
        image_key='image/encoded', format_key='image/format', channels=3),
    'height': (
        slim_example_decoder.Tensor('image/height')),
    'width': (
        slim_example_decoder.Tensor('image/width')),
    'source_id': (
        slim_example_decoder.Tensor('image/source_id')),
    'key': (
        slim_example_decoder.Tensor('image/key/sha256')),
    'filename': (
        slim_example_decoder.Tensor('image/filename')),
    # Object boxes and classes.
    'groundtruth_boxes': (
        slim_example_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
    'groundtruth_classes': (
        slim_example_decoder.Tensor('image/object/class/label')),
    'groundtruth_difficult': (
        slim_example_decoder.Tensor('image/object/difficult')),
    'image/object/truncated': (
        slim_example_decoder.Tensor('image/object/truncated')),
    }

decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
keys = decoder.list_items()


def _parse_function_train(example):
    serialized_example = tf.reshape(example, shape=[])
    decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
    keys = decoder.list_items()
    tensors = decoder.decode(serialized_example, items=keys)
    tensor_dict = dict(zip(keys, tensors))
    tensor_dict['image'].set_shape([None, None, 3])
    # tensor_dict['image'] = tf.expand_dims(tensor_dict['image'], 0)
    images = tensor_dict['image']
    float_images = tf.cast(images, tf.uint8)
    tensor_dict['image'] = float_images
    return tensor_dict


def build_pipleline(train_data_dir, test_data_dir, batch_size, capacity):
    train_dataset = tf.contrib.data.TFRecordDataset(train_data_dir)
    train_dataset = train_dataset.map(_parse_function_train)
    train_dataset = train_dataset.repeat(1)
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.shuffle(buffer_size=capacity)

    iterator = Iterator.from_structure(train_dataset.output_types,
                                       train_dataset.output_shapes)
    next_element = iterator.get_next()
    training_init_op = iterator.make_initializer(train_dataset)

    return training_init_op, next_element 


if __name__ == '__main__':
    # TODO: only support batch size 1
    training_init_op, next_element = build_pipleline(FLAGS.data_dir, None, FLAGS.batch_size, FLAGS.capacity)
    sess = tf.Session()
    sess.run(training_init_op)
    counter = 0
    while True:
        try:
            next_element_val = sess.run(next_element)
            print(next_element_val['image'].shape, next_element_val['filename'])
            print(next_element_val['groundtruth_boxes'])
            print('-'*30)
            counter += 1
        except tf.errors.OutOfRangeError:
            print('End of training data in step %d' %counter)
            break

The code runs correctly when the batch size is set to 1. When I change the batch size to anything larger than 1, it fails with the following errors:

/usr/software/anaconda3/bin/python3.6 /home/aurora/workspaces/PycharmProjects/object_detection_models/builder/voc_input_pipline_dataset_builder.py
2017-10-11 15:55:05.886856: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886869: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886872: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886874: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886876: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.974850: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:893] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2017-10-11 15:55:05.975103: I tensorflow/core/common_runtime/gpu/gpu_device.cc:955] Found device 0 with properties: 
name: GeForce GTX 1080 Ti
major: 6 minor: 1 memoryClockRate (GHz) 1.683
pciBusID 0000:01:00.0
Total memory: 10.90GiB
Free memory: 10.46GiB
2017-10-11 15:55:05.975112: I tensorflow/core/common_runtime/gpu/gpu_device.cc:976] DMA: 0 
2017-10-11 15:55:05.975114: I tensorflow/core/common_runtime/gpu/gpu_device.cc:986] 0:   Y 
2017-10-11 15:55:05.975118: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1045] Creating TensorFlow device (/gpu:0) -> (device: 0,       name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0)
2017-10-11 15:55:06.027798: W tensorflow/core/framework/op_kernel.cc:1192] Internal: HandleElementToSlice Cannot copy slice: number of elements does not match.  Shapes are: [element]: [1,4], [parent slice]: [5,4]
Traceback (most recent call last):
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1327, in _do_call
return fn(*args)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1306, in _run_fn
status, run_metadata)
  File "/usr/software/anaconda3/lib/python3.6/contextlib.py", line 89, in __exit__
next(self.gen)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
 tensorflow.python.framework.errors_impl.InternalError: HandleElementToSlice Cannot copy slice: number of elements does not match.  Shapes are: [element]: [1,4], [parent slice]: [5,4]
 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/voc_input_pipline_dataset_builder.py", line 98, in <module>
next_element_val = sess.run(next_element)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: HandleElementToSlice Cannot copy slice: number of elements does not match.  Shapes are: [element]: [1,4], [parent slice]: [5,4]
 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]

Caused by op 'IteratorGetNext', defined at:
  File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/voc_input_pipline_dataset_builder.py", line 92, in <module>
training_init_op, next_element = build_pipleline(FLAGS.data_dir, None, FLAGS.batch_size, FLAGS.capacity)
  File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/voc_input_pipline_dataset_builder.py", line 84, in build_pipleline
next_element = iterator.get_next()
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/dataset_ops.py", line 304, in get_next
name=name))
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 379, in iterator_get_next
output_shapes=output_shapes, name=name)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
  File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InternalError (see above for traceback): HandleElementToSlice Cannot copy slice: number of elements does not match.  Shapes are: [element]: [1,4], [parent slice]: [5,4]
 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]

How can I use a batch size larger than 1? Thanks.

Upvotes: 1

Views: 3487

Answers (3)

Jianmin Yu

Reputation: 1

Check your data carefully. There must be a variable-length column in your data. For example, if your "col-1" has the values ["a|b", "a|b|c"], then this HandleElementToSlice error is thrown. TensorFlow does not deal well with variable-length columns, especially through the Dataset API. However, there are reports that TF 1.5 will be better at processing VarLen columns.
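To see this concretely, here is a minimal sketch that reproduces the failure (assuming TF 1.4+, where tf.data is available; use tf.contrib.data on earlier releases). Each element has a different number of rows, like the per-image bounding-box lists in the question, so batch() cannot stack them:

import tensorflow as tf

# Elements of shapes [1, 4], [2, 4], [3, 4], [4, 4]: a different number
# of "boxes" per element, like groundtruth_boxes in the question.
dataset = tf.data.Dataset.range(1, 5)
dataset = dataset.map(lambda n: tf.fill([tf.cast(n, tf.int32), 4], 0.0))
dataset = dataset.batch(2)  # requires equal shapes; fails at run time

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    try:
        sess.run(next_element)
    except tf.errors.OpError as e:
        print('batch() failed:', e)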

Upvotes: 0

Tim Smole

Reputation: 121

The Dataset.batch() function works only for tensors that all have the same size. If your input data has varying sizes, you should use the Dataset.padded_batch() function, which lets you batch tensors of different shapes by specifying one or more dimensions in which they may be padded.

From tensorflow documentation:

padded_batch(batch_size, padded_shapes, padding_values=None)

Combines consecutive elements of this dataset into padded batches.

Like Dataset.dense_to_sparse_batch(), this method combines multiple consecutive elements of this dataset, which might have different shapes, into a single element. The tensors in the resulting element have an additional outer dimension, and are padded to the respective shape in padded_shapes.

Args:

batch_size: A tf.int64 scalar tf.Tensor, representing the number of consecutive elements of this dataset to combine in a single batch.

padded_shapes: A nested structure of tf.TensorShape or tf.int64 vector tensor-like objects representing the shape to which the respective component of each input element should be padded prior to batching. Any unknown dimensions (e.g. tf.Dimension(None) in a tf.TensorShape or -1 in a tensor-like object) will be padded to the maximum size of that dimension in each batch.

padding_values: (Optional.) A nested structure of scalar-shaped tf.Tensor, representing the padding values to use for the respective components. Defaults are 0 for numeric types and the empty string for string types.

Returns: A Dataset.

You can also find an example of its usage (below) in the official TensorFlow Programmer's Guide.

import tensorflow as tf

dataset = tf.data.Dataset.range(100)
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))
dataset = dataset.padded_batch(4, padded_shapes=[None])

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

sess = tf.Session()
print(sess.run(next_element))  # ==> [[0, 0, 0], [1, 0, 0], [2, 2, 0], [3, 3, 3]]
print(sess.run(next_element))  # ==> [[4, 4, 4, 4, 0, 0, 0],
                               #      [5, 5, 5, 5, 5, 0, 0],
                               #      [6, 6, 6, 6, 6, 6, 0],
                               #      [7, 7, 7, 7, 7, 7, 7]]
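Applied to the pipeline in the question, this would look roughly like the sketch below. The padded_shapes keys mirror the tensor_dict returned by _parse_function_train; treat the exact shapes as assumptions, since they depend on how each feature is decoded:

# In build_pipleline(), replace train_dataset.batch(batch_size) with:
padded_shapes = {
    'image': [None, None, 3],        # pad to the largest height/width in the batch
    'height': [], 'width': [],       # scalars need no padding
    'source_id': [], 'key': [], 'filename': [],
    'groundtruth_boxes': [None, 4],  # pad to the largest number of boxes
    'groundtruth_classes': [None],
    'groundtruth_difficult': [None],
    'image/object/truncated': [None],
}
train_dataset = train_dataset.padded_batch(batch_size, padded_shapes=padded_shapes)

Unknown (None) dimensions are padded with zeros up to the maximum size of that dimension within each batch, so downstream code should rely on the per-example height/width and box counts wherever the padding matters.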

Upvotes: 4

mrry

Reputation: 126154

That error message suggests that the Dataset.batch() transformation is attempting to build a dense batch from tensors of different sizes. When you use Dataset.batch(), all tensors must be the same size.

To make the images the same size, consider using an image op like tf.image.resize_image_with_crop_or_pad() inside _parse_function_train().
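As a minimal sketch of that suggestion (the 300x300 target size is a hypothetical choice, not something from the question), the resize could be applied as an extra map step; note that the variable-length box tensors would still differ in size and need separate handling, e.g. padded_batch():

import tensorflow as tf

def _make_image_fixed_size(tensor_dict, target_height=300, target_width=300):
    # Crop larger images and zero-pad smaller ones to one static shape,
    # so that Dataset.batch() can stack the 'image' component.
    tensor_dict['image'] = tf.image.resize_image_with_crop_or_pad(
        tensor_dict['image'], target_height, target_width)
    return tensor_dict

# Usage: train_dataset = train_dataset.map(_parse_function_train).map(_make_image_fixed_size)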

Upvotes: 0
