Reputation: 153
I converted the Pascal VOC dataset to TFRecords, and I am using
the following code to read the data back:
import tensorflow as tf
from import Iterator
slim_example_decoder = tf.contrib.slim.tfexample_decoder
flags =
flags.DEFINE_string('data_dir', '/home/aurora/workspaces/data/tfrecords_data/voc_dataset/trainval.tfrecords',
'tfrecords file output path')
flags.DEFINE_integer('batch_size', 1, 'training batch size')
flags.DEFINE_integer('capacity', 10000, 'training batch size')
features = {"image/height": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/width": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/filename": tf.FixedLenFeature((), tf.string, default_value=""),
"image/source_id": tf.FixedLenFeature((), tf.string, default_value=""),
"image/key/sha256": tf.FixedLenFeature((), tf.string, default_value=""),
"image/encoded": tf.FixedLenFeature((), tf.string, default_value=""),
"image/format": tf.FixedLenFeature((), tf.string, default_value="jpeg"),
"image/object/object_number": tf.FixedLenFeature((), tf.int64, default_value=1),
"image/object/bbox/xmin": tf.VarLenFeature(tf.float32),
"image/object/bbox/xmax": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymin": tf.VarLenFeature(tf.float32),
"image/object/bbox/ymax": tf.VarLenFeature(tf.float32),
"image/object/class/text": tf.VarLenFeature(tf.string),
"image/object/class/label": tf.VarLenFeature(tf.int64),
"image/object/difficult": tf.VarLenFeature(tf.int64),
"image/object/truncated": tf.VarLenFeature(tf.int64),
"image/object/view": tf.VarLenFeature(tf.string),
items_to_handlers = {
'image': slim_example_decoder.Image(
image_key='image/encoded', format_key='image/format', channels=3),
'height': (
'width': (
'source_id': (
'key': (
'filename': (
# Object boxes and classes.
'groundtruth_boxes': (
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/')),
'groundtruth_classes': (
'groundtruth_difficult': (
'image/object/truncated': (
decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
keys = decoder.list_items()
def _parse_function_train(example):
    """Decode one serialized tf.Example into a dict of tensors.

    Args:
        example: string tensor holding a single serialized example; it is
            reshaped to a scalar before decoding.

    Returns:
        A dict mapping every item name reported by the decoder to its decoded
        tensor. The 'image' entry has static shape [None, None, 3] and dtype
        uint8.
    """
    scalar_proto = tf.reshape(example, shape=[])

    example_decoder = slim_example_decoder.TFExampleDecoder(features, items_to_handlers)
    item_names = example_decoder.list_items()
    decoded = example_decoder.decode(scalar_proto, items=item_names)
    tensor_dict = dict(zip(item_names, decoded))

    # Pin the channel count; height and width stay dynamic. No batch
    # dimension is added here.
    tensor_dict['image'].set_shape([None, None, 3])
    tensor_dict['image'] = tf.cast(tensor_dict['image'], tf.uint8)
    return tensor_dict
def build_pipleline(train_data_dir, test_data_dir, batch_size, capacity):
train_dataset =
train_dataset =
train_dataset = train_dataset.repeat(1)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.shuffle(buffer_size=capacity)
iterator = Iterator.from_structure(train_dataset.output_types,
next_element = iterator.get_next()
training_init_op = iterator.make_initializer(train_dataset)
return training_init_op, next_element
if __name__ == '__main__':
# TODO: only support batch size 1
training_init_op, next_element = build_pipleline(FLAGS.data_dir, None, FLAGS.batch_size, FLAGS.capacity)
sess = tf.Session()
counter = 0
while True:
next_element_val =
print(next_element_val['image'].shape, next_element_val['filename'])
counter += 1
except tf.errors.OutOfRangeError:
print('End of training data in step %d' %counter)
The code runs correctly when the batch size is set to 1. When I change the batch size to a value larger than 1, the code fails with the following errors:
/usr/software/anaconda3/bin/python3.6 /home/aurora/workspaces/PycharmProjects/object_detection_models/builder/
2017-10-11 15:55:05.886856: W tensorflow/core/platform/] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886869: W tensorflow/core/platform/] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886872: W tensorflow/core/platform/] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886874: W tensorflow/core/platform/] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.886876: W tensorflow/core/platform/] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.
2017-10-11 15:55:05.974850: I tensorflow/stream_executor/cuda/] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2017-10-11 15:55:05.975103: I tensorflow/core/common_runtime/gpu/] Found device 0 with properties:
name: GeForce GTX 1080 Ti
major: 6 minor: 1 memoryClockRate (GHz) 1.683
pciBusID 0000:01:00.0
Total memory: 10.90GiB
Free memory: 10.46GiB
2017-10-11 15:55:05.975112: I tensorflow/core/common_runtime/gpu/] DMA: 0
2017-10-11 15:55:05.975114: I tensorflow/core/common_runtime/gpu/] 0: Y
2017-10-11 15:55:05.975118: I tensorflow/core/common_runtime/gpu/] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0)
2017-10-11 15:55:06.027798: W tensorflow/core/framework/] Internal: HandleElementToSlice Cannot copy slice: number of elements does not match. Shapes are: [element]: [1,4], [parent slice]: [5,4]
Traceback (most recent call last):
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 1327, in _do_call
return fn(*args)
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 1306, in _run_fn
status, run_metadata)
File "/usr/software/anaconda3/lib/python3.6/", line 89, in __exit__
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/", line 466, in raise_exception_on_not_ok_status
tensorflow.python.framework.errors_impl.InternalError: HandleElementToSlice Cannot copy slice: number of elements does not match. Shapes are: [element]: [1,4], [parent slice]: [5,4]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/", line 98, in <module>
next_element_val =
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 895, in run
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 1321, in _do_run
options, run_metadata)
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: HandleElementToSlice Cannot copy slice: number of elements does not match. Shapes are: [element]: [1,4], [parent slice]: [5,4]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
Caused by op 'IteratorGetNext', defined at:
File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/", line 92, in <module>
training_init_op, next_element = build_pipleline(FLAGS.data_dir, None, FLAGS.batch_size, FLAGS.capacity)
File "/home/aurora/workspaces/PycharmProjects/object_detection_models/builder/", line 84, in build_pipleline
next_element = iterator.get_next()
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/", line 304, in get_next
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/", line 379, in iterator_get_next
output_shapes=output_shapes, name=name)
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/", line 767, in apply_op
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/software/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): HandleElementToSlice Cannot copy slice: number of elements does not match. Shapes are: [element]: [1,4], [parent slice]: [5,4]
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?], [?,?,4], [?,?], [?,?], [?], [?,?,?,3], [?,?], [?], [?], [?]], output_types=[DT_STRING, DT_FLOAT, DT_INT64, DT_INT64, DT_INT64, DT_UINT8, DT_INT64, DT_STRING, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/cpu:0"](Iterator)]]
How can I use a batch size larger than 1? Thanks.
Upvotes: 1
Views: 3487
Reputation: 1
Check your data carefully. There must be a variable-length column in your data. For example, if your "col-1" has the values ["a|b", "a|b|c"], then the HandleElementToSlice error will be thrown. TensorFlow does not handle variable-length columns well, especially when using the Dataset API. However, there are reports that TF 1.5 will be better at processing VarLen columns.
Upvotes: 0
Reputation: 121
Function Dataset.batch() works only for tensors that all have the same size. If your input data has varying size you should use Dataset.padded_batch() function, which enables you to batch tensors of different shape by specifying one or more dimensions in which they may be padded.
From tensorflow documentation:
padded_batch( batch_size, padded_shapes, padding_values=None ) Combines consecutive elements of this dataset into padded batches.
Like Dataset.dense_to_sparse_batch(), this method combines multiple consecutive elements of this dataset, which might have different shapes, into a single element. The tensors in the resulting element have an additional outer dimension, and are padded to the respective shape in padded_shapes.
batch_size: A tf.int64 scalar tf.Tensor, representing the number of consecutive elements of this dataset to combine in a single batch.
padded_shapes: A nested structure of tf.TensorShape or tf.int64 vector tensor-like objects representing the shape to which the respective component of each input element should be padded prior to batching. Any unknown dimensions (e.g. tf.Dimension(None) in a tf.TensorShape or -1 in a tensor-like object) will be padded to the maximum size of that dimension in each batch.
padding_values: (Optional.) A nested structure of scalar-shaped tf.Tensor, representing the padding values to use for the respective components. Defaults are 0 for numeric types and the empty string for string types.
Returns: A Dataset.
You can also find an example of the function's usage (below) in the official TensorFlow Programmer's Guide.
# Element x of the dataset is a vector holding x copies of x, so consecutive
# elements have different lengths and cannot be combined with plain batch().
dataset = tf.data.Dataset.range(100)
dataset = dataset.map(lambda x: tf.fill([tf.cast(x, tf.int32)], x))
# padded_shapes=[None] pads every vector to the longest one in its batch.
dataset = dataset.padded_batch(4, padded_shapes=[None])

iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# `sess` is an already-created tf.Session.
print(sess.run(next_element))  # ==> [[0, 0, 0], [1, 0, 0], [2, 2, 0], [3, 3, 3]]
print(sess.run(next_element))  # ==> [[4, 4, 4, 4, 0, 0, 0],
                               #      [5, 5, 5, 5, 5, 0, 0],
                               #      [6, 6, 6, 6, 6, 6, 0],
                               #      [7, 7, 7, 7, 7, 7, 7]]
Upvotes: 4
Reputation: 126194
That error message suggests that the Dataset.batch() transformation is attempting to build a dense batch from tensors of different sizes. When you use Dataset.batch(), all tensors must be the same size.
To make the images the same size, consider using an image op like tf.image.resize_image_with_crop_or_pad() inside _parse_function_train().
Upvotes: 0