Reputation: 71
I have written a generator for multi-input nn but while using tf.data.Dataset.from_generator() function im getting error, all the data is in numpy where : input 1 is of shape(16,100,223,3), input 2 is of shape(100,223), input 3 is of shape (16,) and label is of shape(2,). data is a array of all these combined
my code
def data_generator(train_list, batch_size):
i = 0
j = 0
flag = True
while True:
# inputs = []
# outputs = []
if i < len(train_list):
if flag == True:
train_path = os.path.join(training_dir, train_list[i])
data = np.load(train_path, allow_pickle=True)
flag = False
if j >= len(data):
j = 0
i += 1
flag = True
del data
else:
if len(data[j:]) >= batch_size:
input_1 = data[j:(j+batch_size), 0]
input_2 = data[j:(j+batch_size), 1]
input_3 = data[j:(j + batch_size), 2]
outputs= data[j:(j+batch_size), -1]
j += (batch_size)
yield {'Input_Branch-1' : input_1,'Input_Branch-2': input_2, 'Input_Branch-3': input_3}, outputs
elif len(data[j:])< batch_size:
input_1 = data[j:, 0]
input_2 = data[j:, 1]
input_3 = data[j:, 2]
outputs= data[j:, -1]
j = 0
i+= 1
flag = True
del data
yield {'Input_Branch-1': input_1, 'Input_Branch-2': input_2, 'Input_Branch-3': input_3}, outputs
else:
i = 0
del data
flag = True
np.random.shuffle(train_list)
batch_size = 5
dataset = tf.data.Dataset.from_generator(data_generator, args= [train_list, batch_size],
output_types = ({'Input_Branch-1': tf.uint8, 'Input_Branch-2': tf.uint8, 'Input_Branch-3': tf.float32}, tf.float32),)
# for seeing the output of data generator
num = 0
for data, labels in dataset:
print(data.shape, labels.shape)
print(labels)
print()
num = num + 1
if num > 1: break
I'm getting the following error
2020-08-04 17:43:30.653430: W tensorflow/core/framework/op_kernel.cc:1741] Invalid argument: TypeError: `generator` yielded an element that could not be converted to the expected type. The expected type was uint8, but the yielded element was [array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
array([[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
...,
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0],
[0, 0, 0, ..., 0, 0, 0]], dtype=uint8)].
TypeError: only size-1 arrays can be converted to Python scalars
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\panwa\.conda\envs\Vision\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 801, in generator_py_func
ret, dtype=dtype.as_numpy_dtype))
File "C:\Users\panwa\.conda\envs\Vision\lib\site-packages\tensorflow\python\ops\script_ops.py", line 203, in _convert
result = np.asarray(value, dtype=dtype, order="C")
File "C:\Users\panwa\.conda\envs\Vision\lib\site-packages\numpy\core\_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
This same error is repeated multiple times Please Help!!!
Upvotes: 2
Views: 4995
Reputation: 303
I had a similar problem, which I believe I solved by specifying the shape that the generator generated. For me, the output was 2 images 32x32x3,as well as two labels so i used this, as an argument to the from_generator method.
dataset = tf.data.Dataset.from_generator
generator,
# this is the part that you care about #
output_signature=(
tf.TensorSpec(shape=(2,32,32,3), dtype=tf.int32),
tf.TensorSpec(shape=(2,1), dtype=tf.int32,))
#
)
I tried to run your code but train_list is not defined. Still I believe that this could work, maybe with a change or two
output_signature = (
# the one is there to clarify that there is one of these objects
tf.TensorSpec(shape=(1,16,100,223,3), dtype=tf.int32)
tf.TensorSpec(shape=(1,100,223), dtype=tf.int32)
tf.TensorSpec(shape=(1,16,) ,dtype=tf.int32)
tf.TensorSpec(shape=(2,1),dtype=tf.int32)
)
Here is a link with a toy dataset that I made in order to make sense out of this mess
https://colab.research.google.com/drive/17pu6jJYLGP-I1nJnzigOx2YOh3Ih4cvG?usp=sharing
Upvotes: 5