Koratun

Reputation: 13

Invalid argument error with TensorFlow Dataset

I am trying to create a model that takes two images (one taken right after the other) and train it to predict how much the camera has moved between the two images. A smaller model processes each image separately, and a larger model concatenates the two outputs.

When I test it, the model compiles just fine, but it crashes with an invalid argument error when I call fit().

Epoch 1/5
2021-10-11 11:41:23.993854: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-10-11 11:41:28.606390: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8200
      7/Unknown - 18s 81ms/step - loss: 63.8068
2021-10-11 11:41:41.279014: W tensorflow/core/framework/op_kernel.cc:1692] OP_REQUIRES failed at transpose_op.cc:143 : Invalid argument: transpose expects a vector of size 3. But input(1) is a vector of size 4
2021-10-11 11:41:41.279317: W tensorflow/core/framework/op_kernel.cc:1692] OP_REQUIRES failed at transpose_op.cc:143 : Invalid argument: transpose expects a vector of size 3. But input(1) is a vector of size 4
Traceback (most recent call last):
  File "d:/.../Deep Sight/deep_sight.py", line 155, in <module>
    main()
  File "d:/.../Deep Sight/deep_sight.py", line 150, in main
    final_model.fit(train_data, epochs=5)
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1184, in fit
    tmp_logs = self.train_function(iterator)
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\def_function.py", line 917, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 3040, in __call__
    filtered_flat_args, captured_inputs=graph_function.captured_inputs)  # pylint: disable=protected-access
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 1964, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\function.py", line 596, in call
    ctx=ctx)
  File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow\python\eager\execute.py", line 60, in quick_execute
    inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  transpose expects a vector of size 3. But input(1) is a vector of size 4
         [[{{node gradient_tape/model_1/model/conv2d/Conv2D/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer}}]]
         [[Func/mean_squared_error/cond/then/_0/input/_29/_48]]
  (1) Invalid argument:  transpose expects a vector of size 3. But input(1) is a vector of size 4
         [[{{node gradient_tape/model_1/model/conv2d/Conv2D/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_3137]

Function call stack:
train_function -> train_function

My batch size is 32, as defined in my dataset. I believe the error has something to do with how I am using tf.data.Dataset. Previously, I just loaded all the data into memory and the model ran just fine. However, because my dataset is much larger now, I switched to tf.data.Dataset to feed in the data. This required me to pass the two images together as a single tensor, and to add tf.split and tf.squeeze calls inside the model to separate them again.

Each element of train_data is a nested structure: features of shape (32, 2, 128, 128, 3) (a batch of 32 image pairs) and, per pair, a scalar label (the vertical movement between the two images).
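
To make the shapes concrete, here is the split/squeeze step from the model in isolation (a sketch assuming a full batch of 32):

pair_batch = tf.zeros((32, 2, 128, 128, 3))
first, second = tf.split(pair_batch, num_or_size_splits=2, axis=1)  # each (32, 1, 128, 128, 3)
# Each (32, 128, 128, 3) here; note tf.squeeze with no axis argument drops every size-1 dimension
first, second = tf.squeeze(first), tf.squeeze(second)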

def load_data(image_files):
    image_file, image_file2 = bytes.decode(image_files.numpy()[0]), bytes.decode(image_files.numpy()[1])
    # Extract number of png file
    run_folder = image_file[:image_file.rfind('\\')][:-6]
    pic_number = int(image_file[image_file.rfind('\\')+1:image_file.find('.')])

    # Grab the y positions for the indicated pictures, then find their difference
    with open(run_folder+"\\yPos.txt", 'r') as yPosFile:
        for _ in range(pic_number):
            yPosFile.readline()
        oldY = float(yPosFile.readline())
        dY = float(yPosFile.readline()) - oldY

    # Load in the images from their file names, and strip the Alpha value from the RGBA values. It's always 255, so we don't need that extra data.
    image = imageio.imread(image_file)
    # Scale the RGB data down to between 0-1 so that the model has an easier time creating weights.
    return image[:, :, :-1]/255, imageio.imread(image_file2)[:, :, :-1]/255, dY


# Takes the list of output from the load_data function (which must be wrapped in tf.py_function)
# and outputs the data in the nested structure necessary for training, which the map function can process.
# Unfortunately, the py_function cannot output nested data structures, so we have to do a little wrapping here.
def load_data_wrapper(image_files):
    image, image2, dY = tf.py_function(load_data, [image_files], [tf.float32, tf.float32, tf.float32])
    return ([image, image2], dY)
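
Note that tf.py_function only propagates dtypes, so image, image2, and dY come back with unknown static shapes. A variant that restores them explicitly (a sketch, assuming 128x128 RGB frames after the alpha strip, and stacking the pair into the single tensor the model input expects) would be:

def load_data_wrapper(image_files):
    image, image2, dY = tf.py_function(
        load_data, [image_files], [tf.float32, tf.float32, tf.float32])
    # Restore the static shapes lost by tf.py_function (assumed 128x128 RGB)
    image.set_shape((128, 128, 3))
    image2.set_shape((128, 128, 3))
    dY.set_shape(())
    # Stack the pair into one (2, 128, 128, 3) tensor
    return tf.stack([image, image2]), dY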


# Takes dataset like [0, 1, 2, 3, 4]
# and converts it to: [[0,1],[1,2],[2,3],[3,4]]
def prep_dataset(dtst):
    # First repeat individual elements, then print those repeated elements after each other
    dtst = dtst.interleave(lambda x: tf.data.Dataset.from_tensors(x).repeat(2), cycle_length=2, block_length=2)
    # Skip the first element so that numbers are paired with the next greatest in the sequence with the batch function. 
    return dtst.skip(1).batch(2, drop_remainder=True) #.take_while(lambda x: tf.squeeze(tf.greater(tf.shape(x), 1)))
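
As a quick sanity check, running prep_dataset on a toy range dataset shows the intended pairing:

pairs = prep_dataset(tf.data.Dataset.range(5))
print([p.numpy().tolist() for p in pairs])  # [[0, 1], [1, 2], [2, 3], [3, 4]]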


def tf_load_data():
    runs = os.listdir("Data")
    image_datasets = None

    for run in runs:
        image_dataset = tf.data.Dataset.list_files("Data/"+run+"/photos/?.png", shuffle=False).apply(prep_dataset)
        image_dataset = image_dataset.map(load_data_wrapper, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        if image_datasets is None:
            image_datasets = image_dataset
        else:
            image_datasets = image_datasets.concatenate(image_dataset)

    #print(image_datasets)

    image_datasets = image_datasets.shuffle(buffer_size=int(599*25/32)).batch(32)

    # for data in image_datasets.take(1):
    #     print(data)

    return image_datasets


def main():
    # Create model

    # Start with smaller model that processes the two images in the same way.
    single_image_input = keras.Input(shape=(128,128,3))

    image = layers.Conv2D(64, (3,3))(single_image_input)
    image = layers.LeakyReLU()(image)
    image = layers.BatchNormalization()(image)
    # Run through MaxPool2D to help the algorithm identify features in different areas of the image.
    # Has the effect of downsampling and cutting the dimensions in half.
    image = layers.MaxPool2D()(image)

    image = layers.Conv2D(128, (3, 3))(image)
    image = layers.LeakyReLU()(image)
    image = layers.BatchNormalization()(image)
    image = layers.Dropout(.3)(image)

    image_model = keras.Model(single_image_input, image)
    
    # Create larger model
    image_inputs = keras.Input(shape=(2,128,128,3))

    first_image, second_image = tf.split(image_inputs, num_or_size_splits=2, axis=1)
    first_image, second_image = tf.squeeze(first_image), tf.squeeze(second_image)

    image_outputs = [image_model(first_image), image_model(second_image)]
    model = layers.Concatenate()(image_outputs)

    model = layers.Flatten()(model)

    model = layers.Dense(128)(model)
    model = layers.LeakyReLU()(model)
    model = layers.BatchNormalization()(model)
    model = layers.Dropout(.3)(model)

    # Output is change in y-position of drone
    out_layer = layers.Dense(1, activation='linear')(model)

    final_model = keras.Model(image_inputs, out_layer)
    final_model.compile(loss="mse", optimizer=optimizers.Adam(lr=0.0003, beta_1=0.7))

    image_model.summary()

    final_model.summary()


    #Preprocess data
    print("Loading and processing data...")
    train_data = tf_load_data()

    #Train model
    final_model.fit(train_data, epochs=5)



if __name__ == "__main__":
    main()
    #tf_load_data()

Here is my full code file and a sample of the data that I am working with in case it helps: Data

Upvotes: 1

Views: 1089

Answers (1)

elbe

Reputation: 1508

Below is some code that uses the Sequence class from Keras. It is quite simple and it works. I understand that tf.data.Dataset is more attractive, but you are doing complex operations with it.

import os
import glob
import random

import imageio
import numpy as np
import tensorflow as tf


class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, datadir, batch_size=32, shuffle=True):
        # Initialization
        self.datadir = datadir
        self.yposdir = datadir
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Read the y positions
        with open(os.path.join(datadir, 'yPos.txt'), 'r') as f:
            yData = [float(s) for s in f.readlines()]
        self.yData = np.array(yData)
        print(self.yData.shape)

        self.photofiles = glob.glob(os.path.join(datadir, 'photos', '*'))
        print(len(self.photofiles))
        # Get the image size from the first image
        image0 = np.array(imageio.imread(self.photofiles[0]))
        self.xsize, self.ysize, _ = image0.shape
        print(self.xsize)
        print(self.ysize)

        # Each sample is a pair (i, i+1), so there is one fewer sample than images
        n_samples = len(self.photofiles)
        self.set_len = n_samples - 1
        self.indexes = range(self.set_len)

        if self.shuffle:
            self.indexes = random.sample(range(self.set_len), k=self.set_len)

    def __len__(self):
        return self.set_len // self.batch_size

    def __getitem__(self, index):
        # Generate the indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        ypos = np.zeros((self.batch_size,), dtype='float32')
        image_concat_pair_batch = np.zeros((self.batch_size, 2, self.xsize, self.ysize, 3), dtype='float32')

        for count, ind in enumerate(indexes):
            # Get the two consecutive images
            curr_image = imageio.imread(self.photofiles[ind])
            next_image = imageio.imread(self.photofiles[ind+1])
            # Get the positions
            curr_ypos = self.yData[ind]
            new_ypos = self.yData[ind+1]
            # Difference in y position is the label
            ypos[count] = new_ypos - curr_ypos
            # Add a leading axis and strip the alpha channel
            curr_image = np.array(curr_image)[np.newaxis, :, :, :-1]
            next_image = np.array(next_image)[np.newaxis, :, :, :-1]
            # Concatenate the image pair and scale to [0, 1]
            image_concat = np.concatenate((curr_image, next_image), axis=0)/255.
            # Store
            image_concat_pair_batch[count] = image_concat

        return tf.convert_to_tensor(image_concat_pair_batch, dtype=tf.dtypes.float32), tf.convert_to_tensor(ypos, dtype=tf.dtypes.float32)

    def on_epoch_end(self):
        if self.shuffle:
            self.indexes = random.sample(range(self.set_len), k=self.set_len)
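
A hedged usage sketch (the run folder name "run1" is an assumption based on the question's "Data" layout; final_model is the model from the question):

# Hypothetical: point the generator at one run folder and train on it directly
train_gen = DataGenerator(os.path.join("Data", "run1"), batch_size=32, shuffle=True)
final_model.fit(train_gen, epochs=5)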

Upvotes: 1
