Luis Cruz
Luis Cruz

Reputation: 31

"ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions."

I'm running a python file "data_prep.py" in which it prepares and load image data for use in a machine learning model, particularly for classification tasks involving images of different categories. However, Ive been getting an error everytime I tried to return the value in prep_and_load_data():

import math
import os 
from random import shuffle
import constants as CONST 
import cv2
import pickle
import glob
import numpy as np

def get_size_statistics():
    heights = []
    widths = []
    img_count = 0
    DIR = CONST.TRAIN_DIR2
    for img in os.listdir(CONST.TRAIN_DIR2):
        path = os.path.join(DIR, img)
        data = cv2.imread(path)
        # data = np.array(Image.open(path))
        heights.append(data.shape[0])
        widths.append(data.shape[1])
        img_count += 1
    avg_height = sum(heights) / len(heights)
    avg_width = sum(widths) / len(widths)
    print("Average Height: " + str(avg_height))
    print("Max Height: " + str(max(heights)))
    print("Min Height: " + str(min(heights)))
    print('\n')
    print("Average Width: " + str(avg_width))
    print("Max Width: " + str(max(widths)))
    print("Min Width: " + str(min(widths)))


def label_img(name):
    # print(name)
    word_label = name.split('.')[0]
    # word_label = 'cat'
    label = CONST.LABEL_MAP[word_label]
    label_arr = np.zeros(2)
    label_arr[label] = 1
    return label_arr
def prep_and_load_data():
    DIR2 = CONST.TRAIN_DIR2
    data = []
    image_paths = os.listdir(DIR2)
    shuffle(image_paths)
    count = 0
    max_images = 100 
    for img_path in image_paths:
        # Skip directories and non-image files
        if not img_path.endswith(('.jpg', '.jpeg', '.png')):
            print(f"Skipping non-image file or directory: {img_path}")
            continue
        
        label = label_img(img_path)
        path = os.path.join(DIR2, img_path)
        image = cv2.imread(path)
        image = cv2.resize(image, (CONST.IMG_SIZE, CONST.IMG_SIZE))
        image = image.astype('float') / 255.0 
        data.append([image, label])
        count += 1
        # print(count)
        if count == max_images:
            print(f"Reached the limit of {max_images} images.")
            break
    shuffle(data)
    print(len(data))
    print('done')
    print(np.array(data))
    return data

if __name__ == "__main__":
    prep_and_load_data()
    

The error in questions is this, as I'm struggling to determine what went wrong as ths is a pre-existing code I've found on Github for learning purposes: "ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (100, 2) + inhomogeneous part."

For the record, the constant import is taken from this code:

TRAIN_DIR2 = 'Code\\dogs-vs-cats\\train\\train'
TEST_DIR2 = 'Code\\dogs-vs-cats\\test1\\test1'
CAT_LBL = 0
DOG_LBL = 1
CAT = 'cat'
DOG = 'dog'
LABEL_MAP = {
    CAT: CAT_LBL,
    DOG: DOG_LBL
}
DATA_SIZE = 18_000
IMG_SIZE = 110
SPLIT_RATIO = 0.8

Is there a way to fix this?

Upvotes: 0

Views: 192

Answers (0)

Related Questions