Reputation: 31
I'm running a python file "data_prep.py" in which it prepares and load image data for use in a machine learning model, particularly for classification tasks involving images of different categories. However, Ive been getting an error everytime I tried to return the value in prep_and_load_data():
import math
import os
from random import shuffle
import constants as CONST
import cv2
import pickle
import glob
import numpy as np
def get_size_statistics():
heights = []
widths = []
img_count = 0
DIR = CONST.TRAIN_DIR2
for img in os.listdir(CONST.TRAIN_DIR2):
path = os.path.join(DIR, img)
data = cv2.imread(path)
# data = np.array(Image.open(path))
heights.append(data.shape[0])
widths.append(data.shape[1])
img_count += 1
avg_height = sum(heights) / len(heights)
avg_width = sum(widths) / len(widths)
print("Average Height: " + str(avg_height))
print("Max Height: " + str(max(heights)))
print("Min Height: " + str(min(heights)))
print('\n')
print("Average Width: " + str(avg_width))
print("Max Width: " + str(max(widths)))
print("Min Width: " + str(min(widths)))
def label_img(name):
# print(name)
word_label = name.split('.')[0]
# word_label = 'cat'
label = CONST.LABEL_MAP[word_label]
label_arr = np.zeros(2)
label_arr[label] = 1
return label_arr
def prep_and_load_data():
DIR2 = CONST.TRAIN_DIR2
data = []
image_paths = os.listdir(DIR2)
shuffle(image_paths)
count = 0
max_images = 100
for img_path in image_paths:
# Skip directories and non-image files
if not img_path.endswith(('.jpg', '.jpeg', '.png')):
print(f"Skipping non-image file or directory: {img_path}")
continue
label = label_img(img_path)
path = os.path.join(DIR2, img_path)
image = cv2.imread(path)
image = cv2.resize(image, (CONST.IMG_SIZE, CONST.IMG_SIZE))
image = image.astype('float') / 255.0
data.append([image, label])
count += 1
# print(count)
if count == max_images:
print(f"Reached the limit of {max_images} images.")
break
shuffle(data)
print(len(data))
print('done')
print(np.array(data))
return data
if __name__ == "__main__":
prep_and_load_data()
The error in questions is this, as I'm struggling to determine what went wrong as ths is a pre-existing code I've found on Github for learning purposes: "ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (100, 2) + inhomogeneous part."
For the record, the constant import is taken from this code:
TRAIN_DIR2 = 'Code\\dogs-vs-cats\\train\\train'
TEST_DIR2 = 'Code\\dogs-vs-cats\\test1\\test1'
CAT_LBL = 0
DOG_LBL = 1
CAT = 'cat'
DOG = 'dog'
LABEL_MAP = {
CAT: CAT_LBL,
DOG: DOG_LBL
}
DATA_SIZE = 18_000
IMG_SIZE = 110
SPLIT_RATIO = 0.8
Is there a way to fix this?
Upvotes: 0
Views: 192