Reputation:
Here I used the PIL library to load a single image (not an image dataset) and converted it to a NumPy array using the NumPy library. It works perfectly for a single image.
Now I want to convert an entire image dataset, split into training, testing, and validation data, into NumPy arrays.
Below is the code I used for converting a single image to a NumPy array.
# import the needed libraries
from PIL import Image
from numpy import asarray

# load the image
image = Image.open('flower/1.jpg')

# convert the image to a numpy array
data = asarray(image)  # data is the array form of the image
Upvotes: 2
Views: 3688
Reputation: 76
If you just want to convert the NumPy array back to an image, the following code snippet should work. If you want to replicate the process for the entire dataset, you need to call it on every single image. How you do that depends on the model you're trying to build (image classification, object detection, etc.) and what you're using to build it (TensorFlow, Theano, etc.).
Solution 1
from PIL import Image
from numpy import asarray

# load the image and convert it to a numpy array
image = Image.open('flower/1.jpg')
data = asarray(image)

# convert the numpy array back to an image, save and display it
img = Image.fromarray(data)
img.save('test.png')
img.show()
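For reference, here is a minimal sketch of how you could apply the same conversion to a whole folder of images at once; the flower/train layout and the folder_to_array helper are illustrative assumptions, so adapt the paths and file extension to your own dataset:

import os
import numpy as np
from glob import glob
from PIL import Image

def folder_to_array(folder, size=(224, 224)):  # hypothetical helper, not from the original post
    # load every .jpg in the folder, resize to a common shape, and stack into one array
    images = []
    for path in sorted(glob(os.path.join(folder, "*.jpg"))):
        img = Image.open(path).convert("RGB").resize(size)
        images.append(np.asarray(img))
    return np.stack(images)  # shape: (num_images, height, width, 3)

# repeat for each split of your dataset
train_data = folder_to_array('flower/train')
valid_data = folder_to_array('flower/valid')
test_data = folder_to_array('flower/test')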
Since you're working on an image classification problem, the following code could serve you well. Customize it for your problem; I've commented in the code where you need to make the changes.
Solution 2
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.applications import MobileNetV2 #Change Here: Select the classification architecture you need
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
def build_model(size, num_classes):
    inputs = Input((size, size, 3))
    backbone = MobileNetV2(input_tensor=inputs, include_top=False, weights="imagenet") #Change Here: Select the classification architecture you need
    backbone.trainable = True
    x = backbone.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x) #Change Here: Try different dropout values b/w .2 to .8
    x = Dense(1024, activation="relu")(x)
    x = Dense(num_classes, activation="softmax")(x)
    model = tf.keras.Model(inputs, x)
    return model
def read_image(path, size):
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (size, size))
    image = image / 255.0
    image = image.astype(np.float32)
    return image

# Optional: if you load data with Keras generators instead of the tf.data pipeline below,
# you can add augmentation like this (requires
# from tensorflow.keras.preprocessing.image import ImageDataGenerator):
# train_datagen = ImageDataGenerator(rescale=1./255,
#                                    rotation_range=30, #Change Here: Select any rotation range b/w 10 to 90
#                                    zoom_range=0.3,
#                                    width_shift_range=0.2, #Change Here: Select width shift as per your images. My advice: try b/w .2 to .5
#                                    height_shift_range=0.2, #Change Here: Select height shift as per your images. My advice: try b/w .2 to .5
#                                    horizontal_flip=True)
# train_generator = train_datagen.flow_from_directory(path, shuffle=False, batch_size=10, seed=10) #Change Here: Select batch_size as per your need
def parse_data(x, y):
    x = x.decode()
    num_class = 120 #Change Here: num_class should be equal to the number of classes (labels) in your dataset
    size = 224 #Change Here: Select size as per your chosen model architecture
    image = read_image(x, size)
    label = [0] * num_class  # one-hot encode the label
    label[y] = 1
    label = np.array(label)
    label = label.astype(np.int32)
    return image, label
def tf_parse(x, y):
    x, y = tf.numpy_function(parse_data, [x, y], [tf.float32, tf.int32])
    x.set_shape((224, 224, 3))
    y.set_shape((120,)) #Change Here: match the num_class you set above
    return x, y
def tf_dataset(x, y, batch=8): #Change Here: Choose default batch size as per your needs
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.map(tf_parse)
    dataset = dataset.batch(batch)
    dataset = dataset.repeat()
    return dataset
if __name__ == "__main__":
    path = "/content/gdrive/My Drive/Dog Breed Classification/" #Change Here: Give path to your parent directory
    train_path = os.path.join(path, "train/*")
    test_path = os.path.join(path, "test/*")
    labels_path = os.path.join(path, "labels.csv") #Change Here: Give name of your csv file

    labels_df = pd.read_csv(labels_path)
    breed = labels_df["breed"].unique() #Change Here: replace breed with the column name, denoting class, in your csv file
    print("Number of Breed: ", len(breed))

    breed2id = {name: i for i, name in enumerate(breed)} #Change Here: replace breed & id with the column names denoting class & image file in your csv file
    #repeat the same every place where breed or id is mentioned

    ids = glob(train_path)
    labels = []
    for image_id in ids:
        image_id = image_id.split("/")[-1].split(".")[0]  # strip the directory and file extension to get the bare image id
        breed_name = list(labels_df[labels_df.id == image_id]["breed"])[0]
        breed_idx = breed2id[breed_name]
        labels.append(breed_idx)

    ## Splitting the dataset
    train_x, valid_x, train_y, valid_y = train_test_split(ids, labels, test_size=0.2, random_state=42) #Change Here: select test size as per your need. My advice: go between .2 to .3

    ## Parameters
    size = 224 #Change Here: Select size as per your chosen model architecture
    num_classes = 120 #Change Here: num_classes should be equal to the number of classes (labels) in your dataset
    lr = 1e-4 #Change Here: Select as per your need. My advice: choose anywhere b/w 1e-4 to 1e-2
    batch = 16 #Change Here: Select as per your need
    epochs = 50 #Change Here: Select as per your need

    ## Model
    model = build_model(size, num_classes)
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr), metrics=["acc"])
    # model.summary()

    ## Dataset
    train_dataset = tf_dataset(train_x, train_y, batch=batch)
    valid_dataset = tf_dataset(valid_x, valid_y, batch=batch)

    ## Training
    callbacks = [
        ModelCheckpoint("/content/gdrive/My Drive/Dog Breed Classification/Model/model-1-{epoch:02d}.h5", #Change Here: Give the path where you want to store your model
                        verbose=1, save_best_only=True),
        ReduceLROnPlateau(factor=0.1, patience=5, min_lr=1e-6)] #Change Here: Set factor, patience, min_lr as per your need. My advice: leave as is, then change to see if model performance improves.
    train_steps = (len(train_x)//batch) + 1
    valid_steps = (len(valid_x)//batch) + 1
    model.fit(train_dataset,
              steps_per_epoch=train_steps,
              validation_steps=valid_steps,
              validation_data=valid_dataset,
              epochs=epochs,
              callbacks=callbacks)
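Note that the script above never touches the test folder. As a rough sketch (assuming the test images are unlabeled, so we only predict), something like this could be appended at the end of the __main__ block; test_ids, test_images, and preds are illustrative names, not part of the original answer:

    ## Inference on the test set (hypothetical addition, reusing read_image and the trained model)
    test_ids = glob(test_path)
    test_images = np.array([read_image(p, size) for p in test_ids])
    preds = model.predict(test_images)  # one softmax vector of length num_classes per image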
Upvotes: 3