Reputation: 63
I have a model that has been trained to recognize different types of documents. I got the dataset from http://www.cs.cmu.edu/~aharley/rvl-cdip/.
Below is how I built my model:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import pickle
from keras.optimizers import SGD
from keras.models import Sequential, save_model
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D
# Training script: builds a small CNN document classifier, trains it on
# images read from class-labelled folders, and saves the result.

# Set image information.
# flow_from_directory() loads images as 3-channel RGB unless color_mode is
# changed, so the network input must be declared with 3 channels as well.
channels = 3
height = 1000
width = 754

# Image data generator to import images from the data folder
datagen = ImageDataGenerator()
# Flow images from folders (one sub-folder per label) and generate batches.
# The generators are created before the model so the number of classes can be
# read from the data instead of being hard-coded.
train_it = datagen.flow_from_directory(
    "data/train/", batch_size=16, target_size=(height, width), shuffle=True, class_mode='categorical')
test_it = datagen.flow_from_directory(
    "data/test/", batch_size=16, target_size=(height, width), shuffle=True, class_mode='categorical')
val_it = datagen.flow_from_directory(
    "data/validate/", batch_size=16, target_size=(height, width), shuffle=True, class_mode='categorical')

# One output unit per class folder found under data/train/.
num_classes = train_it.num_classes

model = Sequential()
# Add a Conv2D layer with 32 filters to the model
model.add(Conv2D(32, (3, 3), input_shape=(height, width, channels)))
# Add the ReLU activation function to the model
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# class_mode='categorical' yields one-hot labels, and categorical_crossentropy
# expects a probability distribution over the classes: the final layer
# therefore needs num_classes units with a softmax activation (not a single
# ReLU unit).
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',  # sparse_categorical_crossentropy
              # Adam(lr=.0001) SGD variation with learning rate
              optimizer='adam',
              metrics=['accuracy'])

# batch_size and shuffle are not passed to fit(): the generators already
# produce shuffled batches of 16, and fit() does not use these arguments for
# generator input.
history = model.fit(
    train_it,
    epochs=2,
    validation_data=val_it,
    steps_per_epoch=2000 // 16,   # 125 batches of 16 images per epoch
    validation_steps=800 // 16)   # 50 validation batches of 16 images
save_model(model, "./ComplexDocumentModel")
model.save("my_model", save_format='h5')
As the last line shows, I saved my model in the h5 format.
I am now trying to use that trained model to predict on a single image, to see which category it belongs to, with the script below.
from keras.models import load_model
import cv2
import numpy as np
import keras
from keras.preprocessing import image
# Load the model saved by the training script.
model = load_model('my_model')
# First try
def prepare(file):
    """Read *file* with OpenCV and reshape it for the model.

    NOTE(review): IMREAD_GRAYSCALE produces a single-channel image, while the
    model was built with a 3-channel input.
    """
    img_array = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    # NOTE(review): cv2.resize takes its size as (width, height), so this
    # requests width=1000, height=754 - presumably the reverse of what the
    # model expects; confirm.
    new_array = cv2.resize(img_array, (1000, 754))
    # NOTE(review): this asks for 3 * 1000 * 754 elements, but a resized
    # single-channel image only holds 1000 * 754.
    return new_array.reshape(3, 1000, 754, 1)
# Second try
img = image.load_img(
    "/home/user1/Desktop/Office/image-process/test/0000113760.tif")
img = image.img_to_array(img)
# NOTE(review): axis=-1 appends a trailing axis; a batch axis would go in
# front (axis=0).
img = np.expand_dims(img, axis=-1)
# Only the output of prepare() is passed to predict() here; the `img` array
# built above is not used by this call.
prediction = model.predict(
    [prepare("/home/user1/Desktop/Office/image-process/test/0000113760.tif")])
print(prediction)
I tried predicting the image in two ways, but both give the error:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 3 but received input with shape (None, 762, 3, 1)
I have also tried opening the image with PIL and converting it to NumPy array, an approach found on google. Unfortunately no other answer, blog, or video tutorial that I found, helped me.
Upvotes: 1
Views: 464
Reputation: 11631
You are trying to feed a grayscale image to a network that expects an image with 3 channels. You can stack the last channel 3 times to have a compatible shape, but it is possible that the prediction will be poor:
def prepare(file):
    """Load an image as grayscale and shape it as a one-image RGB batch.

    The image is read as a single channel, resized to 1000 rows by 754
    columns, expanded to 3 identical channels, and given a leading batch
    axis so it can be passed to a model whose input shape is (1000, 754, 3).

    Args:
        file: Path of the image file to read.

    Returns:
        A NumPy array of shape (1, 1000, 754, 3).

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    img_array = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if img_array is None:
        raise ValueError("could not read image: {}".format(file))
    # cv2.resize takes the target size as (width, height), so (754, 1000)
    # produces an array of shape (1000, 754).
    new_array = cv2.resize(img_array, (754, 1000))
    # converting to RGB by repeating the gray channel
    array_color = cv2.cvtColor(new_array, cv2.COLOR_GRAY2RGB)  # shape is (1000,754,3)
    array_with_batch_dim = np.expand_dims(array_color, axis=0)  # shape is (1,1000,754,3)
    return array_with_batch_dim
Another solution is to not convert your image to grayscale when you read it, by omitting the flag cv2.IMREAD_GRAYSCALE. The default behaviour of OpenCV is to load an image with 3 channels (in BGR order).
def prepare(file):
    """Load an image in colour and shape it as a one-image RGB batch.

    The image is read with 3 channels, resized to 1000 rows by 754 columns,
    converted from OpenCV's BGR channel order to RGB, and given a leading
    batch axis so it can be passed to a model whose input shape is
    (1000, 754, 3).

    Args:
        file: Path of the image file to read.

    Returns:
        A NumPy array of shape (1, 1000, 754, 3).

    Raises:
        ValueError: If OpenCV cannot read the file.
    """
    img_array = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising.
    if img_array is None:
        raise ValueError("could not read image: {}".format(file))
    # cv2.resize takes the target size as (width, height), so (754, 1000)
    # produces an array of shape (1000, 754, 3).
    new_array = cv2.resize(img_array, (754, 1000))
    # converting to RGB: OpenCV loads colour images in BGR order, whereas the
    # Keras image loaders used for training produce RGB.
    array_rgb = cv2.cvtColor(new_array, cv2.COLOR_BGR2RGB)
    array_with_batch_dim = np.expand_dims(array_rgb, axis=0)  # shape is (1,1000,754,3)
    return array_with_batch_dim
Note: Depending on your preprocessing, you might need to normalize your image between 0 and 1 by dividing it by 255 before feeding it to the network.
Upvotes: 1