Reputation: 976
I have a set of images located in 3 separate folders, one folder per Type. I want to iterate through every Type and count the red pixel values of every image. I have set a range for red, from 200 to 256. I want to create a histogram for each Type and later cluster the histograms to discriminate between the 3 classes. My experience with Python is very limited and I am stuck on how to isolate and count the red pixel values. I have attached my code and the resulting histogram for Type 1, which is a straight line. Could someone help with this?
import numpy as np
import cv2
import os.path
import glob
import matplotlib.pyplot as plt
# For each Type, accumulate a histogram of the red-channel values in
# [200, 256) over every image in that Type's folder, then plot it.
for t in [1]:
    # Load the file list for this Type.
    files = glob.glob(os.path.join("..", "data", "train", "Type_{}".format(t), "*.jpg"))
    no_files = len(files)
    # One bin per intensity 200..255; summed over all readable images so the
    # plot reflects the whole Type instead of only the last file processed.
    hist = np.zeros((56, 1), dtype=np.float32)
    # Iterate and read.
    for n, file in enumerate(files):
        try:
            image = cv2.imread(file)
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                # rather than raising, so turn it into an explicit error.
                raise IOError("could not read image")
            # OpenCV stores channels in BGR order, so red is channel 2.
            hist += cv2.calcHist([image], [2], None, [56], [200, 256])
            print(file, t, "-files left", no_files - n)
        except Exception as e:
            print(e)
            print(file)
    plt.plot(hist)
    plt.show()
Upvotes: 1
Views: 2055
Reputation: 13743
This is the solution I came up with. I have taken the liberty to refactor and simplify your code a bit.
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
# Raw string: in a normal literal '\U' starts a Unicode escape, which makes
# this Windows path a SyntaxError in Python 3.
root = r'C:\Users\you\imgs'  # Change this appropriately
folders = ['Type_1', 'Type_2', 'Type_3']
extension = '*.bmp'  # Change if necessary
threshold = 150  # Adjust to fit your needs
n_bins = 5  # Tune these values to customize the plot
width = 2.
colors = ['cyan', 'magenta', 'yellow']
# Bin edges spanning 0-100 (percent of pixels) and the midpoint of each bin.
edges = np.linspace(0, 100, n_bins+1)
centers = .5*(edges[:-1] + edges[1:])
# This is just a convenience class used to encapsulate data
class img_type:
    """Per-folder record: where the images live, their stats, and a plot colour.

    Attributes are plain and public; ``percents`` starts empty and is filled
    by the caller with one value per image.
    """

    def __init__(self, folder, color):
        self.folder = folder
        # Fresh list per instance so records never share accumulated values.
        self.percents = []
        self.color = color
# One record per image type, pairing each folder with its bar colour.
lst = [img_type(f, c) for f, c in zip(folders, colors)]

fig, ax = plt.subplots()
for n, obj in enumerate(lst):
    filenames = glob.glob(os.path.join(root, obj.folder, extension))
    for fn in filenames:
        img = io.imread(fn)
        red = img[:, :, 0]
        # Percentage of this image's pixels whose red value reaches the threshold.
        obj.percents.append(100.*np.sum(red >= threshold)/red.size)
    # Distribution of those per-image percentages, as a percentage of images.
    h, _ = np.histogram(obj.percents, bins=edges)
    h = np.float64(h)
    total = h.sum()
    if total > 0:
        # Skip normalisation when the folder matched no files: dividing by
        # zero would turn every bar height into NaN.
        h /= total
        h *= 100.
    # Shift each type's bars sideways so the groups sit next to each other.
    ax.bar(centers + (n - .5*len(lst))*width, h, width, color=obj.color)

ax.legend(folders)
ax.set_xlabel('% of pixels whose red component is >= threshold')
ax.set_ylabel('% of images')
plt.show()
Notice that I have used scikit-image rather than OpenCV to read the images. If this is not an option for you, insert import cv2
and change:
img = io.imread(fn)
red = img[:, :, 0]  # first channel is taken as red in the scikit-image version
to:
img = cv2.imread(fn)
red = img[:, :, 2]  # cv2.imread returns channels in BGR order, so red is index 2
Upvotes: 2