Reputation: 65
I'm developing a CNN for image recognition. I have a set of different images, where in each image I have a set of different bounding boxes (at least 3 bounding boxes for each image).
I would like to automatically extract the regions inside the bounding boxes and crop them, so that I obtain a set of cropped images, one for the content of each bounding box. I have created both a VOC XML annotation and a cumulative .csv file which contains all the details for each image; here is an extract:
,filepath,x1,x2,y1,y2,class_name
0,71.jpeg,81,118,98,122,os
1,71.jpeg,120,156,83,110,od
2,71.jpeg,107,161,136,154,m
Basically, I have the mentioned images in .jpeg format in a dedicated folder (\train_images) and the annotation files. Do you have a fast implementation that deals with this issue?
Thank you
Upvotes: 0
Views: 6040
Reputation:
If anyone is still searching for the answer, you can see these scripts:
This script crops each bounding box and automatically saves the crop
into the folder of its corresponding class
from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET
original_file = './images/' # directory containing the source images
dst = './save/' # root output directory; crops are saved in one sub-folder per class below it
def check_folder_exists(path):
    """Ensure that the directory *path* exists, creating it if necessary.

    Missing parent directories are created as well.  A message is printed
    only when this call actually creates the directory.

    Args:
        path: Directory path to create when it is absent.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than it already existing.
    """
    if os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except FileExistsError:
        # Something else created the directory between the check above and
        # makedirs().  The previous version compared e.errno with
        # errno.EEXIST, but errno was never imported, so this case raised
        # NameError instead of being ignored.
        return
    print('create ' + path)
# Collect one row per annotated object from every Pascal VOC XML file.
# Row layout: [filename, width, height, class_name, xmin, ymin, xmax, ymax],
# with every field kept as the raw text read from the XML.
seed_arr = []
for xml_file in glob.glob('./labels/*.xml'): #your xml directory
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # Reset for every file: previously a file without a <size> element
    # either raised NameError (first file) or silently reused the
    # dimensions of the file parsed before it.
    width = height = ''
    for size_tag in root.findall('size'):
        width = size_tag.find('width').text
        height = size_tag.find('height').text

    for object_tag in root.findall('object'):
        class_name = object_tag.find('name').text
        xmin = object_tag.find('bndbox/xmin').text
        ymin = object_tag.find('bndbox/ymin').text
        xmax = object_tag.find('bndbox/xmax').text
        ymax = object_tag.find('bndbox/ymax').text
        seed_arr.append(
            [filename, width, height, class_name, xmin, ymin, xmax, ymax]
        )

# Rows are lists of strings, so this is a lexicographic sort (by filename
# first); the resulting position is what a consumer gets from enumerate().
seed_arr.sort()
# Crop every annotated box, resize the crop to 224x224 and store it as a JPEG
# inside a per-class sub-folder of ``dst``.
newsize = (224, 224)
for index, line in enumerate(seed_arr):
    filename, width, height, class_name, xmin, ymin, xmax, ymax = line

    load_img_path = os.path.join(original_file, filename)

    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    # The row index is prefixed so that several boxes cropped from the same
    # image do not overwrite each other.
    save_img_path = os.path.join(save_class_path, str(index)+'_'+filename)

    # The context manager closes the source file once the crop has been
    # taken; the handle was previously left open for every row.
    with Image.open(load_img_path) as img:
        crop_img = img.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
        im1 = crop_img.resize(newsize)

    # JPEG cannot store alpha or palette images; saving such a crop raised
    # OSError ("cannot write mode ... as JPEG") before this conversion.
    if im1.mode in ('RGBA', 'LA', 'P', 'PA'):
        im1 = im1.convert('RGB')

    im1.save(save_img_path, 'JPEG')
    print('save ' + save_img_path)
If you are trying to crop from csv then check this link :
Upvotes: 0
Reputation: 1700
You can return a list of tuples instead and iterate over it, as below.
#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob
# Folders that hold the images and their annotation files.
root_images = "/content/images"
root_annots = "/content/annotation"

all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# Every entry found directly inside the annotation folder.
breeds = glob.glob('/content/annotation/')
annotation = [entry for b in breeds for entry in glob.glob(b + "/*")]
print(f"Total annotation : {len(annotation)}")

# Key: text before the first "-" in the name of each entry's parent folder;
# value: that folder name.  The first value seen for a key is kept.
breed_map = {}
for annot in annotation:
    breed = annot.split("/")[-2]
    breed_map.setdefault(breed.split("-")[0], breed)
print(f"Total Breeds : {len(breed_map)}")
def bounding_box(image, annots_dir=None):
    """Return every bounding box annotated for *image*.

    The annotation is read from ``<annots_dir>/<image name without its
    extension>.xml``.

    Args:
        image: File name of the image, e.g. ``"71.jpeg"``.
        annots_dir: Folder containing the XML files.  Defaults to the
            module-level ``root_annots``.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples, one per
        ``<object>`` element in document order; empty when the file has no
        ``<object>`` element.
    """
    if annots_dir is None:
        annots_dir = root_annots
    # splitext strips only the final extension, so "a.b.jpg" maps to
    # "a.b.xml"; split(".")[0] cut the name at the first dot ("a.xml").
    stem = os.path.splitext(image)[0]
    bpath = os.path.join(annots_dir, stem + ".xml")
    root = ET.parse(bpath).getroot()

    retval = []
    for o in root.findall('object'):
        bndbox = o.find('bndbox')  # element holding the four box corners
        retval.append(tuple(
            int(bndbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ))
    return retval
# NOTE: the figure is created here but nothing in this snippet draws on it.
plt.figure(figsize=(10,10))
bbox=[]
# Saving fails when the output folder is missing, so create it up front.
os.makedirs('/content/results_imgs', exist_ok=True)
for i, image in enumerate(all_images):
    bboxarray = bounding_box(image)
    if not bboxarray:
        continue
    # Open each image once and reuse it for all of its boxes; the context
    # manager closes the file afterwards.
    with Image.open(os.path.join(root_images, image)) as source:
        for x, bbox in enumerate(bboxarray):
            # The previous version re-assigned bbox = bounding_box(image)
            # here, replacing the single box with the whole list, so crop()
            # received a list of tuples instead of one 4-tuple.
            print(bbox)
            im = source.crop(bbox)
            # JPEG cannot store alpha or palette images.
            if im.mode in ('RGBA', 'LA', 'P', 'PA'):
                im = im.convert('RGB')
            # One output file per (image index, box index) pair.
            im.save(f'/content/results_imgs/{i}-{x}.jpeg')
Upvotes: 0
Reputation: 2995
I have found a git repository to create cropped images from all bounding boxes of the detected objects from Pascal VOC images (images with bounding boxes generated by LabelImg): https://github.com/giovannicimolin/PascalVOC-to-Images
The code works pretty well. Hope it will help you solve your problem.
Upvotes: 1
Reputation: 4162
I assume that you want to crop the images for the bounding boxes. You can simply use a numpy array:
Please find a working example here.
import matplotlib.pyplot as plt
# Example annotation record: one image plus two rectangles, each described by
# its top-left corner (x, y) and its width/height.
mydic = {
    "annotations": [
        {"class": "rect", "height": 98, "width": 113, "x": 177, "y": 12},
        {"class": "rect", "height": 80, "width": 87, "x": 373, "y": 43},
    ],
    "class": "image",
    "filename": "https://i.sstatic.net/9qe6z.png",
}
def _load_image(source):
    """Read an image from a local path or an http(s) URL into an array."""
    if isinstance(source, str) and source.startswith(("http://", "https://")):
        import io
        import os.path
        import urllib.parse
        import urllib.request

        # Passing a URL string to plt.imread was deprecated in Matplotlib 3.4
        # and is rejected by later releases, so download the bytes first and
        # hand imread a file-like object instead.
        with urllib.request.urlopen(source) as response:
            payload = io.BytesIO(response.read())
        suffix = os.path.splitext(urllib.parse.urlparse(source).path)[1]
        # A file-like object carries no name, so pass the format explicitly;
        # fall back to PNG when the URL has no extension.
        return plt.imread(payload, format=suffix.lstrip(".").lower() or "png")
    return plt.imread(source)


def crop(dic, i):
    """Return the region of the image covered by annotation number *i*.

    Args:
        dic: Record with a "filename" entry (local path or http(s) URL) and
            an "annotations" list whose items have "x", "y", "width" and
            "height" entries in pixels.
        i: Index of the annotation to cut out.

    Returns:
        The sub-array ``image[y : y + height, x : x + width]``.
    """
    image = _load_image(dic["filename"])
    box = dic["annotations"][i]
    x0, y0 = box["x"], box["y"]
    # No trailing ", :" index: the result is the same for colour images and
    # the slice now also works for 2-D grayscale arrays.
    return image[y0:y0 + box["height"], x0:x0 + box["width"]]


# Demo: full image in the left half, the two crops stacked on the right.
fig = plt.figure()
ax = fig.add_subplot(121)
ax.imshow(_load_image(mydic["filename"]))
ax1 = fig.add_subplot(222)
ax1.imshow(crop(mydic, 0))
ax2 = fig.add_subplot(224)
ax2.imshow(crop(mydic, 1))
plt.show()
NOTE: This is not my code, but I found it some time ago while searching for the same problem.
Upvotes: 0
Reputation: 65
Well, I found the way to extract cropped images with this code:
#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob
# Folders that hold the images and their annotation files.
root_images = "/content/images"
root_annots = "/content/annotation"

all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# Every entry found directly inside the annotation folder.
breeds = glob.glob('/content/annotation/')
annotation = [entry for b in breeds for entry in glob.glob(b + "/*")]
print(f"Total annotation : {len(annotation)}")

# Key: text before the first "-" in the name of each entry's parent folder;
# value: that folder name.  The first value seen for a key is kept.
breed_map = {}
for annot in annotation:
    breed = annot.split("/")[-2]
    breed_map.setdefault(breed.split("-")[0], breed)
print(f"Total Breeds : {len(breed_map)}")
def bounding_box(image, annots_dir=None):
    """Return the bounding box of the last ``<object>`` annotated for *image*.

    The annotation is read from ``<annots_dir>/<image name without its
    extension>.xml``.  Only one box is returned even when the file lists
    several objects.

    Args:
        image: File name of the image, e.g. ``"71.jpeg"``.
        annots_dir: Folder containing the XML files.  Defaults to the
            module-level ``root_annots``.

    Returns:
        An ``(xmin, ymin, xmax, ymax)`` tuple of integers.

    Raises:
        ValueError: If the annotation file contains no ``<object>`` element
            (previously this surfaced as an UnboundLocalError).
    """
    if annots_dir is None:
        annots_dir = root_annots
    # splitext strips only the final extension, so "a.b.jpg" maps to
    # "a.b.xml"; split(".")[0] cut the name at the first dot ("a.xml").
    stem = os.path.splitext(image)[0]
    bpath = os.path.join(annots_dir, stem + ".xml")
    root = ET.parse(bpath).getroot()

    objects = root.findall('object')
    if not objects:
        raise ValueError(f"no <object> element found in {bpath}")
    bndbox = objects[-1].find('bndbox')  # element holding the four corners
    return tuple(
        int(bndbox.find(tag).text)
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
# NOTE: the figure is created here but nothing in this snippet draws on it.
plt.figure(figsize=(10,10))
bbox=[]
# Saving fails when the output folder is missing, so create it up front.
os.makedirs('/content/results_imgs', exist_ok=True)
for i, image in enumerate(all_images):
    # bounding_box() yields a single box, so exactly one crop is written per
    # image regardless of how many objects its annotation lists.
    bbox = bounding_box(image)
    print(bbox)
    # The context manager closes the source file once the crop is taken.
    with Image.open(os.path.join(root_images, image)) as source:
        im = source.crop(bbox)
    # JPEG cannot store alpha or palette images.
    if im.mode in ('RGBA', 'LA', 'P', 'PA'):
        im = im.convert('RGB')
    # The format string has one placeholder; the stray second argument that
    # was passed to format() has been dropped.
    im.save('/content/results_imgs/{}.jpeg'.format(i))
But if you run this code, it extracts only one image from the multiple bounding boxes within each XML file. How should I modify it in order to get all the images from the multiple bounding boxes annotated within each XML file?
Upvotes: 0