fflpdqqoeit
fflpdqqoeit

Reputation: 65

Automatic image cropping from bounding boxes annotation python

I'm developing a CNN for image recognition. I have a set of different images, and each image has a set of different bounding boxes (at least 3 bounding boxes per image).

I would like to automatically extract the regions inside the bounding boxes and crop them, to obtain a set of cropped images corresponding to the content of each bounding box. I have created both a VOC XML file and a cumulative .csv file which contains all the details for each image; here is an extract:

,filepath,x1,x2,y1,y2,class_name
0,71.jpeg,81,118,98,122,os
1,71.jpeg,120,156,83,110,od
2,71.jpeg,107,161,136,154,m

Basically, I have the mentioned images in .jpeg format in a dedicated folder (\train_images) together with the annotation files. Do you have a fast implementation that deals with this issue?

Thank you

Upvotes: 0

Views: 6040

Answers (5)

user12321371
user12321371

Reputation:

If anyone is still searching for the answer, you can see these scripts:

This script will crop each bounding box and save the crops to the corresponding class folder automatically:

from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

# Directory the annotated source images are loaded from.
original_file = './images/' # your images directory
# Root output directory; one sub-folder per class name is created below it.
dst = './save/'


def check_folder_exists(path):
    """Create directory *path* (including missing parents) if it is absent.

    Prints ``create <path>`` when the directory had to be created and does
    nothing when *path* already exists.

    Args:
        path: Directory path to ensure.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            it already existing.
    """
    if os.path.exists(path):
        return
    # exist_ok=True tolerates the directory being created by someone else
    # between the check above and this call, without needing the errno module.
    os.makedirs(path, exist_ok=True)
    print('create ' + path)




# Step 1: collect one record per annotated object from every XML file.
# Each record is [filename, width, height, class_name, xmin, ymin, xmax, ymax]
# with all values kept as the strings read from the XML.
seed_arr = []
for xml_file in glob.glob('./labels/*.xml'): #your xml directory
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    # Reset per file so a missing <size> element can neither raise NameError
    # nor silently reuse the dimensions of the previously parsed annotation.
    width = height = ''
    for size_tag in root.findall('size'):
        width = size_tag.find('width').text
        height = size_tag.find('height').text

    for object_tag in root.findall('object'):
        seed_arr.append([
            filename,
            width,
            height,
            object_tag.find('name').text,
            object_tag.find('bndbox/xmin').text,
            object_tag.find('bndbox/ymin').text,
            object_tag.find('bndbox/xmax').text,
            object_tag.find('bndbox/ymax').text,
        ])

# Sorting fixes the order of the records and therefore the numeric prefix
# given to each saved crop.
seed_arr.sort()

# Step 2: crop every record, resize it to 224x224 and save it as JPEG under
# <dst>/<class_name>/<index>_<filename>.
for index, record in enumerate(seed_arr):
    filename, _width, _height, class_name, xmin, ymin, xmax, ymax = record

    load_img_path = os.path.join(original_file, filename)
    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    save_img_path = os.path.join(save_class_path, str(index) + '_' + filename)

    # The context manager closes the source file once the crop is extracted.
    with Image.open(load_img_path) as img:
        crop_img = img.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
        im1 = crop_img.resize((224, 224))

    # The JPEG writer rejects modes with alpha or a palette (e.g. RGBA or P
    # from PNG sources), so convert those to RGB before saving.
    if im1.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
        im1 = im1.convert('RGB')
    im1.save(save_img_path, 'JPEG')
    print('save ' + save_img_path)

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_xml/crop_from_xml.py

If you are trying to crop from csv then check this link :

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_csv_bbox/crop_image_from_csv.py

Upvotes: 0

SpeedOfSpin
SpeedOfSpin

Reputation: 1700

You can return a list of tuples (one per bounding box) from bounding_box instead and iterate over it, as below.

#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob

# Directories holding the images and their XML annotation files.
root_images = "/content/images"
root_annots = "/content/annotation"

# Every entry of the image directory is processed later on.
all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# Gather every path found directly inside the annotation directory.
breeds = glob.glob('/content/annotation/')
annotation = [path for folder in breeds for path in glob.glob(folder + "/*")]
print(f"Total annotation : {len(annotation)}")

# Key: text before the first "-" of the second-to-last path component;
# value: that path component itself (first occurrence wins).
breed_map = {}
for annot_path in annotation:
    folder_name = annot_path.split("/")[-2]
    breed_map.setdefault(folder_name.split("-")[0], folder_name)

print(f"Total Breeds : {len(breed_map)}")

def bounding_box(image):
    """Return every bounding box annotated for *image*.

    The annotation is read from ``<root_annots>/<stem>.xml`` where ``stem``
    is the part of *image* before its first ``"."``.

    Args:
        image: File name of the image, e.g. ``"71.jpeg"``.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples, one per
        ``<object>`` element in document order. The list is empty when the
        annotation contains no objects.
    """
    bpath = root_annots + "/" + str(image.split(".")[0] + ".xml")
    root = ET.parse(bpath).getroot()

    retval = []
    for o in root.findall('object'):
        bndbox = o.find('bndbox')  # element holding the four coordinates
        retval.append(tuple(
            int(bndbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ))
    return retval
    
# Crop every annotated bounding box out of every image and save each crop as
# /content/results_imgs/<image index>-<box index>.jpeg.
plt.figure(figsize=(10,10))
# Saving fails if the target directory is missing, so create it up front.
os.makedirs('/content/results_imgs', exist_ok=True)
for i, image in enumerate(all_images):
    bboxarray = bounding_box(image)
    # Open each image once and reuse it for all of its boxes; the context
    # manager closes the file afterwards.
    with Image.open(os.path.join(root_images, image)) as im:
        for x, bbox in enumerate(bboxarray):
            # bbox is a single (xmin, ymin, xmax, ymax) tuple here; it must
            # not be replaced by the whole list before being passed to crop.
            print(bbox)
            im.crop(bbox).save(f'/content/results_imgs/{i}-{x}.jpeg')

Upvotes: 0

nirojshrestha019
nirojshrestha019

Reputation: 2995

I have found a git repository to create cropped images from all bounding boxes of the detected objects from Pascal VOC images (images with bounding boxes generated by LabelImg): https://github.com/giovannicimolin/PascalVOC-to-Images

The code works pretty well. Hope it will help you solve your problem.

Upvotes: 1

Deshwal
Deshwal

Reputation: 4162

I assume that you want to crop the images to the bounding boxes. You can simply use a NumPy array:

Please find a working example here.

import matplotlib.pyplot as plt

# Sample annotation record for one image. For each rectangle, "x"/"y" are
# the first column/row of the region and "width"/"height" its extent.
mydic = {
    "annotations": [
        {"class": "rect", "height": 98, "width": 113, "x": 177, "y": 12},
        {"class": "rect", "height": 80, "width": 87, "x": 373, "y": 43},
    ],
    "class": "image",
    "filename": "https://i.sstatic.net/9qe6z.png",
}


def crop(dic, i):
    """Return the region of annotation *i* cut out of the image of *dic*.

    Args:
        dic: Mapping with a ``"filename"`` entry that ``plt.imread`` can read
            and an ``"annotations"`` list whose items provide ``"x"``,
            ``"y"``, ``"width"`` and ``"height"``.
        i: Index into ``dic["annotations"]``.

    Returns:
        The sub-array covering rows ``y`` to ``y + height`` and columns
        ``x`` to ``x + width`` of the image array.
    """
    # NOTE(review): recent Matplotlib releases appear to reject URL strings
    # in imread - confirm against the installed version.
    image = plt.imread(dic["filename"])
    box = dic["annotations"][i]
    x0, y0 = box["x"], box["y"]
    # Only the first two axes are sliced, so 2-D (grayscale) arrays work as
    # well as 3-D ones; for 3-D input the result equals slicing with a
    # trailing ":".
    return image[y0:y0 + box["height"], x0:x0 + box["width"]]


fig = plt.figure()

# Left half of the figure: the complete image.
full_axes = fig.add_subplot(121)
full_axes.imshow(plt.imread(mydic["filename"]))

# Right column: annotation 0 in grid cell 222, annotation 1 in grid cell 224.
for position, annotation_index in ((222, 0), (224, 1)):
    fig.add_subplot(position).imshow(crop(mydic, annotation_index))

plt.show()

NOTE: This is not my code, but I found it some time ago while searching for a solution to the same problem.

Upvotes: 0

fflpdqqoeit
fflpdqqoeit

Reputation: 65

Well, I found the way to extract cropped images with this code:

#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob

# Directories holding the images and their XML annotation files.
root_images = "/content/images"
root_annots = "/content/annotation"

# Every entry of the image directory is processed later on.
all_images = os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

# Gather every path found directly inside the annotation directory.
breeds = glob.glob('/content/annotation/')
annotation = [entry for base in breeds for entry in glob.glob(base + "/*")]
print(f"Total annotation : {len(annotation)}")

# Key: text before the first "-" of the second-to-last path component;
# value: that path component itself (first occurrence wins).
breed_map = {}
for annot_path in annotation:
    parent = annot_path.split("/")[-2]
    breed_map.setdefault(parent.split("-")[0], parent)

print(f"Total Breeds : {len(breed_map)}")

def bounding_box(image):
    """Return the bounding box of the last ``<object>`` annotated for *image*.

    The annotation is read from ``<root_annots>/<stem>.xml`` where ``stem``
    is the part of *image* before its first ``"."``. Every ``<object>`` is
    visited, but each one overwrites the previous coordinates, so only the
    final object's box is returned.

    Returns:
        An ``(xmin, ymin, xmax, ymax)`` tuple of integers.
    """
    xml_path = root_annots + "/" + str(image.split(".")[0] + ".xml")
    annotation_root = ET.parse(xml_path).getroot()

    for obj in annotation_root.findall('object'):
        box = obj.find('bndbox')  # element holding the four coordinates
        xmin, ymin, xmax, ymax = (
            int(box.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

    return (xmin, ymin, xmax, ymax)
    
plt.figure(figsize=(10,10))
bbox = []
# One crop per image: the box returned by bounding_box is cut out and saved
# under the image's position in all_images.
for i, image in enumerate(all_images):
    bbox = bounding_box(image)
    print(bbox)
    source_path = os.path.join(root_images, image)
    target_path = '/content/results_imgs/{}.jpeg'.format(i)
    Image.open(source_path).crop(bbox).save(target_path)

But if you run this code, it extracts only one image from the multiple bounding boxes within each XML file. How should I modify it in order to get all the images from the multiple bounding boxes annotated within each XML file?

Upvotes: 0

Related Questions