Reputation: 18218
I want to segment an image using YOLOv8 and then create a mask for all objects in the image with a specific class.
I have developed this code:
import cv2
import numpy as np
from ultralytics import YOLO

img = cv2.imread('images/bus.jpg')
height, width = img.shape[:2]

model = YOLO('yolov8m-seg.pt')
results = model.predict(source=img.copy(), save=False, save_txt=False)
class_ids = np.array(results[0].boxes.cls.cpu(), dtype="int")

for i in range(len(class_ids)):
    if class_ids[i] == 0:
        empty_image = np.zeros((height, width, 3), dtype=np.uint8)
        res_plotted = results[0][i].plot(boxes=0, img=empty_image)
In the above code, res_plotted is the mask for one object, in RGB. I want to add all of these images together and create a single mask for all objects with class 0 (class 0 is "person" in COCO; a pedestrian in this example).
My question: how can I combine these per-object masks into one mask for the whole class?
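Conceptually, something like the following is what I have in mind, but I am not sure np.maximum is the right way to combine the plotted mask images (just a rough sketch, reusing the loop above):

combined = np.zeros((height, width, 3), dtype=np.uint8)
for i in range(len(class_ids)):
    if class_ids[i] == 0:
        empty_image = np.zeros((height, width, 3), dtype=np.uint8)
        res_plotted = results[0][i].plot(boxes=0, img=empty_image)
        combined = np.maximum(combined, res_plotted)  # pixel-wise union of the per-object mask images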
Upvotes: 2
Views: 12286
Reputation: 41
import cv2
from ultralytics import YOLO

model = YOLO('seg.pt')
img = cv2.imread('ballon2.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
results = model.predict(img, show=True)

# results[0].masks.data holds one binary mask per detection; show the first one
cv2.imshow('seg', (results[0].masks.data[0].cpu().numpy() * 255).astype("uint8"))
cv2.waitKey(0)
So the answer is:
cv2.imshow('seg', (results[0].masks.data[0].cpu().numpy() * 255).astype("uint8"))
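If you need the union of all masks for one class rather than a single mask, a sketch along these lines should work (assuming the per-detection classes are in results[0].boxes.cls):

import numpy as np

cls_ids = results[0].boxes.cls.cpu().numpy().astype(int)
masks = results[0].masks.data.cpu().numpy()                            # one binary mask per detection, shape (N, H, W)
merged = (np.any(masks[cls_ids == 0], axis=0) * 255).astype("uint8")   # union of all class-0 masks
cv2.imshow('merged', merged)
cv2.waitKey(0)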
Upvotes: 0
Reputation: 3476
Extract the people segmentations using the bbox classes. You will get an array of shape [channels, h, w], where the channel dimension equals the number of detected people. Then you can use torch.any over the channel dimension to flatten the multi-channel array into a single-channel mask.
import cv2
from ultralytics import YOLO
import numpy as np
import torch

img = cv2.imread('ultralytics/assets/bus.jpg')
model = YOLO('yolov8m-seg.pt')
results = model.predict(source=img.copy(), save=True, save_txt=False, stream=True)

for result in results:
    # get array results
    masks = result.masks.data
    boxes = result.boxes.data
    # extract classes
    clss = boxes[:, 5]
    # get indices of results where class is 0 (people in COCO)
    people_indices = torch.where(clss == 0)
    # use these indices to extract the relevant masks
    people_masks = masks[people_indices]
    # scale for visualizing results
    people_mask = torch.any(people_masks, dim=0).int() * 255
    # save to file
    cv2.imwrite(str(model.predictor.save_dir / 'merged_segs.jpg'), people_mask.cpu().numpy())
(Images omitted: input with bboxes and segmentations, and the merged output mask.)
Everything is computed on the GPU with internal torch operations for maximum performance.
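As a follow-up, the merged mask can also be used to cut the people out of the original frame. A rough sketch (note that masks.data comes at the model's inference resolution, so it is resized back to the image size here, which is only approximate if letterbox padding was applied):

people_mask_np = people_mask.cpu().numpy().astype('uint8')            # single-channel 0/255 mask
people_mask_np = cv2.resize(people_mask_np, (img.shape[1], img.shape[0]))
people_only = cv2.bitwise_and(img, img, mask=people_mask_np)          # keep only pedestrian pixels
cv2.imwrite('people_only.jpg', people_only)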
Upvotes: 11
Reputation: 337
Here's the code that I use to extract the masks. Check out the comments in the code. Any improvements are welcome! Please comment below.
from ultralytics import YOLO
import cv2
import torch
from pathlib import Path

# Load a pretrained YOLOv8 segmentation model (custom weights here)
model = YOLO("./weights/best.pt")

# Run inference on an image
results = model('./images/img (1).jpg')  # results list
result = results[0]
print(result.names)
# print(result.boxes.xyxy)
# print(result.boxes.conf)
# print(result.boxes.cls)
# print(result.masks.data)

Path("./test_output/").mkdir(parents=True, exist_ok=True)
cv2.imwrite("./test_output/original_image.jpg", result.orig_img)

seg_classes = list(result.names.values())
# seg_classes = ["door", "insulator", "wall", "window"]

for result in results:
    masks = result.masks.data
    boxes = result.boxes.data
    clss = boxes[:, 5]
    print("clss")
    print(clss)

    # EXTRACT A SINGLE MASK WITH ALL THE CLASSES
    obj_indices = torch.where(clss != -1)
    obj_masks = masks[obj_indices]
    obj_mask = torch.any(obj_masks, dim=0).int() * 255
    cv2.imwrite('./test_output/all-masks.jpg', obj_mask.cpu().numpy())

    # MASK OF ALL INSTANCES OF A CLASS
    for i, seg_class in enumerate(seg_classes):
        obj_indices = torch.where(clss == i)
        print("obj_indices")
        print(obj_indices)
        obj_masks = masks[obj_indices]
        obj_mask = torch.any(obj_masks, dim=0).int() * 255
        cv2.imwrite(f'./test_output/{seg_class}s.jpg', obj_mask.cpu().numpy())

        # MASK FOR EACH INSTANCE OF A CLASS
        for j, obj_index in enumerate(obj_indices[0].cpu().numpy()):
            obj_masks = masks[torch.tensor([obj_index])]
            obj_mask = torch.any(obj_masks, dim=0).int() * 255
            cv2.imwrite(f'./test_output/{seg_class}_{j}.jpg', obj_mask.cpu().numpy())
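One way to sanity-check a saved class mask is to blend it back over the original image (a rough sketch; 'doors.jpg' is just an example filename from the loop above, use whichever class file was written for your model):

mask = cv2.imread('./test_output/doors.jpg', cv2.IMREAD_GRAYSCALE)
overlay = result.orig_img.copy()
mask = cv2.resize(mask, (overlay.shape[1], overlay.shape[0]))      # bring the mask to image resolution
overlay[mask > 127] = (0, 0, 255)                                  # paint masked pixels red (BGR)
blended = cv2.addWeighted(result.orig_img, 0.6, overlay, 0.4, 0)
cv2.imwrite('./test_output/doors_overlay.jpg', blended)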
Upvotes: 2