Reputation: 21
I am using an OAK-D from Luxonis and trying to implement YOLOv8 segmentation myself on the camera.
I am having latency problems: right now each frame takes about 0.14 s, and the np.array conversion is a huge bottleneck.
Using yolov8n-seg I get the next results:
For output0 of YOLOv8-seg with dimensions -> (1, 116, 8400) and total len list = 974400 I need 0.05s
For output1 of YOLOv8-seg with dimensions -> (1, 32, 160, 160) and total len list = 819200 I need 0.03s
In total 0.08s of a total 0.14s (about 57% of each frame only to do this).
My question is whether there is a fast way to convert a list with a length of about a million elements into a NumPy array.
Alternatively, what is the best way to reshape a flat list into a (1, 116, 8400) or (1, 32, 160, 160) NumPy array?
I have tried np.reshape, np.fromiter and transforming first the list with np.array but I get more or less the same result.
UPDATE: I update with my code here:
import cv2
import numpy as np
import depthai as dai
import time
from YOLOSeg import YOLOSeg
# Path to the compiled YOLOv8n-seg model blob that will run on-device.
pathYoloBlob = "./yolov8n-seg.blob"
# Create OAK-D pipeline
pipeline = dai.Pipeline()
# Color camera node; 640x640 preview — presumably the blob's expected input size (TODO confirm)
cam_rgb = pipeline.createColorCamera()
cam_rgb.setPreviewSize(640, 640)
cam_rgb.setInterleaved(False)
# Neural-network node that executes the YOLOv8-seg blob on the camera itself
nn = pipeline.create(dai.node.NeuralNetwork)
nn.setBlobPath(pathYoloBlob)
cam_rgb.preview.link(nn.input)
# XLink output: stream the raw preview frames back to the host ("rgb" queue)
xout_rgb = pipeline.createXLinkOut()
xout_rgb.setStreamName("rgb")
cam_rgb.preview.link(xout_rgb.input)
# XLink output: stream the NN result tensors back to the host ("nn_yolo" queue)
xout_nn_yolo = pipeline.createXLinkOut()
xout_nn_yolo.setStreamName("nn_yolo")
nn.out.link(xout_nn_yolo.input)
# Start application
# BUG FIX: the module is imported as `dai` (import depthai as dai), so
# `depthai.Device(...)` raised NameError — it must be `dai.Device(...)`.
with dai.Device(pipeline) as device:
    q_rgb = device.getOutputQueue("rgb")
    q_nn_yolo = device.getOutputQueue("nn_yolo")
    frame = None

    # PERF FIX: construct the post-processor ONCE, outside the frame loop.
    # The original re-created YOLOSeg(...) on every frame inside the hot path.
    yoloseg = YOLOSeg("", conf_thres=0.3, iou_thres=0.5)

    def frameNorm(frame, bbox):
        """Scale normalized <0..1> bbox values to pixel coordinates of `frame`.

        Even indices (x) are multiplied by the frame width, odd indices (y)
        by the frame height; values are clipped to [0, 1] first.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    # Main host-side application loop
    while True:
        in_rgb = q_rgb.tryGet()
        in_nn_yolo = q_nn_yolo.tryGet()
        if in_rgb is not None:
            frame = in_rgb.getCvFrame()
            if in_nn_yolo is not None:
                # Known bottleneck: getLayerFp16() builds a Python float list
                # that NumPy must re-convert element by element.
                # NOTE(review): consider reading the raw tensor via
                # in_nn_yolo.getData() + np.frombuffer/.view(np.float16)
                # to skip the list round-trip entirely — verify layer
                # offsets against the DepthAI docs before switching.
                output0 = np.reshape(in_nn_yolo.getLayerFp16("output0"), newshape=([1, 116, 8400]))
                output1 = np.reshape(in_nn_yolo.getLayerFp16("output1"), newshape=([1, 32, 160, 160]))
                # If we have both outputs we can compute the final mask
                if len(output0) > 0 and len(output1) > 0:
                    # Post-process — this part is fast, no problems here
                    yoloseg.prepare_input_for_oakd(frame.shape[:2])
                    yoloseg.segment_objects_from_oakd(output0, output1)
                    combined_img = yoloseg.draw_masks(frame.copy())
                    cv2.imshow("Output", combined_img)
                else:
                    print("in_nn_yolo EMPTY")
        else:
            print("in_rgb EMPTY")
        # at any time, you can press "q" and exit the main loop, therefore exiting the program itself
        if cv2.waitKey(1) == ord('q'):
            break
Upvotes: 0
Views: 157