numpy array slow with large list

Question

I am using an OAK-D from Luxonis and trying to implement YOLOv8 segmentation on the camera myself.

I am having latency problems: each frame currently takes about 0.14s, and the conversion to a NumPy array (np.array) is a huge bottleneck.

Using yolov8n-seg I get the following results:

For output0 of YOLOv8-seg, with dimensions (1, 116, 8400) and a flat list of 974400 elements, I need 0.05s.

For output1 of YOLOv8-seg, with dimensions (1, 32, 160, 160) and a flat list of 819200 elements, I need 0.03s.

In total 0.08s of a total 0.14s (about 57% of each frame only to do this).

My question is whether there is a fast way to transform a list with a length of about a million into a NumPy array.

Or, alternatively, what is the best way to transform a flat list into a (1, 116, 8400) or (1, 32, 160, 160) NumPy array?

I have tried np.reshape, np.fromiter, and converting the list with np.array first, but I get more or less the same result each time.

UPDATE: Here is my code:


import cv2
import numpy as np
import depthai as dai
import time
from YOLOSeg import YOLOSeg

# Location of the compiled YOLOv8n-seg network blob.
pathYoloBlob = "./yolov8n-seg.blob"

# --- Build the OAK-D pipeline -------------------------------------------
pipeline = dai.Pipeline()

# 1) Create the nodes (same creation order as before: camera, network,
#    then the two host-bound outputs).
color_cam = pipeline.createColorCamera()
yolo_net = pipeline.create(dai.node.NeuralNetwork)
rgb_out = pipeline.createXLinkOut()
yolo_out = pipeline.createXLinkOut()

# 2) Configure each node.
color_cam.setPreviewSize(640, 640)
color_cam.setInterleaved(False)
yolo_net.setBlobPath(pathYoloBlob)
rgb_out.setStreamName("rgb")
yolo_out.setStreamName("nn_yolo")

# 3) Wire the graph: the camera preview feeds both the network and the
#    "rgb" stream; the network output feeds the "nn_yolo" stream.
color_cam.preview.link(yolo_net.input)
color_cam.preview.link(rgb_out.input)
yolo_net.out.link(yolo_out.input)

# Start application
# The module is imported as `dai`; the bare name `depthai` is not bound in
# this file, so the device must be opened through the alias.
with dai.Device(pipeline) as device:

    # Host-side queues for the two XLinkOut streams declared in the pipeline.
    q_rgb = device.getOutputQueue("rgb")
    q_nn_yolo = device.getOutputQueue("nn_yolo")

    frame = None

    # Since the detections returned by nn have values from <0..1> range, they need to be multiplied by frame width/height to
    # receive the actual position of the bounding box on the image
    def frameNorm(frame, bbox):
        """Scale a normalized bbox to integer pixel coordinates of *frame*.

        Even-indexed entries are multiplied by ``frame.shape[1]`` and
        odd-indexed entries by ``frame.shape[0]``; values are clipped to
        [0, 1] before scaling.
        """
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)

    # Main host-side application loop
    while True:

        # Non-blocking reads: either message may be None on a given pass.
        in_rgb = q_rgb.tryGet()
        in_nn_yolo = q_nn_yolo.tryGet()

        if in_rgb is not None:

            frame = in_rgb.getCvFrame()

            if in_nn_yolo is not None:

                # Here is the problem
                # NOTE(review): getLayerFp16 appears to hand back a plain
                # Python sequence, so the list -> ndarray conversion inside
                # np.reshape is the reported bottleneck. Reading the raw
                # tensor buffer from the NNData message may avoid it --
                # TODO confirm against the depthai NNData API reference.
                # The shape is passed positionally because the `newshape`
                # keyword is deprecated in recent NumPy releases.
                output0 = np.reshape(in_nn_yolo.getLayerFp16("output0"), (1, 116, 8400))
                output1 = np.reshape(in_nn_yolo.getLayerFp16("output1"), (1, 32, 160, 160))

                # If we have both outputs we can compute the final mask
                if len(output0) > 0 and len(output1) > 0:

                    # Post-process, this is fast, no problems here
                    # NOTE(review): YOLOSeg is rebuilt on every frame;
                    # presumably it could be constructed once before the
                    # loop -- verify it keeps no per-frame state first.
                    yoloseg = YOLOSeg("", conf_thres=0.3, iou_thres=0.5)
                    yoloseg.prepare_input_for_oakd(frame.shape[:2])
                    yoloseg.segment_objects_from_oakd(output0, output1)
                    combined_img = yoloseg.draw_masks(frame.copy())
                    cv2.imshow("Output", combined_img)

            else:
                print("in_nn_yolo EMPTY")

        else:
            print("in_rgb EMPTY")
        # at any time, you can press "q" and exit the main loop, therefore exiting the program itself
        if cv2.waitKey(1) == ord('q'):
            break

numpy array slow with large list

Answers (0)

Related Questions