I am learning DepthAI and I found this example in their repo: https://github.com/luxonis/depthai-experiments/tree/master/gen2-road-segmentation. I started translating this code into C++ to be consistent with the project I am putting together. I ran into this function named "decode":
def decode(packet):
    data = np.squeeze(toTensorResult(packet)["L0317_ReWeight_SoftMax"])
    class_colors = [[0, 0, 0], [0, 255, 0], [255, 0, 0], [0, 0, 255]]
    class_colors = np.asarray(class_colors, dtype=np.uint8)
    indices = np.argmax(data, axis=0)
    output_colors = np.take(class_colors, indices, axis=0)
    return output_colors
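For context: after the squeeze, the tensor presumably has shape (4, H, W), one score plane per class, matching the four entries of class_colors. np.argmax(data, axis=0) then picks the highest-scoring class for every pixel, and np.take maps each class index to its colour, producing an H x W x 3 colour map.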
To add more detail about the problem: DepthAI offers a lot of examples in their core repo, https://github.com/luxonis/depthai-core. I used some of those examples to start shaping the segmentation script, since it's a feature I couldn't find written in C++ among all the examples.
Here is my progress so far.
#include <atomic>
#include <chrono>
#include <iomanip>
#include <sstream>

#include "depthai-core/examples/utility/utility.hpp"
#include <depthai/depthai.hpp>
#include "slar.hpp"

using namespace slar;
using namespace std;
using namespace std::chrono;

static std::atomic<bool> syncNN{true};

void slar_depth_segmentation::segment(int argc, char **argv, dai::Pipeline &pipeline,
                                      cv::Mat frame,
                                      dai::Device *device_unused) {
    // blob model
    std::string nnPath("/Users/alessiograncini/road-segmentation-adas-0001.blob");
    if (argc > 1) {
        nnPath = std::string(argv[1]);
    }
    printf("Using blob at path: %s\n", nnPath.c_str());

    // in
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    auto imageManip = pipeline.create<dai::node::ImageManip>();
    auto mobilenetDet = pipeline.create<dai::node::MobileNetDetectionNetwork>();

    // out
    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutManip = pipeline.create<dai::node::XLinkOut>();

    // stream names
    xoutRgb->setStreamName("camera");
    xoutManip->setStreamName("manip");
    nnOut->setStreamName("segmentation");

    // resize frames to the network input size
    imageManip->initialConfig.setResize(300, 300);
    imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    // camera properties
    camRgb->setPreviewSize(300, 300);
    camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);
    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
    camRgb->setInterleaved(false);
    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::RGB);

    // network properties
    mobilenetDet->setConfidenceThreshold(0.5f);
    mobilenetDet->setBlobPath(nnPath);
    mobilenetDet->setNumInferenceThreads(2);
    mobilenetDet->input.setBlocking(false);

    // link
    camRgb->preview.link(xoutRgb->input);
    camRgb->preview.link(imageManip->inputImage); // feed the manip node, which feeds the network
    imageManip->out.link(mobilenetDet->input);

    if (syncNN) {
        mobilenetDet->passthrough.link(xoutManip->input);
    } else {
        imageManip->out.link(xoutManip->input);
    }

    mobilenetDet->out.link(nnOut->input);

    // device
    dai::Device device(pipeline);

    // queues
    auto previewQueue = device.getOutputQueue("camera", 4, false);
    auto detectionNNQueue = device.getOutputQueue("segmentation", 4, false);

    // fps
    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    auto color = cv::Scalar(255, 255, 255);

    // main loop
    while (true) {
        auto inRgb = previewQueue->get<dai::ImgFrame>();
        auto inSeg = detectionNNQueue->get<dai::NNData>();

        // raw network output as a flat byte vector - this is what decode() has to unpack
        auto segmentations = inSeg->getData();

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if (elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        // testing whether a Mat is a good replacement for the input array,
        // since in "decode" the inSeg data is manipulated
        // cv::Mat img(500, 1000, CV_8UC1, cv::Scalar(70));
        // slar_depth_segmentation::draw(segmentations, frame);
        std::stringstream fpsStr;
        fpsStr << std::fixed << std::setprecision(2) << fps;
        cv::imshow("camera window", inRgb->getCvFrame());
        // cv::imshow("camera window", frame);

        int key = cv::waitKey(1);
        if (key == 'q' || key == 'Q') {
            break;
        }
    }
}

void slar_depth_segmentation::draw(cv::InputArray data, cv::OutputArray frame) {
    cv::addWeighted(frame, 1, data, 0.2, 0, frame);
}

// https://jclay.github.io/dev-journal/simple_cpp_argmax_argmin.html
void slar_depth_segmentation::decode(cv::InputArray data) {
    vector<int> class_colors[4] =
        {{0, 0, 0}, {0, 255, 0}, {255, 0, 0}, {0, 0, 255}};
}
I can successfully display the camera feed using this script, but as you can tell the only part of the segmentation that has been translated is the draw method, which is an equivalent function in both Python and C++ since it's part of the OpenCV library. I am getting stuck trying to write the equivalent of the decode method. Thanks
[edit]
Any suggestions regarding this follow-up?
C++
cv::InputArray slar_depth_segmentation::decode(std::vector<std::uint8_t> data) {
    // reshape or np.squeeze
    data.resize(1, 1);
    // create a vector array
    std::vector<std::vector<int>> classColors{
        {0, 0, 0},
        {0, 255, 0},
        {255, 0, 0},
        {0, 0, 255}};
    double minVal;
    double maxVal;
    cv::minMaxIdx(data, &minVal, &maxVal);
    // get max value of class colors
    auto output_colors = classColors[&maxVal, 0];
    return output_colors;
}
Py
def decode(packet):
    data = np.squeeze(toTensorResult(packet)["L0317_ReWeight_SoftMax"])
    class_colors = [[0, 0, 0], [0, 255, 0], [255, 0, 0], [0, 0, 255]]
    class_colors = np.asarray(class_colors, dtype=np.uint8)
    indices = np.argmax(data, axis=0)
    output_colors = np.take(class_colors, indices, axis=0)
    return output_colors
Answer:
I think you are looking for the cv::argmax function. It returns the index of the maximum value in an array and is available in OpenCV 3.4.3 and later.
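Whichever argmax primitive you end up with, note that np.argmax(data, axis=0) is a per-pixel argmax across the four class planes, not one global maximum: cv::minMaxIdx in your follow-up finds a single global extreme over the whole buffer, and classColors[&maxVal, 0] does not index the table at all (the comma expression just evaluates to 0, so you always get classColors[0]). Below is a minimal hand-rolled sketch of the whole decode step. It assumes the layer comes back from dai::NNData::getLayerFp16 as a flat std::vector<float> in (class, height, width) row-major order; decodeSegmentation is a hypothetical name, and the 512x896 size used further down is taken from the road-segmentation-adas-0001 model description, so adjust it to your blob.

#include <vector>
#include <opencv2/opencv.hpp>

// Hypothetical helper: per-pixel argmax over the class planes,
// mirroring the Python decode().
cv::Mat decodeSegmentation(const std::vector<float> &data, int height, int width) {
    // one BGR colour per class index, same table as the Python class_colors
    const cv::Vec3b classColors[4] = {
        {0, 0, 0}, {0, 255, 0}, {255, 0, 0}, {0, 0, 255}};
    const int numClasses = 4;
    const int planeSize = height * width;

    cv::Mat outputColors(height, width, CV_8UC3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            // np.argmax(data, axis=0): class with the highest score at this pixel
            int bestClass = 0;
            float bestScore = data[y * width + x]; // plane 0
            for (int c = 1; c < numClasses; ++c) {
                float score = data[c * planeSize + y * width + x];
                if (score > bestScore) {
                    bestScore = score;
                    bestClass = c;
                }
            }
            // np.take(class_colors, indices, axis=0): colour lookup
            outputColors.at<cv::Vec3b>(y, x) = classColors[bestClass];
        }
    }
    return outputColors;
}

In the main loop it would be used along these lines (layer name taken from the Python example; getLayerFp16 converts the FP16 tensor to floats, which covers both toTensorResult and np.squeeze):

    std::vector<float> data = inSeg->getLayerFp16("L0317_ReWeight_SoftMax");
    cv::Mat colors = decodeSegmentation(data, 512, 896);
    cv::Mat rgbFrame = inRgb->getCvFrame();
    cv::resize(colors, colors, rgbFrame.size());
    slar_depth_segmentation::draw(colors, rgbFrame);
    cv::imshow("camera window", rgbFrame);

One caveat: the gen2 Python experiment creates a plain dai::node::NeuralNetwork for this blob, whereas dai::node::MobileNetDetectionNetwork parses its output into ImgDetections, so to read the raw tensor as dai::NNData you would swap in the generic NeuralNetwork node.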