I am learning DepthAI and I found this example in their repo: https://github.com/luxonis/depthai-experiments/tree/master/gen2-road-segmentation. I started translating this code into C++ to be consistent with the project I am putting together. I ran into this function named "decode":
def decode(packet):
    data = np.squeeze(toTensorResult(packet)["L0317_ReWeight_SoftMax"])
    class_colors = [[0, 0, 0], [0, 255, 0], [255, 0, 0], [0, 0, 255]]
    class_colors = np.asarray(class_colors, dtype=np.uint8)
    indices = np.argmax(data, axis=0)
    output_colors = np.take(class_colors, indices, axis=0)
    return output_colors
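For context: after the squeeze, the tensor presumably has shape (4, H, W), one score plane per class, matching the four entries of class_colors. np.argmax(data, axis=0) then picks the highest-scoring class for every pixel, and np.take maps each class index to its colour, producing an H x W x 3 colour map.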
To add more detail about the problem: DepthAI offers a lot of examples in their core repo, https://github.com/luxonis/depthai-core. I used some of those examples to start shaping the segmentation script, since it's a feature I couldn't find written in C++ among all the examples.
Here is my progress so far.
#include <atomic>
#include <chrono>
#include <iomanip>
#include <sstream>

#include "depthai-core/examples/utility/utility.hpp"
#include <depthai/depthai.hpp>
#include "slar.hpp"

using namespace slar;
using namespace std;
using namespace std::chrono;

static std::atomic<bool> syncNN{true};

void slar_depth_segmentation::segment(int argc, char **argv, dai::Pipeline &pipeline,
                                      cv::Mat frame,
                                      dai::Device *device_unused) {
    // blob model
    std::string nnPath("/Users/alessiograncini/road-segmentation-adas-0001.blob");
    if (argc > 1) {
        nnPath = std::string(argv[1]);
    }
    printf("Using blob at path: %s\n", nnPath.c_str());

    // in
    auto camRgb = pipeline.create<dai::node::ColorCamera>();
    auto imageManip = pipeline.create<dai::node::ImageManip>();
    auto mobilenetDet = pipeline.create<dai::node::MobileNetDetectionNetwork>();

    // out
    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
    auto nnOut = pipeline.create<dai::node::XLinkOut>();
    auto xoutManip = pipeline.create<dai::node::XLinkOut>();

    // stream names
    xoutRgb->setStreamName("camera");
    xoutManip->setStreamName("manip");
    nnOut->setStreamName("segmentation");

    // resize frames to the network input size
    imageManip->initialConfig.setResize(300, 300);
    imageManip->initialConfig.setFrameType(dai::ImgFrame::Type::BGR888p);

    // camera properties
    camRgb->setPreviewSize(300, 300);
    camRgb->setBoardSocket(dai::CameraBoardSocket::RGB);
    camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
    camRgb->setInterleaved(false);
    camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::RGB);

    // network properties
    mobilenetDet->setConfidenceThreshold(0.5f);
    mobilenetDet->setBlobPath(nnPath);
    mobilenetDet->setNumInferenceThreads(2);
    mobilenetDet->input.setBlocking(false);

    // link
    camRgb->preview.link(xoutRgb->input);
    camRgb->preview.link(imageManip->inputImage); // feed the manip node, which feeds the network
    imageManip->out.link(mobilenetDet->input);

    if (syncNN) {
        mobilenetDet->passthrough.link(xoutManip->input);
    } else {
        imageManip->out.link(xoutManip->input);
    }

    mobilenetDet->out.link(nnOut->input);

    // device
    dai::Device device(pipeline);

    // queues
    auto previewQueue = device.getOutputQueue("camera", 4, false);
    auto detectionNNQueue = device.getOutputQueue("segmentation", 4, false);

    // fps
    auto startTime = steady_clock::now();
    int counter = 0;
    float fps = 0;
    auto color = cv::Scalar(255, 255, 255);

    // main loop
    while (true) {
        auto inRgb = previewQueue->get<dai::ImgFrame>();
        auto inSeg = detectionNNQueue->get<dai::NNData>();

        // raw network output as a flat byte vector - this is what decode() has to unpack
        auto segmentations = inSeg->getData();

        counter++;
        auto currentTime = steady_clock::now();
        auto elapsed = duration_cast<duration<float>>(currentTime - startTime);
        if (elapsed > seconds(1)) {
            fps = counter / elapsed.count();
            counter = 0;
            startTime = currentTime;
        }

        // testing whether a Mat is a good replacement for the input array,
        // since in "decode" the inSeg data is manipulated
        // cv::Mat img(500, 1000, CV_8UC1, cv::Scalar(70));
        // slar_depth_segmentation::draw(segmentations, frame);
        std::stringstream fpsStr;
        fpsStr << std::fixed << std::setprecision(2) << fps;
        cv::imshow("camera window", inRgb->getCvFrame());
        // cv::imshow("camera window", frame);

        int key = cv::waitKey(1);
        if (key == 'q' || key == 'Q') {
            break;
        }
    }
}

void slar_depth_segmentation::draw(cv::InputArray data, cv::OutputArray frame) {
    cv::addWeighted(frame, 1, data, 0.2, 0, frame);
}

// https://jclay.github.io/dev-journal/simple_cpp_argmax_argmin.html
void slar_depth_segmentation::decode(cv::InputArray data) {
    vector<int> class_colors[4] =
        {{0, 0, 0}, {0, 255, 0}, {255, 0, 0}, {0, 0, 255}};
}
I can successfully display the camera feed using this script, but as you can tell the only part of the segmentation that has been translated is the draw method, which is an equivalent function in both Python and C++ since it's part of the OpenCV library. I am getting stuck trying to write the equivalent of the decode method. Thanks
[edit]
Any suggestions regarding this follow-up?
C++
cv::InputArray slar_depth_segmentation::decode(std::vector<std::uint8_t> data) {
    // reshape or np.squeeze
    data.resize(1, 1);
    // create a vector array
    std::vector<std::vector<int>> classColors{
        {0, 0, 0},
        {0, 255, 0},
        {255, 0, 0},
        {0, 0, 255}};
    double minVal;
    double maxVal;
    cv::minMaxIdx(data, &minVal, &maxVal);
    // get max value of class colors
    auto output_colors = classColors[&maxVal, 0];
    return output_colors;
}
Py
def decode(packet):
    data = np.squeeze(toTensorResult(packet)["L0317_ReWeight_SoftMax"])
    class_colors = [[0, 0, 0], [0, 255, 0], [255, 0, 0], [0, 0, 255]]
    class_colors = np.asarray(class_colors, dtype=np.uint8)
    indices = np.argmax(data, axis=0)
    output_colors = np.take(class_colors, indices, axis=0)
    return output_colors
Answer:
I think you are looking for the cv::argmax function. It returns the index of the maximum value in an array and is available in OpenCV 3.4.3 and later.
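Whichever argmax primitive you end up with, note that np.argmax(data, axis=0) is a per-pixel argmax across the four class planes, not one global maximum: cv::minMaxIdx in your follow-up finds a single global extreme over the whole buffer, and classColors[&maxVal, 0] does not index the table at all (the comma expression just evaluates to 0, so you always get classColors[0]). Below is a minimal hand-rolled sketch of the whole decode step. It assumes the layer comes back from dai::NNData::getLayerFp16 as a flat std::vector<float> in (class, height, width) row-major order; decodeSegmentation is a hypothetical name, and the 512x896 size used further down is taken from the road-segmentation-adas-0001 model description, so adjust it to your blob.

#include <vector>
#include <opencv2/opencv.hpp>

// Hypothetical helper: per-pixel argmax over the class planes,
// mirroring the Python decode().
cv::Mat decodeSegmentation(const std::vector<float> &data, int height, int width) {
    // one BGR colour per class index, same table as the Python class_colors
    const cv::Vec3b classColors[4] = {
        {0, 0, 0}, {0, 255, 0}, {255, 0, 0}, {0, 0, 255}};
    const int numClasses = 4;
    const int planeSize = height * width;

    cv::Mat outputColors(height, width, CV_8UC3);
    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; ++x) {
            // np.argmax(data, axis=0): class with the highest score at this pixel
            int bestClass = 0;
            float bestScore = data[y * width + x]; // plane 0
            for (int c = 1; c < numClasses; ++c) {
                float score = data[c * planeSize + y * width + x];
                if (score > bestScore) {
                    bestScore = score;
                    bestClass = c;
                }
            }
            // np.take(class_colors, indices, axis=0): colour lookup
            outputColors.at<cv::Vec3b>(y, x) = classColors[bestClass];
        }
    }
    return outputColors;
}

In the main loop it would be used along these lines (layer name taken from the Python example; getLayerFp16 converts the FP16 tensor to floats, which covers both toTensorResult and np.squeeze):

    std::vector<float> data = inSeg->getLayerFp16("L0317_ReWeight_SoftMax");
    cv::Mat colors = decodeSegmentation(data, 512, 896);
    cv::Mat rgbFrame = inRgb->getCvFrame();
    cv::resize(colors, colors, rgbFrame.size());
    slar_depth_segmentation::draw(colors, rgbFrame);
    cv::imshow("camera window", rgbFrame);

One caveat: the gen2 Python experiment creates a plain dai::node::NeuralNetwork for this blob, whereas dai::node::MobileNetDetectionNetwork parses its output into ImgDetections, so to read the raw tensor as dai::NNData you would swap in the generic NeuralNetwork node.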