Reputation: 3125
I have this opencv function:
static std::vector<cv::Point> findBoundaryPixels(const cv::Mat_<uchar> &trimap, int a, int b)
{
std::vector<cv::Point> result;
for (int x = 1; x < trimap.cols - 1; ++x)
for (int y = 1; y < trimap.rows - 1; ++y)
{
if (trimap(y, x) == a)
{
if (trimap(y - 1, x) == b ||
trimap(y + 1, x) == b ||
trimap(y, x - 1) == b ||
trimap(y, x + 1) == b)
{
result.push_back(cv::Point(x, y));
}
}
}
return result;
}
i am looking into speeding my application up using the opencv ParallelLoopBody
code, to run every-pixel operations in parallel.
I have added this class:
class Parallel_process : public cv::ParallelLoopBody
{
private:
cv::Mat trimap;
int a;
int b;
std::vector<cv::Point>& result;
public:
Parallel_process(cv::Mat inputImgage, std::vector<cv::Point>& presult, int aa, int bb)
: trimap(inputImgage), result(presult), a(aa), b(bb) {}
virtual void operator()(const cv::Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
int x = i / trimap.cols;
int y = i / trimap.rows;
if (trimap.at<uchar>(y, x) == a)
{
if (trimap.at<uchar>(y - 1, x) == b ||
trimap.at<uchar>(y + 1, x) == b ||
trimap.at<uchar>(y, x - 1) == b ||
trimap.at<uchar>(y, x + 1) == b)
{
result.push_back(cv::Point(x, y));
}
}
}
}
};
and adjusted the function so that it reads:
static std::vector<cv::Point> findBoundaryPixels(const cv::Mat_<uchar> &trimap, int a, int b)
{
std::vector<cv::Point> result;
// create 8 threads and use TBB
cv::parallel_for_(cv::Range(0, 8), Parallel_process(trimap, result, a, b));
return result;
}
However, this crashes my application. I have tried to follow the docs, as here:
https://docs.opencv.org/trunk/d7/dff/tutorial_how_to_use_OpenCV_parallel_for_.html
But have clearly failed. Where am i going wrong with this?
Thank you.
Upvotes: 1
Views: 1102
Reputation: 20959
In tutorial input cv::Range
for cv::parallel_for_
is [0, img.cols * img.width]
, you should do it in the same way. This input range is splitted into smaller ranges and they are passed as parameters of operator() which is executed by thread.
So you should call
cv::parallel_for_(cv::Range(0, trimap.cols * trimap.rows), Parallel_process(trimap, result, a, b));
In void operator()(const cv::Range& range)
you should use value of range
to calculate x
and y
. When your image has WIDTH and HEIGHT, and we know that pixels are stored in rows, you can use these formulas
x = r % WIDTH (r value of range)
y = r / WIDTH
next you should add conditions to check if pixel is on borders (x == 0, y == 0, etc)
int x = i % trimap.cols;
int y = i / trimap.cols;
if (x == 0 || y == 0 || x == trimap.cols-1 || y == trimap.rows-1)
continue;
if (trimap.at<uchar>(y, x) == a)
{
if (trimap.at<uchar>(y - 1, x) == b ||
trimap.at<uchar>(y + 1, x) == b ||
trimap.at<uchar>(y, x - 1) == b ||
trimap.at<uchar>(y, x + 1) == b)
{
// ---> result.push_back(cv::Point(x, y));
}
}
and the most important thing you call push_back
on vector without synchronizing. You should use mutex
to lock access to vector. If you are using C++11 you can define mutex in your operator()
as static variable
static cv::Mutex mtx;
mtx.lock();
result.push_back(cv::Point(x,y));
mtx.unlock();
when you are using older version than C++11, you can keep reference to mutex (must be created before calling cv::parallel_for_
) in Parallel_process
and call lock/unlock on it.
In tutorial synchronization is not needed because output cv::Mat was created and for each value of range different pixel (x,y) is written.
Upvotes: 3