Reputation: 3582
I am using cv::EM
algorithm to do gaussian mixture model classification for image streams. However, while classifying pixels into different models using EM::prediction
method, I found it is too much slow, uses about 3 seconds for one 600x800 image. On the other hand, the MOG background subtractor
that is provided by OpenCV is performing this part very quickly, uses only about 30ms. So I decided to use its perform method to replace EM::prediction
part. However, I don't know how to change it.
The code that I am using up to the prediction
part is as follows:
cv::Mat floatSource;
source.convertTo ( floatSource, CV_32F );
cv::Mat samples ( source.rows * source.cols, 3, CV_32FC1 );
int idx = 0;
for ( int y = 0; y < source.rows; y ++ )
{
cv::Vec3f* row = floatSource.ptr <cv::Vec3f> (y);
for ( int x = 0; x < source.cols; x ++ )
{
samples.at<cv::Vec3f> ( idx++, 0 ) = row[x];
}
}
cv::EMParams params(2); // num of mixture we use is 2 here
cv::ExpectationMaximization em ( samples, cv::Mat(), params );
cv::Mat means = em.getMeans();
cv::Mat weight = em.getWeights();
const int fgId = weights.at<float>(0) > weights.at<flaot>(1) ? 0:1;
idx = 0;
for ( int y = 0; y < source.rows; y ++ )
{
for ( int x = 0; x < source.cols; x ++ )
{
const int result = cvRound ( em.predict ( samples.row ( idx++ ), NULL );
}
}
The partial code I found from "cvbgfg_gaussmix.cpp" for EM prediction
is like this:
static void process8uC3 ( BackgroundSubtractorMOG& obj, const Mat& image, Mat& fgmask, double learningRate )
{
int x, y, k, k1, rows = image.rows, cols = image.cols;
float alpha = (float)learningRate, T = (float)obj.backgroundRatio, vT = (float)obj.varThreshold;
int K = obj.nmixtures;
const float w0 = (float)CV_BGFG_MOG_WEIGHT_INIT;
const float sk0 = (float)(CV_BGFG_MOG_WEIGHT_INIT/CV_BGFG_MOG_SIGMA_INIT);
const float var0 = (float) (CV_BGFG_MOG_SIGMA_INIT*CV_BGFG_MOG_SIGMA_INIT);
for ( y = 0; y < rows; y ++ )
{
const uchar* src = image.ptr<uchar>(y);
uchar* dst = fgmask.ptr<uchar>(y);
MixData<Vec3f>* mptr = (MixData<Vec3f>*)obj.bgmodel.ptr(y);
for ( x = 0; x < cols; x++, mptr += K )
{
float wsum = 0, dw = 0;
Vec3f pix ( src [x*3], src[x*3+1], src[x*3+2]);
for ( k = 0; k < K; k ++ )
{
float w = mptr[k].weight;
Vec3f mu = mptr[k].mean[0];
Vec3f var = mptr[k].var[0];
Vec3f diff = pix - mu;
float d2 = diff.dot(diff);
if ( d2 < vT * (var[0] +var[1] + var[2] )
{
dw = alpha * ( 1.f - w );
mptr[k].weight = w + dw;
mptr[k].mean = mu + alpha * diff;
var = Vec3f ( max ( var[0] + alpha * ( diff[0] * diff[1] - var[0] ), FLT_EPSILON),
max ( var[1] + alpha * ( diff[1]*diff[1] - var[1] ), FLT_EPSILON,
max ( var[2] + alpha * ( diff[2]*diff[2] - var[2] ), FLT_EPSILON ));
mptr[k].var = var;
mptr[k].sortKey = w/sqrt ( var[0] + var[1] + var[2] );
for ( k1 = k-1; k1 >= 0; k1-- )
{
if ( mptr[k1].sortKey > mptr[k1+1].sortKey)
break;
std::swap ( mptr[k1],mptr[k1+1]);
}
break;
}
wsum += w;
}
dst[x] = (uchar) (-(wsum >= T ));
wsum += dw;
if ( k == K )
{
wsum += w0 - mptr[K-1].weight;
mptr[k-1].weight = w0;
mptr[K-1].mean = pix;
mptr[K-1].var = Vec3f ( var0, var0, var0 );
mptr[K-1].sortKey = sk0;
}
else
for ( ; k < K; k ++ )
wsum += mptr[k].weight;
dw = 1.f/wsum;
for ( k = 0; k < K; k ++ )
{
mptr[k].weight *= dw;
mptr[k].sortKey *= dw;
}
}
}
}
How can I change this partial code so that it can be used in my first code to em.predict
part? Thank you in advance.
Update
I did it by myself like this for using the process8uC3
function in my code:
cv::Mat fgImg ( 600, 800, CV_8UC3 );
cv::Mat bgImg ( 600, 800, CV_8UC3 );
double learningRate = 0.001;
int x, y, k, k1;
int rows = sourceMat.rows; //source opencv matrix
int cols = sourceMat.cols; //source opencv matrix
float alpha = (float) learningRate;
float T = 2.0;
float vT = 0.30;
int K = 3;
const float w0 = (float) CV_BGFG_MOG_WEIGTH_INIT;
const float sk0 = (float) (CV_BGFG_MOG_WEIGHT_INIT/CV_BGFG_MOG_SIGMA_INIT);
const float var0 = (float) (CV_BGFG_MOG_SIGMA_INIT*CV_BGFG_MOG_SIGMA_INIT);
const float minVar = FLT_EPSILON;
for ( y = 0; y < rows; y ++ )
{
const char* src = source.ptr < uchar > ( y );
uchar* dst = fgImg.ptr < uchar > ( y );
uchar* tmp = bgImg.ptr ( y );
MixData<cv::Vec3f>* mptr = (MixData<cv::Vec3f>*)tmp;
for ( x = 0; x < cols; x ++, mptr += K )
{
float w = mptr[k].weight;
cv::Vec3f mu = mpptr[k].mean[0];
cv::Vec3f var = mptr[k].var[0];
cv::Vec3f diff = pix - mu;
float d2 = diff.dot ( diff );
if ( d2 < vT * ( var[0] + var[1] + var[2] ) )
{
dw = alpha * ( 1.f - w );
mptr[k].weight = w + dw;
mptr[k].mean = mu + alpha * diff;
var = cv::Vec3f ( max ( var[0] + alpha*(diff[0]*diff[0]-var[0]),minVar),
max ( var[1]+ alpha*(diff[1]*diff[1]-var[1]),minVar),
max ( var[2] + alpha*(diff[2]*diff[2]-var[2]),minVar) );
mptr[k].var = var;
mptr[k].sortKey = w/sqrt ( var[0] + var[1] + var[2] );
for ( k1 = k-1; k1 >= 0; k1 -- )
{
if ( mptr[k1].sortKey > mptr[k1+1].sortKey )
break;
std::swap ( mptr[k1], mptr[k1+1] );
}
break;
}
wsum += w;
}
dst[x] = (uchar) (-(wsum >= T ));
wsum += dw;
if ( k == K )
{
wsum += w0 - mptr[k-1].weight;
mptr[k-1].weight = w0;
mptr[k-1].mean = pix;
mptr[k-1].var = cv::Vec3f ( var0, var0, var0 );
mptr[k-1].sortKey = sk0;
}
else
for ( ; k < K; k ++ )
{
mptr[k].weight *= dw;
mptr[k].sortKey *= dw;
}
}
}
}
It compiled without error, but the result is totally a mass. I doubt maybe it is something related to the values T
and vT
, and changed them with several other values, but it didn't make any difference. So I believe even it compiled without error, I used it in a wrong way.
Upvotes: 3
Views: 3749
Reputation: 1680
Take a look into source code of GrabCut in OpenCV: modules/imgproc/src/grabcut.cpp. There are private class GMM (implements training Gaussian Mixture Model and sample classification) in this module. To initialize GMM the k-means is used. If you need even more faster initialization you could try k-means++ algorithm (refer to generateCentersPP function in modules/core/src/matrix.cpp module).
Upvotes: 1
Reputation: 3988
Not direct answer to your questions but a few comments on your code:
int idx = 0;
for ( int y = 0; y < source.rows; y ++ )
{
cv::Vec3f* row = floatSource.ptr <cv::Vec3f> (y);
for ( int x = 0; x < source.cols; x ++ )
{
samples.at<cv::Vec3f> ( idx++, 0 ) = row[x];
}
}
My guess is that here you want to create a matrix with rows-by-cols rows and 3 columns, storing the pixels RGB (or any other color space you might be using) values. First, your samples matrix is wrongly initialised, since you forget a loop on the image channels. Only the first channel is filled in your code. But anyway, you can do just the same by calling reshape
:
cv::Mat samples = floatSource.reshape(1, source.rows*source.cols)
This will not only fix your bug, but also speed up your process as accessing to pixels using Mat.at<> is really not that fast, and reshape is O(1) operation, as the underlying matrix data is not changed, just the number of rows/cols/channels.
Second, you can save some time by passing the full sample matrix to em::predict instead of each sample. At the moment, you make rows-by-col calls to em::predict while you can do just one, plus rows-by-cols call to mat.row() which creates a temporary matrix(header).
One way to speed this further would be to parallelize the calls to predict, e.g. using TBB which is used by OpenCV (have you turned TBB ON when compiling OpenCV? Maybe predict is already multithreaded, not checked that).
Upvotes: 1