blender
blender

Reputation: 361

Android OpenCV poor performance with JNI

I have a huge problem with OpenCV 3.1.0 on Android. I am developing an app which does template matching on a camera preview. My first approach was to use the OpenCV Java wrapper, which worked okay: one processing cycle took about 3.6s. To speed this up I reimplemented the code in C++. For some reason, since then one cycle takes up to 35s. Trying to speed this up and leverage multithreading, I moved the JNI execution to an AsyncTask. Since then, a single execution takes up to 65s.

I am using the Gradle experimental plugin 0.7.0, which is considered stable, and the most recent NDK (12.1 as of now).

Here's my module build.gradle:

    ndk {
        moduleName "OpenCVWrapper"
        ldLibs.addAll(["android", "log", "z"])
        cppFlags.add("-std=c++11")
        cppFlags.add("-fexceptions")
        cppFlags.add("-I"+file("src/main/jni").absolutePath)
        cppFlags.add("-I"+file("src/main/jni/opencv2").absolutePath)
        cppFlags.add("-I"+file("src/main/jni/opencv").absolutePath)
        stl = "gnustl_shared"
        debuggable = "true"
    }
    productFlavors {
        create("arm") {
            ndk.with {
                abiFilters.add("armeabi")
                String libsDir = file('../openCVLibrary310/src/main/jniLibs/armeabi/').absolutePath+'/'
                ldLibs.add(libsDir + "libopencv_core.a")
                ldLibs.add(libsDir + "libopencv_highgui.a")
                ldLibs.add(libsDir + "libopencv_imgproc.a")
                ldLibs.add(libsDir + "libopencv_java3.so")
                ldLibs.add(libsDir + "libopencv_ml.a")

            }
        }
        create("armv7") {
            ndk.with {
                abiFilters.add("armeabi-v7a")
                String libsDir = file('../openCVLibrary310/src/main/jniLibs/armeabi-v7a/').absolutePath+'/'
                ldLibs.add(libsDir + "libopencv_core.a")
                [... and so on ...]

So here's the Android Java code, which executed in about 3-4 seconds:

    // Template-matching pipeline on one camera frame (Java wrapper version).
    // data is the byte[] delivered by the camera; it is loaded into a single-channel
    // Mat with height + height/2 rows and width columns.
    Mat yuv = new Mat(height+height/2, width, CvType.CV_8UC1);
    yuv.put(0,0,data);
    Mat input = new Mat(height, width, CvType.CV_8UC3);

    // Colour conversion using COLOR_YUV2RGB_NV12, requesting 3 destination channels.
    Imgproc.cvtColor(yuv, input, Imgproc.COLOR_YUV2RGB_NV12, 3);
    yuv.release();

    // Rotate by 270 degrees around (midPoint, midPoint), where midPoint is half of the
    // shorter image side; the destination size swaps rows and cols.
    int midPoint = Math.min(input.cols(), input.rows())/2;
    Mat rotated = new Mat();
    Imgproc.warpAffine(input, rotated,
            Imgproc.getRotationMatrix2D(new Point(midPoint, midPoint), 270, 1.0),
            new Size(input.rows(), input.cols()));
    input.release();

    // midRect is defined outside this snippet; its size is used as the crop size.
    android.util.Size packageRect = midRect.getSize();
    // NOTE(review): input was already released a few lines above - this call looks redundant.
    input.release();

    // Crop a packageRect-sized rectangle centred in the rotated image.
    Rect r = new Rect(((rotated.cols()/2)-(packageRect.getWidth()/2)),
            ((rotated.rows()/2)-(packageRect.getHeight()/2)),
            packageRect.getWidth(), packageRect.getHeight());
    Mat cut = new Mat(rotated, r);
    Mat scaled = new Mat();
    // Scale the crop to 323x339 with INTER_AREA interpolation.
    Imgproc.resize(cut,scaled, new Size(323, 339), 0, 0, Imgproc.INTER_AREA);
    // Writes the unscaled crop (cut, not scaled) to the "cutout" external file.
    Imgcodecs.imwrite(getExternalFileName("cutout").getAbsolutePath(), cut);
    cut.release();

    // Match against pattern (defined outside this snippet) using TM_CCOEFF_NORMED and
    // take the location of the maximum score.
    Mat output = new Mat();
    Imgproc.matchTemplate(pattern, scaled, output, Imgproc.TM_CCOEFF_NORMED);
    Core.MinMaxLocResult tmplResult = Core.minMaxLoc(output);

    // The x coordinate of the best match, offset by 150, is handed to findPackage.
    findPackage(tmplResult.maxLoc.x+150);
    scaled.release();
    // NOTE(review): input and cut were both released earlier - these repeated calls look redundant.
    input.release();
    output.release();
    cut.release();

In turn, that's the C++ code to do exactly the same:

/**
 * JNI implementation of ScanActivity.scanPackage.
 *
 * Converts the frame in input_ to RGB, rotates it by 270 degrees, crops a centred
 * rectWidth x rectHeight rectangle, scales the crop to 323x339, runs matchTemplate
 * against `pattern`, and reports the min/max results through setters on `data`.
 *
 * @param env        JNI environment used for array access and Java callbacks
 * @param instance   presumably the calling ScanActivity object; not referenced in the visible body
 * @param input_     byte array holding the frame, read as a (height + height/2) x width 8-bit image
 * @param data       Java object providing setMinXPos/setMaxXPos (int) and setMinVal/setMaxVal (float)
 * @param output     not referenced in the visible body
 * @param width      frame width
 * @param height     frame height
 * @param rectWidth  width of the centred crop rectangle
 * @param rectHeight height of the centred crop rectangle
 *
 * NOTE(review): `pattern` is not declared in this snippet; presumably a Mat defined elsewhere.
 */
JNIEXPORT void JNICALL Java_at_identum_planogramscanner_ScanActivity_scanPackage(JNIEnv *env, jobject instance, jbyteArray input_, jobject data, jlong output, jint width, jint height, jint rectWidth, jint rectHeight) {
// Obtain a native pointer to the elements of the Java byte array.
jbyte *input = env->GetByteArrayElements(input_, NULL);

// Look up the setter methods on the result object's class.
jclass resultDataClass = env->GetObjectClass(data);
jmethodID setResultMaxXPos = env->GetMethodID(resultDataClass, "setMaxXPos", "(I)V");
jmethodID setResultMinXPos = env->GetMethodID(resultDataClass, "setMinXPos", "(I)V");
jmethodID setResultMinVal = env->GetMethodID(resultDataClass, "setMinVal", "(F)V");
jmethodID setResultMaxVal = env->GetMethodID(resultDataClass, "setMaxVal", "(F)V");

LOGE("Before work");

// Mat header constructed on the JNI buffer pointer, then converted with
// CV_YUV2RGB_NV12 into a 3-channel image.
Mat convert(height+height/2, width, CV_8UC1, (unsigned char*)input);
Mat img(height, width, CV_8UC3);
cvtColor(convert, img, CV_YUV2RGB_NV12, 3);
convert.release();

LOGE("After Colorconvert");

// Rotate by 270 degrees around (midCoord, midCoord), where midCoord is half of the
// shorter image side; the destination size swaps rows and cols.
int midCoord = min(img.cols, img.rows)/2;
Mat rot;
Mat rotMat = getRotationMatrix2D(Point2f(midCoord,midCoord), 270, 1.0);
warpAffine(img, rot, rotMat, Size(img.rows, img.cols));
rotMat.release();

LOGE("After Rotation");

// Crop a rectWidth x rectHeight rectangle centred in the rotated image.
Rect r(
        (rot.cols/2-rectWidth/2),
        (rot.rows/2-rectHeight/2),
        rectWidth, rectHeight );
Mat cut(rot,r);
rot.release();

LOGE("After Cutting");

// Scale the crop to 323x339 with INTER_AREA interpolation.
Mat scaled(Size(323, 339), CV_8UC3);
resize(cut, scaled, Size(323,339),0,0,INTER_AREA);
cut.release();

LOGE("After Scaling");

// NOTE(review): match is pre-allocated as pattern.cols x 1, CV_8UC1; matchTemplate
// presumably re-creates its output, so this allocation may be unnecessary - confirm.
// NOTE(review): this uses TM_SQDIFF_NORMED, whereas the Java version above uses
// TM_CCOEFF_NORMED - the two snippets do not run the same matching method.
Mat match(pattern.cols, 1, CV_8UC1);
matchTemplate(pattern, scaled, match, TM_SQDIFF_NORMED);
scaled.release();

LOGE("After Templatematching and normalize");

// Find the minimum and maximum scores and their locations in the match result.
double minVal; double maxVal; Point minLoc; Point maxLoc;
minMaxLoc(match, &minVal, &maxVal, &minLoc, &maxLoc, Mat());

img.release();
// Report the x positions and values back to the Java result object.
env->CallVoidMethod(data, setResultMinXPos, minLoc.x);
env->CallVoidMethod(data, setResultMaxXPos, maxLoc.x);
env->CallVoidMethod(data, setResultMinVal, minVal);
env->CallVoidMethod(data, setResultMaxVal, maxVal);

LOGE("After Calling JNI funcs");

// NOTE(review): release mode 0 presumably writes the buffer back to the Java array;
// since input is only read here, JNI_ABORT may be sufficient - confirm against the JNI spec.
env->ReleaseByteArrayElements(input_, input, 0);

As you can see, it is practically the same work, and I expected it to run a little faster than the Android Java version - certainly not 10 times slower, and definitely not 20 times slower when run from an AsyncTask.

My best guess is that the OpenCV .a archives need some kind of compiler settings to run as fast as possible. I hope someone can point me in the right direction!

Thanks in advance!

Upvotes: 1

Views: 1435

Answers (1)

Red Dango
Red Dango

Reputation: 372

I recently built a real-time face recognition application using OpenCV's Java wrapper and, like you, I wanted to squeeze more performance out of it, so I implemented a JNI version. Again, as in your case, the JNI version turned out to be slower than the Java wrapper version, albeit just a little.

In your case I can see why the performance suddenly suffered; it happens here:

jbyte *input = env->GetByteArrayElements(input_, NULL);

You can read more online about why this is slow: JNI always copies the array (using GetByteArrayElements) from Java to C++. Depending on the camera preview size, the copy can be very significant, especially for real-time processing.

Here's a way to speed up your code: instead of sending the Mat bytes to JNI, you can send the Mat pointer address directly.

In JAVA

// Loads the camera bytes into a Mat and passes that Mat's native address to the
// JNI scanPackage call in place of the raw byte array.
public void processFrame(byte[] data) {
    // NOTE(review): raw is created without a size or type before put() is called;
    // presumably it must be allocated first (e.g. height+height/2 rows, width columns,
    // CV_8UC1, as in the question) - confirm.
    Mat raw = new Mat();
    raw.put(0, 0, data); // place the bytes into the Mat, starting at row 0, column 0
    // NOTE(review): native_obj is the name used in this answer for the Mat's native
    // address; verify the actual accessor in the OpenCV Java API (likely nativeObj or
    // getNativeObjAddr()).
    scanPackage(...,raw.native_obj, ...);
}

where native_obj is the address of the Mat object, which is type long

To convert the jlong back to a Mat in C++, change your jbyteArray input_ to jlong input_:

// Variant of the JNI entry point that receives the address of a cv::Mat as a jlong
// (input_) in place of the byte array; the remaining parameters are elided here.
JNIEXPORT void JNICALL Java_at_identum_planogramscanner_ScanActivity_scanPackage(..., jlong input_, ...) {

// Reinterpret the jlong as a pointer to the cv::Mat whose address was passed in.
cv::Mat* pframe_addr = (cv::Mat*)input_;
// Destination image for the colour conversion: height x width, CV_8UC3.
Mat img(height, width, CV_8UC3);
// Convert the Mat behind the pointer with CV_YUV2RGB_NV12 into img (3 channels requested).
cv::cvtColor(*pframe_addr,img,CV_YUV2RGB_NV12, 3);
/** The rest of your code */

Upvotes: 1

Related Questions