Tariq
Tariq

Reputation: 2344

OpenCV GPU object detection is slower and gives fewer detections than the CPU version

Below are CPU and GPU implementations of the object detection code from OpenCV.

1) The GPU implementation is slower than the CPU version

2) The detection rate is lower than that of the CPU version of the code for the same classifier

Any idea why that is?

CPU Version of CODE

#include <windows.h>
#include <mmsystem.h>
#pragma comment(lib, "winmm.lib")

#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <iostream>
#include <stdio.h>

using namespace std;
using namespace cv;

int main(int argc, const char** argv)
{
    //create the cascade classifier object used for the face detection
    CascadeClassifier face_cascade;
    //use the haarcascade_frontalface_alt.xml library
    face_cascade.load("C:/cascades/haarcascade_frontalface_alt_tree.xml");

    //setup video capture device and link it to the first capture device
    VideoCapture captureDevice;
    captureDevice.open(3);

    //setup image files used in the capture process
    Mat captureFrame;
    Mat grayscaleFrame;

    //create a window to present the results
    namedWindow("outputCapture", 1);

    //create a loop to capture and find faces
    while(true)
    {
        //capture a new image frame
        captureDevice>>captureFrame;

        //convert captured image to gray scale and equalize
        cvtColor(captureFrame, grayscaleFrame, CV_BGR2GRAY);
        equalizeHist(grayscaleFrame, grayscaleFrame);

    //create a vector array to store the face found
    std::vector<Rect> faces;

    //find faces and store them in the vector array
    face_cascade.detectMultiScale(grayscaleFrame, faces);

    //draw a rectangle for all found faces in the vector array on the original image
    for(int i = 0; i < (int)faces.size(); i++)
    {
        Scalar color(0, 0, 255);

        Point pt1(faces[i].x + faces[i].width, faces[i].y + faces[i].height);
        Point pt2(faces[i].x, faces[i].y);

        rectangle(captureFrame, pt1, pt2, color, 1, 8, 0);

        string text = "Adam yuzi";
        int fontFace = FONT_HERSHEY_TRIPLEX;
        double fontScale = 1;
        int thickness = 2;  

        putText(captureFrame, text, pt2, fontFace, fontScale, color, thickness);
        //PlaySound(TEXT("C:/cascades/adam.wav"), NULL, SND_FILENAME | SND_SYNC);
        // the correct code
        //Sleep(1000);
        //break;
        //cout<<char(7);
        }
       //print the output
        imshow("outputCapture", captureFrame);

       //pause for 33ms
        waitKey(33);
    }
    return 0;
}

The GPU implementation is provided in this sample link.

GPU Version of CODE

// WARNING: this sample is under construction! Use it on your own risk.
#if defined _MSC_VER && _MSC_VER >= 1400
#pragma warning(disable : 4100)
#endif


#include <iostream>
#include <iomanip>
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"

using namespace std;
using namespace cv;
using namespace cv::cuda;

/// Prints the command-line usage of the sample followed by the OpenCV
/// version string (CV_VERSION) to standard output.
static void help()
{
    // The option is spelled "--video" here so that the usage text matches
    // what the argument parser in main() actually accepts.
    cout << "Usage: ./cascadeclassifier_gpu \n\t--cascade <cascade_file>\n\t(<image>|--video <video>|--camera <camera_id>)\n"
            "Using OpenCV version " << CV_VERSION << endl << endl;
}


static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
{
    if (src.channels() == 3)
    {
        cv::cvtColor( src, gray, COLOR_BGR2GRAY );
    }
    else
    {
        gray = src;
    }

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));

    if (scale != 1)
    {
        cv::resize(gray, resized, sz);
    }
    else
    {
        resized = gray;
    }
}

static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double     scale)
{
    if (src.channels() == 3)
    {
        cv::cuda::cvtColor( src, gray, COLOR_BGR2GRAY );
    }
    else
    {
        gray = src;
    }

    Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));

    if (scale != 1)
    {
        cv::cuda::resize(gray, resized, sz);
    }
    else
    {
        resized = gray;
    }
}
/// Draws one line of outlined text near the left edge of an image.
///
/// @param img       image to draw on
/// @param lineOffsY zero-based text line index; selects the vertical position
/// @param fontColor colour of the foreground text
/// @param ss        text to draw
static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
{
    const int face = FONT_HERSHEY_DUPLEX;
    const double scale = 0.8;
    const int thickness = 2;

    // The height of a fixed sample string defines the line pitch.
    const Size glyph = cv::getTextSize("T[]", face, scale, thickness, 0);
    const Point origin(1, 3 * glyph.height * (lineOffsY + 1) / 2);

    // First pass: thicker black text that serves as an outline.
    putText(img, ss, origin, face, scale, Scalar(0,0,0), 5*thickness/2, 16);
    // Second pass: the text itself in the requested colour.
    putText(img, ss, origin, face, scale, fontColor, thickness, 16);
}


/// Overlays the status lines and the hotkey hints on the output image.
///
/// @param canvas       image to draw on; its size is shown in the status line
/// @param bHelp        true to list every hotkey, false for the single hint
/// @param bGpu         selects the "GPU, " / "CPU, " label
/// @param bLargestFace selects the "OneFace, " / "MultiFace, " label
/// @param bFilter      selects the "Filter:ON" / "Filter:OFF" label
/// @param fps          frame rate, printed with one decimal place
static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
{
    const Scalar statusColor(255,0,0);
    const Scalar hintColor(118,185,0);

    // Line 0: frame rate.
    ostringstream fpsLine;
    fpsLine << "FPS = " << setprecision(1) << fixed << fps;
    matPrint(canvas, 0, statusColor, fpsLine.str());

    // Line 1: image size and current mode flags.
    ostringstream modeLine;
    modeLine << "[" << canvas.cols << "x" << canvas.rows << "], ";
    modeLine << (bGpu ? "GPU, " : "CPU, ");
    modeLine << (bLargestFace ? "OneFace, " : "MultiFace, ");
    modeLine << (bFilter ? "Filter:ON" : "Filter:OFF");
    matPrint(canvas, 1, statusColor, modeLine.str());

    // Plain string literals are passed to matPrint (an earlier
    // ostringstream-based call is noted as not building on MacOS).
    if (!bHelp)
    {
        matPrint(canvas, 2, hintColor, "H - toggle hotkeys help");
        return;
    }

    // Lines 2..6: the full hotkey list.
    static const char* const hotkeys[] = {
        "Space - switch GPU / CPU",
        "M - switch OneFace / MultiFace",
        "F - toggle rectangles Filter",
        "H - toggle hotkeys help",
        "1/Q - increase/decrease scale"
    };
    const int hotkeyCount = static_cast<int>(sizeof(hotkeys) / sizeof(hotkeys[0]));
    for (int k = 0; k < hotkeyCount; ++k)
    {
        matPrint(canvas, 2 + k, hintColor, hotkeys[k]);
    }
}


int main(int argc, const char *argv[])
{
    if (argc == 1)
    {
        help();
        return -1;
    }

    if (getCudaEnabledDeviceCount() == 0)
    {
        return cerr << "No GPU found or the library is compiled without CUDA support"     << endl, -1;
    }

    cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());

    string cascadeName;
    string inputName;
    bool isInputImage = false;
    bool isInputVideo = false;
    bool isInputCamera = false;

    for (int i = 1; i < argc; ++i)
    {
        if (string(argv[i]) == "--cascade")
            cascadeName = argv[++i];
        else if (string(argv[i]) == "--video")
        {
            inputName = argv[++i];
            isInputVideo = true;
        }
        else if (string(argv[i]) == "--camera")
        {
            inputName = argv[++i];
            isInputCamera = true;
        }
        else if (string(argv[i]) == "--help")
        {
            help();
            return -1;
        }    
        else if (!isInputImage)
        {
            inputName = argv[i];
            isInputImage = true;
        }
        else
        {
            cout << "Unknown key: " << argv[i] << endl;
            return -1;
        }
    }

    CascadeClassifier_CUDA cascade_gpu;
    if (!cascade_gpu.load(cascadeName)){
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<     "\"" << endl, help(), -1;
    }

    CascadeClassifier cascade_cpu;
    if (!cascade_cpu.load(cascadeName)) {
        return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName <<     "\"" << endl, help(), -1;
    }

    VideoCapture capture;
    Mat image;

    if (isInputImage) {
        image = imread(inputName);
        CV_Assert(!image.empty());
        }
    else if (isInputVideo) {
        capture.open(inputName);
        CV_Assert(capture.isOpened());
    }
else   {
        capture.open(atoi(inputName.c_str()));
        CV_Assert(capture.isOpened());
    }

    namedWindow("result", 1);

    Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
    vector<Rect> facesBuf_cpu;

    GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;

/* parameters */
    bool useGPU = true;
    double scaleFactor = 1.0;
    bool findLargestObject = false;
    bool filterRects = true;
    bool helpScreen = false;

    int detections_num;
    for (;;)    {
        if (isInputCamera || isInputVideo)        {
            capture >> frame;
            if (frame.empty())            {
                break;
            }
        }

        (image.empty() ? frame : image).copyTo(frame_cpu);
        frame_gpu.upload(image.empty() ? frame : image);

        convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor);
        convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor);

        TickMeter tm;
        tm.start();

    if (useGPU)        {
            //cascade_gpu.visualizeInPlace = true;
            cascade_gpu.findLargestObject = findLargestObject;

            detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu,     1.2,
                                                          (filterRects ||     findLargestObject) ? 4 : 0);
            facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
        }
        else        {
            Size minSize = cascade_gpu.getClassifierSize();
            cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
                                         (filterRects || findLargestObject) ? 4 : 0,
                                         (findLargestObject ?     CASCADE_FIND_BIGGEST_OBJECT : 0)
                                            | CASCADE_SCALE_IMAGE,
                                         minSize);
            detections_num = (int)facesBuf_cpu.size();
        }

        if (!useGPU && detections_num)      {
            for (int i = 0; i < detections_num; ++i)            {
                rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
            }
        }

        if (useGPU)        {
            resized_gpu.download(resized_cpu);
             for (int i = 0; i < detections_num; ++i)     {
                rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i],     Scalar(255));
             }
        }

           tm.stop();
        double detectionTime = tm.getTimeMilli();
        double fps = 1000 / detectionTime;
        //print detections to console
        cout << setfill(' ') << setprecision(2);
        cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
    if ((filterRects || findLargestObject) && detections_num > 0)        {
            Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
            for (int i = 0; i < min(detections_num, 2); ++i)            {
                cout << ", [" << setw(4) << faceRects[i].x
                     << ", " << setw(4) << faceRects[i].y
                         << ", " << setw(4) << faceRects[i].width
                         << ", " << setw(4) << faceRects[i].height << "]";
                    }
            }
            cout << endl;

            cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
            displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects,     fps);
            imshow("result", frameDisp);

            char key = (char)waitKey(5);
            if (key == 27)        {
                break;
            }    
            switch (key)            {
            case ' ':
                useGPU = !useGPU;
                break;
            case 'm':
            case 'M':
                findLargestObject = !findLargestObject;
                break;
            case 'f':
                case 'F':
                filterRects = !filterRects;
                break;
            case '1':
                scaleFactor *= 1.05;
                break;
                case 'q':
            case 'Q':
                scaleFactor /= 1.05;
                break;
            case 'h':
            case 'H':
                helpScreen = !helpScreen;
                break;
            }
        }
        return 0;
    }

NOTE: I did not write this code; I took the CPU version from and the GPU version from here. I also posted my observations in.

Upvotes: 0

Views: 7599

Answers (1)

Andrey  Smorodov
Andrey Smorodov

Reputation: 10850

Try this code, it works fine for me:

#define  _CRT_SECURE_NO_DEPRECATE
#include <stdio.h>
#include <direct.h>
#include "fstream"
#include "iostream"
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl_interop.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/imgproc/imgproc.hpp"

using namespace std;
using namespace cv;
using namespace cv::gpu;

// File-wide GPU cascade classifier: main() loads the cascade file into it
// and detect_faces() configures it and runs the detection with it.
cv::gpu::CascadeClassifier_GPU cascade_gpu;

//-------------------------------------------------------------------------------------------------------------
// Runs the global GPU cascade (cascade_gpu) on one image and returns the
// detected rectangles.
//
// image  : input image; converted with CV_BGR2GRAY when it has three
//          channels, otherwise copied unchanged.
// return : the rectangles reported by the detector, or an empty vector when
//          nothing was found. The detector runs in largest-object mode with
//          a minimum object size of a quarter of the image in each dimension.
vector<Rect> detect_faces(Mat& image)
{
        const bool largestOnly = true;
        const bool groupRects = true;

        // Host-side grayscale image that is then uploaded to the device.
        Mat gray;
        if (image.channels() == 3)
                cvtColor(image, gray, CV_BGR2GRAY);
        else
                image.copyTo(gray);

        GpuMat grayOnDevice(gray);
        GpuMat hitsOnDevice;

        cascade_gpu.visualizeInPlace = false;
        cascade_gpu.findLargestObject = largestOnly;

        const int minNeighbors = (groupRects || largestOnly) ? 4 : 0;
        const int hitCount = cascade_gpu.detectMultiScale(grayOnDevice, hitsOnDevice, 1.2, minNeighbors,
                                                          Size(image.cols/4, image.rows/4));

        vector<Rect> found;
        if (hitCount == 0)
        {
                return found;
        }

        // Only the first hitCount columns of the device buffer are copied
        // back to the host.
        Mat hitsOnHost;
        hitsOnDevice.colRange(0, hitCount).download(hitsOnHost);

        const Rect* firstHit = hitsOnHost.ptr<Rect>();
        found.assign(firstHit, firstHit + hitCount);

        // The GpuMat buffers are released by their destructors on return.
        return found;
}
//-----------------------------------------------------------------------------------------------------------------

//----------------------------------------------------------------------
// MAIN
//----------------------------------------------------------------------
int main(int argc, char * argv[])
{
        cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
        cascade_gpu.load("haarcascade_frontalface_alt2.xml");
        Mat frame,img;
        namedWindow("frame");
        VideoCapture capture(0);
        capture >> frame;
        vector<Rect> rects;
        if (capture.isOpened())
        {
                while(waitKey(20)!=27) // Exit by escape press
                {
                        capture >> frame;
                        cvtColor(frame,img,CV_BGR2GRAY);
                        rects=detect_faces(img);
                        if(rects.size()>0)
                        {
                                cv::rectangle(frame,rects[0],CV_RGB(255,0,0));
                        }
                        imshow("frame",frame);
                }
        }

        return 0;
}

Upvotes: 1

Related Questions