Reputation: 2344
Below are CPU and GPU implementations of the object detection code from OpenCV.
1) The GPU implementation is slow as compared to the CPU version
2) Detection rate is slow as compared to the CPU version of the code for same classifier
Any idea why is it like that?
CPU Version of CODE
#include <windows.h>
#include <mmsystem.h>
#pragma comment(lib, "winmm.lib")
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <stdio.h>
using namespace std;
using namespace cv;
int main(int argc, const char** argv)
{
//create the cascade classifier object used for the face detection
CascadeClassifier face_cascade;
//use the haarcascade_frontalface_alt.xml library
face_cascade.load("C:/cascades/haarcascade_frontalface_alt_tree.xml");
//setup video capture device and link it to the first capture device
VideoCapture captureDevice;
captureDevice.open(3);
//setup image files used in the capture process
Mat captureFrame;
Mat grayscaleFrame;
//create a window to present the results
namedWindow("outputCapture", 1);
//create a loop to capture and find faces
while(true)
{
//capture a new image frame
captureDevice>>captureFrame;
//convert captured image to gray scale and equalize
cvtColor(captureFrame, grayscaleFrame, CV_BGR2GRAY);
equalizeHist(grayscaleFrame, grayscaleFrame);
//create a vector array to store the face found
std::vector<Rect> faces;
//find faces and store them in the vector array
face_cascade.detectMultiScale(grayscaleFrame, faces);
//draw a rectangle for all found faces in the vector array on the original image
for(int i = 0; i < (int)faces.size(); i++)
{
Scalar color(0, 0, 255);
Point pt1(faces[i].x + faces[i].width, faces[i].y + faces[i].height);
Point pt2(faces[i].x, faces[i].y);
rectangle(captureFrame, pt1, pt2, color, 1, 8, 0);
string text = "Adam yuzi";
int fontFace = FONT_HERSHEY_TRIPLEX;
double fontScale = 1;
int thickness = 2;
putText(captureFrame, text, pt2, fontFace, fontScale, color, thickness);
//PlaySound(TEXT("C:/cascades/adam.wav"), NULL, SND_FILENAME | SND_SYNC);
// the correct code
//Sleep(1000);
//break;
//cout<<char(7);
}
//print the output
imshow("outputCapture", captureFrame);
//pause for 33ms
waitKey(33);
}
return 0;
}
and the GPU version implementation is provided in this sample ink GPU Version of CODE
// WARNING: this sample is under construction! Use it on your own risk.
#if defined _MSC_VER && _MSC_VER >= 1400
#pragma warning(disable : 4100)
#endif
#include <iostream>
#include <iomanip>
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/cuda.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
static void help()
{
cout << "Usage: ./cascadeclassifier_gpu \n\t--cascade <cascade_file>\n\t(<image>|-- video <video>|--camera <camera_id>)\n"
"Using OpenCV version " << CV_VERSION << endl << endl;
}
static void convertAndResize(const Mat& src, Mat& gray, Mat& resized, double scale)
{
if (src.channels() == 3)
{
cv::cvtColor( src, gray, COLOR_BGR2GRAY );
}
else
{
gray = src;
}
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
if (scale != 1)
{
cv::resize(gray, resized, sz);
}
else
{
resized = gray;
}
}
static void convertAndResize(const GpuMat& src, GpuMat& gray, GpuMat& resized, double scale)
{
if (src.channels() == 3)
{
cv::cuda::cvtColor( src, gray, COLOR_BGR2GRAY );
}
else
{
gray = src;
}
Size sz(cvRound(gray.cols * scale), cvRound(gray.rows * scale));
if (scale != 1)
{
cv::cuda::resize(gray, resized, sz);
}
else
{
resized = gray;
}
}
static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
{
int fontFace = FONT_HERSHEY_DUPLEX;
double fontScale = 0.8;
int fontThickness = 2;
Size fontSize = cv::getTextSize("T[]", fontFace, fontScale, fontThickness, 0);
Point org;
org.x = 1;
org.y = 3 * fontSize.height * (lineOffsY + 1) / 2;
putText(img, ss, org, fontFace, fontScale, Scalar(0,0,0), 5*fontThickness/2, 16);
putText(img, ss, org, fontFace, fontScale, fontColor, fontThickness, 16);
}
static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
{
Scalar fontColorRed = Scalar(255,0,0);
Scalar fontColorNV = Scalar(118,185,0);
ostringstream ss;
ss << "FPS = " << setprecision(1) << fixed << fps;
matPrint(canvas, 0, fontColorRed, ss.str());
ss.str("");
ss << "[" << canvas.cols << "x" << canvas.rows << "], " <<
(bGpu ? "GPU, " : "CPU, ") <<
(bLargestFace ? "OneFace, " : "MultiFace, ") <<
(bFilter ? "Filter:ON" : "Filter:OFF");
matPrint(canvas, 1, fontColorRed, ss.str());
// by Anatoly. MacOS fix. ostringstream(const string&) is a private
// matPrint(canvas, 2, fontColorNV, ostringstream("Space - switch GPU / CPU"));
if (bHelp)
{
matPrint(canvas, 2, fontColorNV, "Space - switch GPU / CPU");
matPrint(canvas, 3, fontColorNV, "M - switch OneFace / MultiFace");
matPrint(canvas, 4, fontColorNV, "F - toggle rectangles Filter");
matPrint(canvas, 5, fontColorNV, "H - toggle hotkeys help");
matPrint(canvas, 6, fontColorNV, "1/Q - increase/decrease scale");
}
else
{
matPrint(canvas, 2, fontColorNV, "H - toggle hotkeys help");
}
}
int main(int argc, const char *argv[])
{
if (argc == 1)
{
help();
return -1;
}
if (getCudaEnabledDeviceCount() == 0)
{
return cerr << "No GPU found or the library is compiled without CUDA support" << endl, -1;
}
cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
string cascadeName;
string inputName;
bool isInputImage = false;
bool isInputVideo = false;
bool isInputCamera = false;
for (int i = 1; i < argc; ++i)
{
if (string(argv[i]) == "--cascade")
cascadeName = argv[++i];
else if (string(argv[i]) == "--video")
{
inputName = argv[++i];
isInputVideo = true;
}
else if (string(argv[i]) == "--camera")
{
inputName = argv[++i];
isInputCamera = true;
}
else if (string(argv[i]) == "--help")
{
help();
return -1;
}
else if (!isInputImage)
{
inputName = argv[i];
isInputImage = true;
}
else
{
cout << "Unknown key: " << argv[i] << endl;
return -1;
}
}
CascadeClassifier_CUDA cascade_gpu;
if (!cascade_gpu.load(cascadeName)){
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
}
CascadeClassifier cascade_cpu;
if (!cascade_cpu.load(cascadeName)) {
return cerr << "ERROR: Could not load cascade classifier \"" << cascadeName << "\"" << endl, help(), -1;
}
VideoCapture capture;
Mat image;
if (isInputImage) {
image = imread(inputName);
CV_Assert(!image.empty());
}
else if (isInputVideo) {
capture.open(inputName);
CV_Assert(capture.isOpened());
}
else {
capture.open(atoi(inputName.c_str()));
CV_Assert(capture.isOpened());
}
namedWindow("result", 1);
Mat frame, frame_cpu, gray_cpu, resized_cpu, faces_downloaded, frameDisp;
vector<Rect> facesBuf_cpu;
GpuMat frame_gpu, gray_gpu, resized_gpu, facesBuf_gpu;
/* parameters */
bool useGPU = true;
double scaleFactor = 1.0;
bool findLargestObject = false;
bool filterRects = true;
bool helpScreen = false;
int detections_num;
for (;;) {
if (isInputCamera || isInputVideo) {
capture >> frame;
if (frame.empty()) {
break;
}
}
(image.empty() ? frame : image).copyTo(frame_cpu);
frame_gpu.upload(image.empty() ? frame : image);
convertAndResize(frame_gpu, gray_gpu, resized_gpu, scaleFactor);
convertAndResize(frame_cpu, gray_cpu, resized_cpu, scaleFactor);
TickMeter tm;
tm.start();
if (useGPU) {
//cascade_gpu.visualizeInPlace = true;
cascade_gpu.findLargestObject = findLargestObject;
detections_num = cascade_gpu.detectMultiScale(resized_gpu, facesBuf_gpu, 1.2,
(filterRects || findLargestObject) ? 4 : 0);
facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
}
else {
Size minSize = cascade_gpu.getClassifierSize();
cascade_cpu.detectMultiScale(resized_cpu, facesBuf_cpu, 1.2,
(filterRects || findLargestObject) ? 4 : 0,
(findLargestObject ? CASCADE_FIND_BIGGEST_OBJECT : 0)
| CASCADE_SCALE_IMAGE,
minSize);
detections_num = (int)facesBuf_cpu.size();
}
if (!useGPU && detections_num) {
for (int i = 0; i < detections_num; ++i) {
rectangle(resized_cpu, facesBuf_cpu[i], Scalar(255));
}
}
if (useGPU) {
resized_gpu.download(resized_cpu);
for (int i = 0; i < detections_num; ++i) {
rectangle(resized_cpu, faces_downloaded.ptr<cv::Rect>()[i], Scalar(255));
}
}
tm.stop();
double detectionTime = tm.getTimeMilli();
double fps = 1000 / detectionTime;
//print detections to console
cout << setfill(' ') << setprecision(2);
cout << setw(6) << fixed << fps << " FPS, " << detections_num << " det";
if ((filterRects || findLargestObject) && detections_num > 0) {
Rect *faceRects = useGPU ? faces_downloaded.ptr<Rect>() : &facesBuf_cpu[0];
for (int i = 0; i < min(detections_num, 2); ++i) {
cout << ", [" << setw(4) << faceRects[i].x
<< ", " << setw(4) << faceRects[i].y
<< ", " << setw(4) << faceRects[i].width
<< ", " << setw(4) << faceRects[i].height << "]";
}
}
cout << endl;
cv::cvtColor(resized_cpu, frameDisp, COLOR_GRAY2BGR);
displayState(frameDisp, helpScreen, useGPU, findLargestObject, filterRects, fps);
imshow("result", frameDisp);
char key = (char)waitKey(5);
if (key == 27) {
break;
}
switch (key) {
case ' ':
useGPU = !useGPU;
break;
case 'm':
case 'M':
findLargestObject = !findLargestObject;
break;
case 'f':
case 'F':
filterRects = !filterRects;
break;
case '1':
scaleFactor *= 1.05;
break;
case 'q':
case 'Q':
scaleFactor /= 1.05;
break;
case 'h':
case 'H':
helpScreen = !helpScreen;
break;
}
}
return 0;
}
NOTE: I did not write this code, I took the CPU version from and the GPU version from here . I also posted my observatios in.
Upvotes: 0
Views: 7599
Reputation: 10850
Try this code, it works fine for me:
#define _CRT_SECURE_NO_DEPRECATE
#include <stdio.h>
#include <direct.h>
#include "fstream"
#include "iostream"
#include <vector>
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl_interop.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/contrib/contrib.hpp"
#include "opencv2/video/tracking.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;
cv::gpu::CascadeClassifier_GPU cascade_gpu;
//-------------------------------------------------------------------------------------------------------------
vector<Rect> detect_faces(Mat& image)
{
vector<Rect> res;
bool findLargestObject = true;
bool filterRects = true;
int detections_num;
Mat faces_downloaded;
Mat im(image.size(),CV_8UC1);
GpuMat facesBuf_gpu;
if(image.channels()==3)
{
cvtColor(image,im,CV_BGR2GRAY);
}
else
{
image.copyTo(im);
}
GpuMat gray_gpu(im);
cascade_gpu.visualizeInPlace = false;
cascade_gpu.findLargestObject = findLargestObject;
detections_num = cascade_gpu.detectMultiScale(gray_gpu, facesBuf_gpu, 1.2,(filterRects || findLargestObject) ? 4 : 0,Size(image.cols/4,image.rows/4));
if(detections_num==0){return res;}
facesBuf_gpu.colRange(0, detections_num).download(faces_downloaded);
Rect *faceRects = faces_downloaded.ptr<Rect>();
for(int i=0;i<detections_num;i++)
{
res.push_back(faceRects[i]);
}
gray_gpu.release();
facesBuf_gpu.release();
return res;
}
//-----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------
// MAIN
//----------------------------------------------------------------------
int main(int argc, char * argv[])
{
cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
cascade_gpu.load("haarcascade_frontalface_alt2.xml");
Mat frame,img;
namedWindow("frame");
VideoCapture capture(0);
capture >> frame;
vector<Rect> rects;
if (capture.isOpened())
{
while(waitKey(20)!=27) // Exit by escape press
{
capture >> frame;
cvtColor(frame,img,CV_BGR2GRAY);
rects=detect_faces(img);
if(rects.size()>0)
{
cv::rectangle(frame,rects[0],CV_RGB(255,0,0));
}
imshow("frame",frame);
}
}
return 0;
}
Upvotes: 1