Reputation: 41
I'm trying to use the DFT API of Intel MKL to test its speed on CentOS 7. Compilation succeeds, but the program crashes with a segmentation fault when it runs. However, the same code runs successfully with Visual Studio 2017 on Windows. The result on Windows looks like this: result on windows
#include <execinfo.h>
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <complex>
#include <cxxabi.h>
#include <iostream>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
#include "mkl_dfti.h"
int main() {
MKL_LONG len[2] = { 1080, 961 }, status;
float x_in[1080][1920];
DFTI_DESCRIPTOR_HANDLE fft;
status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
status = DftiSetValue(fft, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
status = DftiCommitDescriptor(fft);
//float x[1080* 2000];
std::complex<float> x_out[1080][961];
for (int i = 0; i < 10; i++) {
double totalcputime = (double)cv::getTickCount();
//std::cout << status << std::endl;
status = DftiComputeForward(fft, x_in, x_out);
//std::cout << status << std::endl;
totalcputime = ((double)cv::getTickCount() - totalcputime) / cv::getTickFrequency();
std::cout << "MKL-DFT Time: " << totalcputime << std::endl;
}
cv::Mat sizedimage = cv::Mat::zeros(1080, 1920, CV_32FC1);
cv::Mat opencvtransform = cv::Mat(1080, 1920 / 2 + 1, CV_32FC1);
for (int i = 0; i < 10; i++) {
double totalcputime = (double)cv::getTickCount();
cv::dft(sizedimage, opencvtransform);
totalcputime = ((double)cv::getTickCount() - totalcputime) / cv::getTickFrequency();
std::cout << "opencv-DFT Time: " << totalcputime << std::endl;
}
return 0;
}
I used GDB to debug my code, and it gives me the following information:
Program received signal SIGSEGV, Segmentation fault.
0x00000000004012b8 in main () at comparison.cpp:25
25 status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
The file compiles successfully with the following parameters:
g++ comparison.cpp `pkg-config opencv --cflags --libs` -lmkl_rt -g
Does anyone have any idea what causes this bug?
Upvotes: 1
Views: 672
Reputation: 581
You may also try enabling MKL_VERBOSE mode to see all the relevant runtime details: export MKL_VERBOSE=1. Here is the MKL verbose output for the FFT calls:
./a.out
MKL_VERBOSE Intel(R) MKL 2019.0 Update 4 Product build 20190411 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions (Intel(R) AVX) enabled processors, Lnx 2.80GHz intel_thread
.........
MKL_VERBOSE FFT(srfo1080:961:961x961:1:1,pack:ccs,tLim:20,desc:0x1b4df40) 3.83ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:20
MKL-DFT Time: 0.0038483
Upvotes: 0
Reputation: 581
Could you check whether the problem still occurs with MKL 2019 u4?
I slightly reworked your code to check whether there are any problems with the latest MKL 2019, removing the OpenCV parts and dynamically allocating the in/out arrays:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
//#include <execinfo.h>
//#include <cxxabi.h>
#include <assert.h>
#include <omp.h>
#include <complex>
#include "mkl.h"
#define N1 1080
#define N2 961
#define N3 1920
int main()
{
// MKL_LONG len[2] = { 1080, 961 }, status;
MKL_LONG status;
MKL_LONG len[2];
len[0] = N1;
len[1] = N2;
//float x_in[1080][1920];
float* x_in = (float*)mkl_malloc(N1*N3*sizeof(float), 64);
assert(NULL != x_in);
DFTI_DESCRIPTOR_HANDLE fft;
status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
if (0 != status){
std::cout << "\t DftiCreateDescriptor Error : " << status << std::endl;
}
status = DftiSetValue(fft, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
if (0 != status){
std::cout << "\t DftiSetValue Error : " << status << std::endl;
}
status = DftiCommitDescriptor(fft);
if (0 != status){
std::cout << "\t DftiCommitDescriptor Error : " << status << std::endl;
}
double t1,texec;
// std::complex<float> x_out[1080][961];
MKL_Complex8* x_out = (MKL_Complex8*)mkl_malloc(N1*N2*sizeof(MKL_Complex8), 64);
t1 = dsecnd();
for (int i = 0; i < 10; i++) {
t1 = dsecnd();
status = DftiComputeForward(fft, x_in, x_out);
if (0 != status){std::cout << "\t DftiComputeForward Error : " << status << std::endl;}
texec = dsecnd() - t1;
std::cout << "MKL-DFT Time: " << texec << std::endl;
}
status = DftiFreeDescriptor(&fft);
if (0 != status){
std::cout << "\t DftiFreeDescriptor Error : " << status << std::endl;
}
return 0;
}
and here is the output I see on my part:
]$ ./a.out
MKL-DFT Time: 0.00725237
MKL-DFT Time: 0.00381843
MKL-DFT Time: 0.00362679
MKL-DFT Time: 0.0021284
MKL-DFT Time: 0.00221884
MKL-DFT Time: 0.00215556
MKL-DFT Time: 0.00211133
MKL-DFT Time: 0.002133
MKL-DFT Time: 0.00212184
MKL-DFT Time: 0.00215306
Upvotes: 1