Lance
Lance

Reputation: 39

OpenACC with OpenCV (pgc++): error "duplicate lives at 0x7ffff78db5c0 size 96 partially present"

I used OpenCV together with OpenACC to implement data transfer, but I got an error when I compiled the code file. Please take a look at the information below:

#include<queue>
#include <vector>
#include<random>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include<openacc.h>

using namespace std;
using namespace cv;


// Reproducer: loads an image, builds a grayscale copy on the host, runs a small
// OpenACC test kernel on a fixed-size array, and then tries to write to a
// cv::Mat from inside an OpenACC parallel region.
int main(){

    cv::Mat srcImg=cv::imread("/home/usera/images/blue-mountains.jpg");
    // Both helper images are sized from srcImg before the load check below.
    Mat grayScale(srcImg.rows, srcImg.cols, CV_8UC1, Scalar::all(0));
    Mat duplicate(srcImg.rows,srcImg.cols, CV_8UC1,Scalar::all(255) );

    if(!srcImg.data){

     cout<<"The file is not loaded or does not exist"<<endl;
     return -1;

     }

             
       // Host-side grayscale conversion: weighted sum of the three channel
       // values of every pixel, truncated to uchar.
       for(int i = 0; i < srcImg.rows; i++) {
          for(int j = 0; j < srcImg.cols; j++) {
             double gray = 0.21 * srcImg.at<cv::Vec3b>(i,j)[0] +
                           0.72 * srcImg.at<cv::Vec3b>(i,j)[1] +
                           0.07 * srcImg.at<cv::Vec3b>(i,j)[2];
             grayScale.at<uchar>(i,j) = (uchar) gray;
            }
       
        }

     cout<<"Matrix grayScale :"<<grayScale.rows<<" "<<grayScale.cols<<endl;

int vrows=srcImg.rows;
int vcols=srcImg.cols;

cout<<"Step"<<grayScale.step<<endl;


// Sanity test on a plain fixed-size array: the copy clause names the full
// 3x5 range explicitly, and each element is set to i+j.
int b[3][5];

#pragma acc parallel loop copy(b[:3][:5])
for(int i=0;i<3;i++){
  #pragma acc loop  
  for(int j=0;j<5;j++){
    b[i][j]=i+j;


   }
}

cout<<"b[N-1][M-2] :"<<b[1][1]<<endl;
cout<<"b[N][M] :"<<b[2][4]<<endl;
 
// Raw pointer to the grayscale pixel buffer; only read on the host here.
auto *startaddress=grayScale.data;

cout<<(int)*(startaddress+1)<<endl;
cout<<(int)*(startaddress+2)<<endl;



        // This region has no data clause, yet its body uses the cv::Mat
        // object `duplicate` directly. The run log below stops right after the
        // two prints above with "partially present ... name=duplicate", so the
        // failure appears to be raised when this region is entered.
        #pragma acc parallel loop collapse(2)
        for(int i=0;i<vrows;i++){
        //#pragma acc loop
        for(int j=0;j<vcols;j++){

          duplicate.at<uchar>(i,j)=10;//grayScale.at<uchar>(i,j);

       }

      }

    // Both lines are labelled "duplicate", but the first one prints a
    // grayScale pixel and only the second prints a duplicate pixel.
    cout<<"duplicate"<<": "<<(int)grayScale.at<uchar>(23,45)<<endl;
    cout<<"duplicate"<<": "<<(int)duplicate.at<uchar>(23,45)<<endl;
}

The result shows:

Matrix grayScale :810 1440
Step1440
b[N-1][M-2] :2
b[N][M] :6
194
195
duplicate lives at 0x7ffff78db5c0 size 96 partially present

Present table dump for device[1]: NVIDIA Tesla GPU 0, compute capability 8.6, threadid=1
host:0x7ffff78db610 device:0x7f1ba56fa000 size:8 presentcount:1+0 line:129 name:(null)
allocated block device:0x7f1ba56fa000 size:512 thread:1
FATAL ERROR: variable in data clause is partially present on the device: name=duplicate

I wonder whether there is not enough space for the gangs or vectors requested for the object duplicate, or whether the duplicate object should be copied to the device first (for example with #pragma acc parallel loop copy(duplicate.data[:rows*cols])). I am not sure which kind of array I should use in order to copy duplicate to the GPU device.

Could anyone provide any hints or suggestions?

Thanks in advance.

Upvotes: 0

Views: 55

Answers (1)

Lance
Lance

Reputation: 39

I used these methods to implement parallelization.

Thanks to Mat for the helpful suggestion.

// Excerpt from the full listing further down; grayScale, srcImg, vrows, vcols,
// h and w are declared there.
//
// The kernels operate on the raw pixel buffers (cv::Mat::data) rather than on
// the cv::Mat objects, so the data clauses can name plain pointer ranges.
auto *startaddress=grayScale.data;

// Step 1: fill the whole grayscale buffer with a constant on the device.
#pragma acc enter data copyin(startaddress[0:vrows*vcols])
//   #pragma acc enter data copyin(grayScale) attach(grayScale.data)
#pragma acc parallel loop default(present)
for(int i=0;i<vrows*vcols;i++){
    startaddress[i]=20;
}
#pragma acc exit data copyout(startaddress[0:vrows*vcols])

auto *srcimage=srcImg.data;

// Step 2: overwrite every 3-byte pixel of the source buffer and derive the
// grayscale value from it. The indexing i*w*3+j*3 treats the buffer as
// tightly packed rows of 3 bytes per pixel.
// NOTE(review): assumes srcImg is continuous with exactly 3 channels - confirm.
#pragma acc enter data copyin(srcimage[:h*w*3],startaddress[0:vrows*vcols])
#pragma acc parallel loop collapse(2) default(present)
for(int i=0;i<h;i++){
    for(int j=0;j<w;j++){
        srcimage[i*w*3+j*3+0]=20;
        srcimage[i*w*3+j*3+1]=69;
        srcimage[i*w*3+j*3+2]=120;
        double gray=0.21*srcimage[i*w*3+j*3+0] + 0.72*srcimage[i*w*3+j*3+1]+0.07*srcimage[i*w*3+j*3+2];
        startaddress[i*w+j]=(uchar)gray;
    }
}

// The directive and its copyout clause are kept on one line; split across two
// lines without a continuation, the clause is not part of the pragma.
#pragma acc exit data copyout(srcimage[:h*w*3],startaddress[0:vrows*vcols])

Please check the deepcopy topic for your reference. Reference link

code reference

#include <opencv2/opencv.hpp>
#include<queue>
#include <vector>
#include<random>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include<openacc.h>

using namespace std;
using namespace cv;


int main(){

     cv::Mat srcImg=cv::imread("/home/usera/images/blue-mountains.jpg");



      Mat grayScale(srcImg.rows, srcImg.cols, CV_8UC1, Scalar::all(0));
      Mat duplicate(srcImg.rows,srcImg.cols, CV_8UC1,Scalar::all(255) );

    if(!srcImg.data){

     cout<<"The file is not loaded or does not exist"<<endl;
     return -1;

     }

         
     
       for(int i = 0; i < srcImg.rows; i++) {
          for(int j = 0; j < srcImg.cols; j++) {
             double gray = 0.21 * srcImg.at<cv::Vec3b>(i,j)[0] +
                           0.72 * srcImg.at<cv::Vec3b>(i,j)[1] +
                           0.07 * srcImg.at<cv::Vec3b>(i,j)[2];
             grayScale.at<uchar>(i,j) = (uchar) gray;
            }
       
        }

int h=srcImg.rows;
int w=srcImg.cols;
int ch=srcImg.channels();
int stepsize=srcImg.step;
cout<<"Matrix grayScale :"<<grayScale.rows<<" "<<grayScale.cols<<endl;
int vrows=srcImg.rows;
int vcols=srcImg.cols;
cout<<"Step"<<grayScale.step<<endl;
int b[3][5];
#pragma acc parallel loop copy(b[:3][:5])
for(int i=0;i<3;i++){
  #pragma acc loop  
  for(int j=0;j<5;j++){
    b[i][j]=i+j;


   }
}
cout<<"b[N-1][M-2] :"<<b[1][1]<<endl;
cout<<"b[N][M] :"<<b[2][4]<<endl;

 
   auto *startaddress=grayScale.data;
   cout<<(int)*(startaddress+1)<<endl;
   cout<<(int)*(startaddress+2)<<endl;
   #pragma acc enter data copyin(startaddress[0:vrows*vcols])
   #pragma acc parallel loop default(present)
   for(int i=0;i<vrows*vcols;i++){
       startaddress[i]=20;
   }
   
 #pragma acc exit data copyout(startaddress[0:vrows*vcols])




auto *srcimage=srcImg.data;

   #pragma acc enter data copyin(srcimage[:h*w*3],startaddress[0:vrows*vcols]) 
   #pragma acc parallel loop collapse(2) default(present)
   for(int i=0;i<h;i++){
       for(int j=0;j<w;j++){
    {
       srcimage[i*w*3+j*3+0]=20;
       srcimage[i*w*3+j*3+1]=69;
       srcimage[i*w*3+j*3+2]=120;
       double gray=0.21*srcimage[i*w*3+j*3+0] + 0.72*srcimage[i*w*3+j*3+1]+0.07*srcimage[i*w*3+j*3+2];
       startaddress[i*w+j]=(uchar)gray;

      }
   
   }

      #pragma acc exit data copyout(srcimage[:h*w*3],startaddress[0:vrows*vcols])
      
       cout<<"srcImge"<<(int)srcimage[3*w+3*26+0]<<endl;
       cout<<"srcImge"<<(int)srcimage[2*w+3*12+0]<<endl;
       cout<<"srcImge"<<(int)srcimage[12*w+3*88+0]<<endl;
       cout<<"srcImge"<<(int)srcimage[2*w+3*12+1]<<endl;
       cout<<"startaddress"<<(int)startaddress[12]<<endl;
       cout<<"startaddress"<<(int)startaddress[23]<<endl;
       cout<<"startaddress"<<(int)startaddress[46]<<endl;
       cout<<0.21*srcimage[30*w*3+13*3+0] + 0.72*srcimage[30*w*3+13*3+1]+0.07*srcimage[30*w*3+13*3+2]<<endl;
                     

}
 

The result shows:

main:
    108, Generating copy(b[:][:]) [if not already present]
         Generating NVIDIA GPU code
        111, #pragma acc loop gang /* blockIdx.x */
        113, #pragma acc loop seq
    113, Loop is parallelizable
    131, Generating enter data copyin(startaddress[:vcols*vrows])
         Generating NVIDIA GPU code
        153, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
    131, Generating default present(startaddress[:vcols*vrows])
    161, Generating exit data copyout(startaddress[:vcols*vrows])
    219, Generating enter data copyin(startaddress[:vcols*vrows],srcimage[:(w*h)*3])
         Generating NVIDIA GPU code
        223, #pragma acc loop gang, vector(128) collapse(2) /* blockIdx.x threadIdx.x */
        224,   /* blockIdx.x threadIdx.x collapsed */
    219, Generating default present(startaddress[:h],srcimage[:])
    250, Generating exit data copyout(startaddress[:vcols*vrows],srcimage[:(w*h)*3])
    
    
$ ./test4a
Matrix grayScale :810 1440
Step1440
b[N-1][M-2] :2
b[N][M] :6
194
195
-------------------------
20
20
-------------------------
duplicate: 20
duplicate: 255
srcImge20
srcImge20
srcImge20
srcImge69
startaddress62
startaddress62
startaddress62
62.28

Upvotes: 0

Related Questions