Contiguous Memory Allocation on GPU

Question

Does cudaMalloc allocate contiguous chunks of memory (i.e., physical bytes next to each other)?

I have a piece of CUDA code that simply copies 128 bytes from global device memory to shared memory, using 32 threads. I am trying to find a way to guarantee that this transfer can be completed in one memory transaction of 128 bytes. If cudaMalloc allocates contiguous memory blocks, then this can be done easily.

Following is the code:

#include <iostream>

using namespace std;
// Number of uint elements in the array that is copied into shared memory.
#define SIZE 32
// Thread count for the copy; the kernel launch that would use it is garbled
// in this listing, so its exact role in the launch config is assumed — TODO confirm.
#define NUMTHREADS 32
// Kernel: each thread stages one uint from global memory into the block's
// dynamically sized shared array, using threadIdx.x as both the source and
// destination index.
//
// Preconditions visible from the code:
//   - `bits` is `extern __shared__`, so the launch must pass a dynamic
//     shared-memory size of at least blockDim.x * sizeof(uint) bytes.
//   - memPointer must have at least blockDim.x readable elements; there is
//     no bounds check.
//   - Only threadIdx.x is used, so every block reads the same leading elements.
__global__ void copy(uint* memPointer){
  extern __shared__ uint bits[];

  // One element per thread: adjacent threads touch adjacent words.
  const int lane = threadIdx.x;
  const uint word = memPointer[lane];
  bits[lane] = word;
}

// Host driver: fills a SIZE-element host array, copies it into a device
// buffer obtained from cudaMalloc, launches the `copy` kernel on it, and
// releases the buffer. Failures are reported on stdout; the process always
// returns 0, as in the original listing.
//
// NOTE(review): in the original listing everything between the `<` of the
// fill loop's condition and the `>>>` of the kernel launch was lost. The fill
// loop body, the cudaMalloc / cudaMemcpy calls, and the launch configuration
// below are reconstructed from the surviving error branches (e1, e3, e6) and
// the surrounding description (32 threads, 128 bytes) — confirm against the
// original post.
int main(){
  uint inputData[SIZE];
  uint* storedData = NULL;
  const size_t numBytes = SIZE * sizeof(uint);

  // Fill value is an assumption; the original loop body is not visible.
  for(int i = 0; i < SIZE; i++){
    inputData[i] = i;
  }

  cudaError_t e1 = cudaMalloc((void**)&storedData, numBytes);
  if(e1==cudaSuccess){
      cudaError_t e3 = cudaMemcpy(storedData, inputData, numBytes, cudaMemcpyHostToDevice);
      if(e3==cudaSuccess){
            // The kernel declares `extern __shared__`, so the dynamic
            // shared-memory size must be supplied as the third launch argument.
            copy<<<1, NUMTHREADS, numBytes>>>(storedData);

            // A launch does not return a status: configuration errors show up
            // through cudaGetLastError(), execution errors at the next sync.
            cudaError_t launchErr = cudaGetLastError();
            if(launchErr!=cudaSuccess){
              cout << "Kernel launch failed" << " " << cudaGetErrorString(launchErr) << endl;
            }
            else{
              cudaError_t syncErr = cudaDeviceSynchronize();
              if(syncErr!=cudaSuccess){
                cout << "Kernel execution failed" << " " << cudaGetErrorString(syncErr) << endl;
              }
            }
      }
      else{
        cout << "Failed to copy" << " " << e3 << endl;
      }

      // Free whenever the allocation succeeded; the original only freed on
      // the copy-success path and leaked the buffer when cudaMemcpy failed.
      cudaError_t e6 = cudaFree(storedData);
      if(e6!=cudaSuccess){
        cout << "Error freeing memory storedData" << e6 << endl;
      }
  }
  else{
    cout << "Failed to allocate memory" << " " << e1 << endl;
  }
  return 0;
}

Contiguous Memory Allocation on GPU

Answers (1)

Related Questions