Reputation: 141
I am using CUDA to add two matrices, and to give their result in another matrix. I wish to make use of shared memory feature, and for this, I wrote the following:
#include <stdio.h>
#include <cuda.h>
#define grid 1024
#define BSZ 16
__global__ void addition(int *dev_a, int *dev_b, int *dev_c)
{
__shared__ int as[BSZ][BSZ];
__shared__ int bs[BSZ][BSZ];
int by = blockIdx.y;
int bx = blockIdx.x;
int cvalue;
int ty = threadIdx.y;
int tx = threadIdx.x;
int row = by * BSZ + ty;
int col = bx * BSZ + tx;
as[ty][tx] = dev_a[row*grid + col];
bs[ty][tx] = dev_b[row*grid + col];
__syncthreads();
cvalue = as[ty][tx] + bs[ty][tx];
__syncthreads();
dev_c[row*grid + col] = cvalue;
}
int main ()
{
int a[grid][grid], b[grid][grid], c[grid][grid];
//c = a + b
for(int i=0;i<grid;i++)
{
for(int j=0;j<grid;j++)
{
a[i][j]=2;
b[i][j]=1;
}
}
printf("Working fine here");
int *dev_a;
int *dev_b;
int *dev_c;
int size = grid * grid * sizeof(int);
printf("Working fine");
cudaMalloc( (void**)&dev_a, size );
cudaMalloc( (void**)&dev_b, size );
cudaMalloc( (void**)&dev_c, size );
cudaMemcpy(dev_a,a,size,cudaMemcpyHostToDevice);
cudaMemcpy(dev_b,b,size,cudaMemcpyHostToDevice);
dim3 dimBlock(BSZ,BSZ);
dim3 dimGrid(grid/dimBlock.x,grid/dimBlock.y);
//Kernel launch
addition<<<dimGrid, dimBlock>>>(dev_a, dev_b, dev_c);
cudaMemcpy(c,dev_c,size,cudaMemcpyDeviceToHost);
for (int i=0; i<grid; i++)
{
for(int j=0;j<grid;j++)
{
printf( "%d + %d = %d\n", a[i][j], b[i][j], c[i][j] );
}
}
}
I am getting a segmentation fault error, which I am not able to understand why! Please someone help me with this.
Upvotes: 1
Views: 5439
Reputation: 15642
int a[1024][1024], b[1024][1024], c[1024][1024];
The size of these objects is astronomical! You're probably overflowing the stack. I think you'll find the segfaults vanish if you reduce their sizes, or increase the size of your stack however your implementation permits you to do that, or perhaps even allocate them with dynamic storage duration (eg. malloc
or in your case cudaMalloc
) rather than automatic storage duration.
Upvotes: 8