Reputation: 267
The following is a CUDA programming example, which is basically C with NVIDIA CUDA functions in it. I've been trying to interpret this code example and figure out what it is trying to do. My question is this: the program compiles just fine, but what arguments does it take? For example, this CUDA program is being run in a Linux emulator; however, upon running ./program it returns:
Usage: ./program number Segmentation fault
What are the program's input arguments? Thank you.
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
//#define N 100000
/*
 * CPU reference implementation of SAXPY.
 *
 * For each of the first `length` elements, overwrites y[i] with
 * alpha * x[i] + y[i]. The x array is only read. Nothing happens when
 * length is zero or negative.
 */
__host__ void saxpy_host(int length, float alpha, float * x, float * y)
{
    const float *src = x;
    float *dst = y;
    int remaining = length;

    /* Walk both arrays in lock-step from the front. */
    while (remaining > 0) {
        *dst = alpha * (*src) + *dst;
        ++src;
        ++dst;
        --remaining;
    }
}
/*
 * GPU SAXPY kernel: y[i] = alpha * x[i] + y[i] for every i in [0, length).
 *
 * Launch layout: 1-D grid of 1-D blocks. The kernel uses a grid-stride
 * loop, so any grid/block size produces a complete result; launching
 * ceil(length / blockDim.x) blocks gives one element per thread.
 * Shared memory: none. x and y must be device-accessible pointers to at
 * least `length` floats. A zero or negative length performs no work.
 *
 * No block barrier is needed: each element is read and written by exactly
 * one thread and no shared memory is involved.
 */
__global__ void saxpy (int length, float alpha, float * x, float * y)
{
    /* Widen to size_t so the index arithmetic cannot overflow int. */
    const size_t count = length > 0 ? (size_t) length : 0;
    const size_t stride = (size_t) gridDim.x * blockDim.x;

    for (size_t i = (size_t) blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        y[i] = alpha*x[i] + y[i];
    }
}
/* Print a diagnostic and terminate when a CUDA runtime call did not succeed. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Usage: ./program number
 *
 * `number` is the vector length N (a non-negative decimal integer).
 * The program fills two host vectors with pseudo-random values, runs SAXPY
 * (y = alpha * x + y, alpha = 0.5) on the GPU and on the CPU, and asserts
 * that both results agree element by element.
 *
 * Returns 0 on success, -1 on a usage error, and a non-zero failure status
 * when host memory or a CUDA call fails.
 */
int main(int argc, char* argv[]) {
    if (argc != 2) {
        printf("Usage: %s number\n", argc > 0 ? argv[0] : "program");
        return -1;
    }

    /* Parse N strictly: reject empty, non-numeric, negative or oversized input. */
    char *end = NULL;
    errno = 0;
    long parsed = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || errno == ERANGE ||
        parsed < 0 || parsed > INT_MAX) {
        printf("Usage: %s number\n", argv[0]);
        fprintf(stderr, "number must be an integer in [0, %d]\n", INT_MAX);
        return -1;
    }
    const int N = (int) parsed;

    /* Nothing to compute; also avoids an invalid zero-block kernel launch. */
    if (N == 0) {
        return 0;
    }

    const float alpha = 0.5f;
    /* size_t: sizeof(float) * N does not fit in int for large N. */
    const size_t size = sizeof(float) * (size_t) N;

    /* host data, on the heap so a large N cannot overflow the stack */
    float *x     = (float *) malloc(size);
    float *y     = (float *) malloc(size);
    float *xback = (float *) malloc(size);
    float *yback = (float *) malloc(size);
    if (x == NULL || y == NULL || xback == NULL || yback == NULL) {
        fprintf(stderr, "Out of host memory for %d elements\n", N);
        free(x); free(y); free(xback); free(yback);
        return EXIT_FAILURE;
    }

    /* fill host data */
    for (int i = 0; i < N; i++) {
        x[i] = (float) (rand () % 128);
        y[i] = (float) (rand () % 256);
    }

    /* Allocating and moving data to device */
    float *dxp = NULL, *dyp = NULL;
    check_cuda(cudaMalloc((void**) &dxp, size), "cudaMalloc(x)");
    check_cuda(cudaMalloc((void**) &dyp, size), "cudaMalloc(y)");
    check_cuda(cudaMemcpy(dxp, x, size, cudaMemcpyHostToDevice), "copy x to device");
    check_cuda(cudaMemcpy(dyp, y, size, cudaMemcpyHostToDevice), "copy y to device");

    /* ceil(N / threads) blocks, computed without risking int overflow */
    const int threads = 256;
    const int blocks = N / threads + (N % threads != 0);
    saxpy <<< blocks, threads >>> (N, alpha, dxp, dyp);
    /* A launch reports configuration errors only through cudaGetLastError. */
    check_cuda(cudaGetLastError(), "saxpy launch");

    /* bring back data; errors raised while the kernel ran surface here */
    check_cuda(cudaMemcpy(xback, dxp, size, cudaMemcpyDeviceToHost), "copy x to host");
    check_cuda(cudaMemcpy(yback, dyp, size, cudaMemcpyDeviceToHost), "copy y to host");

    /* Calculating host SAXPY */
    saxpy_host (N, alpha, x, y);

    /*
     * Checking that the GPU result matches the host result. Exact equality
     * is safe for this data: x and y hold small integers and alpha is 0.5,
     * so every product and sum is exactly representable in float.
     */
    for (int i = 0; i < N; i++) {
        assert (xback[i] == x[i]);  /* x must come back unmodified */
        assert (yback[i] == y[i]);
        //printf ("%i %f %f \n", i, yback[i], y[i]);
    }

    /* free device and host data */
    check_cuda(cudaFree(dxp), "cudaFree(x)");
    check_cuda(cudaFree(dyp), "cudaFree(y)");
    free(x); free(y); free(xback); free(yback);
    return 0;
}
Upvotes: 4
Views: 518
Reputation: 16253
int N = atoi(argv[1]);
The program takes a single integer as a command-line argument. (Try calling it as ./program 5, for example.)
It then calculates a SAXPY (an old term originating from early BLAS implementations, but it stuck; it means "single (precision, aka float) real alpha x plus y", i.e. y = alpha*x + y) with vectors of dimension N.
Upvotes: 3