Reputation: 617
I'm trying to figure out how to create a struct/class that is meant to be sent to the device, but I keep getting an "invalid argument" CUDA error. I wrote a small example that reproduces the error:
#include <iostream>
#include <cstdio>
using namespace std;
/* CUDA_WARN(call): evaluates a CUDA runtime call exactly once, prints a warning
   (with the source line) to cerr if the result is not cudaSuccess, and then
   synchronizes the device. It only warns: execution continues after a failure. */
#define CUDA_WARN(XXX) \
do { \
    /* Keep the status so XXX is not executed a second time when it fails. */ \
    const cudaError_t cudaWarnStatus_ = (XXX); \
    if (cudaWarnStatus_ != cudaSuccess) \
        cerr << "CUDA Error: " << cudaGetErrorString(cudaWarnStatus_) \
             << ", at line " << __LINE__ << endl; \
    /* Unconditional, as before: wait for outstanding device work. */ \
    cudaDeviceSynchronize(); \
} while (0)
// Plain pair of double-precision coordinates; init() is callable from both
// host and device code.
struct P {
    double x;
    double y;

    // Assigns the coordinates: x takes a, then y takes b.
    __host__ __device__ void init(const double &a, const double &b)
    {
        this->x = a;
        this->y = b;
    }
};
// Reproduction case exactly as posted in the question; it still contains the
// bug that produces "invalid argument". dP is declared as a P object on the
// host, so every &dP below is a host address, not a device pointer.
int main(int argc, char **argv)
{
P hP, hQ, dP;
cout << "Size of P: " << sizeof(P) << endl;
// cudaMalloc is handed the address of the host object dP here, not the
// address of a pointer variable.
CUDA_WARN(cudaMalloc((void**) &dP, sizeof(P)));
printf("dP: %p\n", &dP); // prints the host address of dP, not a device address
hP.init(1.2,-2.1);
hQ.init(0.,0.);
// Both copies pass &dP (a host address) where a device pointer is expected.
CUDA_WARN(cudaMemcpy(&dP, &hP, sizeof(P), cudaMemcpyHostToDevice));
CUDA_WARN(cudaMemcpy(&hQ, &dP, sizeof(P), cudaMemcpyDeviceToHost));
cout << "Copy back: " << hQ.x << "\t" << hQ.y << endl;
dP.init(3.,3.); // called from host code, so it writes to the host object dP
CUDA_WARN(cudaMemcpy(&hP, &dP, sizeof(P), cudaMemcpyDeviceToHost));
cout << "Copy new: " << hP.x << "\t" << hP.y << endl;
return 0;
}
I'm compiling with (my card is a Tesla C2050):
nvcc -arch sm_20 -o exec file.cu
The result I'm getting is:
Size of P: 16
dP: 0x7fff82d4b7b0
CUDA Error: invalid argument, at line 24
CUDA Error: invalid argument, at line 25
Copy back: 0 0
CUDA Error: invalid argument, at line 28
Copy new: 1.2 -2.1
------------------
(program exited with code: 0)
Press return to continue
Thanks in advance for any help with this!
====== After comments of @talonmies, @JackOLantern, @Robert Crovella =======
Thanks, guys! You really helped! Based on the comments, I was able to correct my code, and it now works. For the record, here is the final solution:
#include <iostream>
#include <cstdio>
using namespace std;
/* CUDA_WARN(call): evaluates a CUDA runtime call exactly once, prints a warning
   (with the source line) to cerr if the result is not cudaSuccess, and then
   synchronizes the device. It only warns: execution continues after a failure. */
#define CUDA_WARN(XXX) \
do { \
    /* Keep the status so XXX is not executed a second time when it fails. */ \
    const cudaError_t cudaWarnStatus_ = (XXX); \
    if (cudaWarnStatus_ != cudaSuccess) \
        cerr << "CUDA Error: " << cudaGetErrorString(cudaWarnStatus_) \
             << ", at line " << __LINE__ << endl; \
    /* Unconditional, as before: wait for outstanding device work. */ \
    cudaDeviceSynchronize(); \
} while (0)
// Plain pair of double-precision coordinates; init() is callable from both
// host and device code.
struct P {
    double x;
    double y;

    // Assigns the coordinates: x takes a, then y takes b.
    __host__ __device__ void init(const double &a, const double &b)
    {
        this->x = a;
        this->y = b;
    }
};
/* INCLUDED KERNEL FUNCTION
   Sets the P object that p points to, to (a, b), from device code.
   The kernel uses no thread or block index, so every launched thread performs
   the same write. */
__global__ void dev_P_init(P *p, double a, double b)
{
    P &target = *p;
    target.init(a, b);
}
// Round-trips a P through device memory, then re-initializes it on the device
// with the dev_P_init kernel and copies the result back to the host.
int main(int argc, char **argv)
{
    P hP, hQ;
    P *dP = NULL; //*changed* device pointer; stays NULL if cudaMalloc fails

    cout << "Size of P: " << sizeof(P) << endl;
    CUDA_WARN(cudaMalloc((void**) &dP, sizeof(P)));
    // Print the device address stored in dP; &dP would be the host address of
    // the pointer variable itself.
    printf("dP: %p\n", (void*) dP);

    hP.init(1.2,-2.1);
    hQ.init(0.,0.);
    CUDA_WARN(cudaMemcpy(dP, &hP, sizeof(P), cudaMemcpyHostToDevice)); //*changed*
    CUDA_WARN(cudaMemcpy(&hQ, dP, sizeof(P), cudaMemcpyDeviceToHost)); //*changed*
    cout << "Copy back: " << hQ.x << "\t" << hQ.y << endl;

    dev_P_init<<< 1, 1 >>>(dP,3., 3.); //*call to kernel*
    // A kernel launch returns no status. cudaGetLastError() clears the error
    // it reports, so read it exactly once into a variable before checking it.
    cudaError_t launchStatus = cudaGetLastError();
    CUDA_WARN(launchStatus);

    CUDA_WARN(cudaMemcpy(&hP, dP, sizeof(P), cudaMemcpyDeviceToHost)); //*changed*
    cout << "Copy new: " << hP.x << "\t" << hP.y << endl;

    // Release the device allocation before exiting.
    CUDA_WARN(cudaFree(dP));
    return 0;
}
And corrected output:
Size of P: 16
dP: 0x7fff6fa2e498
Copy back: 1.2 -2.1
Copy new: 3 3
------------------
(program exited with code: 0)
Press return to continue
Upvotes: 4
Views: 10420
Reputation: 21515
As already noted by @talonmies, `&dP` is not a valid device pointer. In your original code, `dP` is a `P` object that resides on the host, so its address points into host memory. By contrast, when `dP` is declared as a pointer (`P *dP`), `cudaMalloc` receives the address of that pointer and stores a device address in it, so the value of `dP` then points into device memory.
This is the correct version of your code:
#include <iostream>
#include <cstdio>
using namespace std;
/* CUDA_WARN(call): evaluates a CUDA runtime call exactly once, prints a warning
   (with the source line) to cerr if the result is not cudaSuccess, and then
   synchronizes the device. It only warns: execution continues after a failure. */
#define CUDA_WARN(XXX) \
do { \
    /* Keep the status so XXX is not executed a second time when it fails. */ \
    const cudaError_t cudaWarnStatus_ = (XXX); \
    if (cudaWarnStatus_ != cudaSuccess) \
        cerr << "CUDA Error: " << cudaGetErrorString(cudaWarnStatus_) \
             << ", at line " << __LINE__ << endl; \
    /* Unconditional, as before: wait for outstanding device work. */ \
    cudaDeviceSynchronize(); \
} while (0)
// Plain pair of double-precision coordinates; init() is callable from both
// host and device code.
struct P {
    double x;
    double y;

    // Assigns the coordinates: x takes a, then y takes b.
    __host__ __device__ void init(const double &a, const double &b)
    {
        this->x = a;
        this->y = b;
    }
};
// Minimal corrected example: dP is a pointer that cudaMalloc fills with a
// device address, and that pointer (not &dP) is what cudaMemcpy receives.
int main(int argc, char **argv)
{
    P *dP = NULL; // device pointer; stays NULL if cudaMalloc fails
    P hP, hQ;

    // Give the host objects defined values before any of them is copied.
    hP.init(1.2, -2.1);
    hQ.init(0., 0.);

    CUDA_WARN(cudaMalloc((void**) &dP, sizeof(P)));
    CUDA_WARN(cudaMemcpy(dP, &hP, sizeof(P), cudaMemcpyHostToDevice));
    CUDA_WARN(cudaMemcpy(&hQ, dP, sizeof(P), cudaMemcpyDeviceToHost));
    CUDA_WARN(cudaMemcpy(&hP, dP, sizeof(P), cudaMemcpyDeviceToHost));

    // Release the device allocation before exiting.
    CUDA_WARN(cudaFree(dP));
    return 0;
}
Upvotes: 4