Paul
Paul

Reputation: 1

CUDA wrong result

I have some very simple CUDA code with a custom class:

Here's the custom class:

// Expands to __host__ __device__ when compiled by nvcc so the members below
// are callable from both host and device code; expands to nothing for a
// plain C++ compiler, which does not know those qualifiers.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

/**
 * Minimal value holder whose members are callable from host and device code.
 */
class Foo {
public:
    int val;  // payload; public, so callers may read and write it directly

    /** Default-constructs with val == 0 so a constructed Foo never holds an
     *  indeterminate value. */
    CUDA_CALLABLE_MEMBER Foo() : val(0) {
    }
    CUDA_CALLABLE_MEMBER ~Foo() {}

    /** Returns the stored value. Declared const so it can be called on const
     *  objects and through const references/pointers. */
    CUDA_CALLABLE_MEMBER int getVal() const {
        return val;
    }
};

Here's the kernel:

#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "Custom.h"
#include <string>
#include <iostream>
using namespace std;

/**
 * Debug kernel: every thread whose global 1D index is below n prints that
 * index, its block index and the val member of the matching fooArray element.
 *
 * Indexing uses only the x dimension (blockIdx.x, blockDim.x, threadIdx.x).
 * maxNumberOfThreads is accepted but not used by the body.
 * fooArray is dereferenced inside the kernel, so it must point to memory the
 * device can read.
 */
__global__ void someKernel(int maxNumberOfThreads, Foo* fooArray, int n)
{
    const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads past the end of the array have nothing to print.
    if (globalIdx >= n) {
        return;
    }

    const Foo& element = fooArray[globalIdx];
    printf("Hello from thread # %i(block #: %i) with value %d\n", globalIdx, blockIdx.x, element.val);
}

/**
 * Launches someKernel over the n elements of fooArray and blocks until the
 * kernel has finished.
 *
 * fooArray is forwarded to the kernel, which dereferences it on the device.
 * The grid is sized from n so that every element gets a thread, whatever the
 * value of n. Launch and execution errors are reported on stderr; the
 * function returns void, so they cannot be propagated to the caller.
 */
extern void cuda_doStuff(Foo* fooArray, int n)
{
    // Nothing to do, and a launch with zero blocks is an invalid configuration.
    if (fooArray == NULL || n <= 0) {
        return;
    }

    const int threadsPerBlock = 128;  // multiple of the 32-thread warp size
    // Ceil-division written without (n + threadsPerBlock - 1) so it cannot
    // overflow int for very large n.
    const int numberOfBlocks = n / threadsPerBlock + (n % threadsPerBlock != 0 ? 1 : 0);
    const int maxNumberOfThreads = 10;  // forwarded as-is; the kernel body does not read it

    someKernel<<<numberOfBlocks, threadsPerBlock>>>(maxNumberOfThreads, fooArray, n);

    // A kernel launch returns no status itself: configuration errors show up
    // through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "someKernel launch failed: %s\n", cudaGetErrorString(status));
        return;
    }

    // Wait for completion so device-side printf output is flushed and any
    // fault raised while the kernel ran is reported here.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "someKernel execution failed: %s\n", cudaGetErrorString(status));
    }
}

Here's my main function (host code):

#include <stdio.h>
#include "Custom.h"
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <assert.h> 
#include <iostream>
using namespace std;

extern void cuda_doStuff(Foo* fooArray, int n);

int main(int argc, const char* argv[])
{
    int n = 100;
    //Foo* fooArray = new Foo[n];
    const unsigned int bytes = n*sizeof(Foo);
    //Foo fooArray[1000];
    Foo* fooArray = (Foo*)malloc(bytes);
    for (int i = 0; i < n; i++){
        fooArray[i].val = 0;
    }
    for (int i = 0; i < n; i++){
        assert(fooArray[i].val == 0);
    }
    Foo* d_fooArray;
    cudaMalloc((void**)& d_fooArray, bytes);
    cudaError_t error = cudaMemcpy(d_fooArray, &fooArray, bytes, cudaMemcpyHostToDevice);
    const char* cudaError = cudaGetErrorString(error);
    cuda_doStuff(d_fooArray, n);
    cudaFree(d_fooArray);
    free(fooArray);
    cout << cudaError << endl;
    system("pause");
}

However, the output contains seemingly random values, even though every `val` was set to 0:

Hello from thread # 96(block #: 0) with value 1
Hello from thread # 97(block #: 0) with value 13068
Hello from thread # 98(block #: 0) with value 220
Hello from thread # 99(block #: 0) with value 0
Hello from thread # 0(block #: 0) with value 11187856
Hello from thread # 1(block #: 0) with value -858993460
Hello from thread # 16(block #: 0) with value 0
Hello from thread # 2(block #: 0) with value -858993460
Hello from thread # 3(block #: 0) with value 400
Hello from thread # 17(block #: 0) with value 2132037632
Hello from thread # 4(block #: 0) with value -858993460
Hello from thread # 5(block #: 0) with value -858993460
Hello from thread # 18(block #: 0) with value 0
Hello from thread # 6(block #: 0) with value 100
Hello from thread # 7(block #: 0) with value -858993460
Hello from thread # 8(block #: 0) with value -636130255
Hello from thread # 9(block #: 0) with value 7405308
Hello from thread # 19(block #: 0) with value 67324640
Hello from thread # 10(block #: 0) with value 14712953
Hello from thread # 11(block #: 0) with value 1
Hello from thread # 12(block #: 0) with value 11182328
Hello from thread # 13(block #: 0) with value 11165024
Hello from thread # 14(block #: 0) with value -636130207
Hello from thread # 15(block #: 0) with value 0
Hello from thread # 20(block #: 0) with value 0
Hello from thread # 21(block #: 0) with value 7405568
Hello from thread # 22(block #: 0) with value 0
Hello from thread # 23(block #: 0) with value 7405248
Hello from thread # 24(block #: 0) with value 0
Hello from thread # 25(block #: 0) with value 7405380
Hello from thread # 26(block #: 0) with value 14684685
Hello from thread # 27(block #: 0) with value -628844083
Hello from thread # 28(block #: 0) with value 0
Hello from thread # 29(block #: 0) with value 7405316
Hello from thread # 30(block #: 0) with value 14713453
Hello from thread # 31(block #: 0) with value 7405328
Hello from thread # 32(block #: 0) with value 1967343965
Hello from thread # 48(block #: 0) with value 1997345969
Hello from thread # 33(block #: 0) with value 2132037632
Hello from thread # 34(block #: 0) with value 7405396
Hello from thread # 49(block #: 0) with value -627052760
Hello from thread # 35(block #: 0) with value 1997052142
Hello from thread # 36(block #: 0) with value 2132037632
Hello from thread # 37(block #: 0) with value -1377334916
Hello from thread # 50(block #: 0) with value 0
Hello from thread # 38(block #: 0) with value 0
Hello from thread # 39(block #: 0) with value 0
Hello from thread # 51(block #: 0) with value 7405412
Hello from thread # 40(block #: 0) with value 2132037632
Hello from thread # 52(block #: 0) with value 1997052100
Hello from thread # 41(block #: 0) with value 0
Hello from thread # 42(block #: 0) with value 0
Hello from thread # 43(block #: 0) with value 1
Hello from thread # 44(block #: 0) with value 0
Hello from thread # 53(block #: 0) with value -1
Hello from thread # 45(block #: 0) with value 7405340
Hello from thread # 54(block #: 0) with value 1997005042
Hello from thread # 46(block #: 0) with value -8192
Hello from thread # 55(block #: 0) with value 0
Hello from thread # 47(block #: 0) with value 7405404
Hello from thread # 56(block #: 0) with value 0
Hello from thread # 57(block #: 0) with value 14685370
Hello from thread # 58(block #: 0) with value 2132037632
Hello from thread # 59(block #: 0) with value 0
Hello from thread # 60(block #: 0) with value 0
Hello from thread # 61(block #: 0) with value 0
Hello from thread # 62(block #: 0) with value 0
Hello from thread # 63(block #: 0) with value 0
Hello from thread # 64(block #: 0) with value 0
Hello from thread # 65(block #: 0) with value 0
Hello from thread # 66(block #: 0) with value 0
Hello from thread # 67(block #: 0) with value 0
Hello from thread # 68(block #: 0) with value 0
Hello from thread # 80(block #: 0) with value 0
Hello from thread # 69(block #: 0) with value 0
Hello from thread # 70(block #: 0) with value 0
Hello from thread # 81(block #: 0) with value 0
Hello from thread # 71(block #: 0) with value 0
Hello from thread # 72(block #: 0) with value 0
Hello from thread # 73(block #: 0) with value 0
Hello from thread # 74(block #: 0) with value 0
Hello from thread # 82(block #: 0) with value 0
Hello from thread # 75(block #: 0) with value 0
Hello from thread # 83(block #: 0) with value 0
Hello from thread # 76(block #: 0) with value 0
Hello from thread # 84(block #: 0) with value 0
Hello from thread # 77(block #: 0) with value 0
Hello from thread # 85(block #: 0) with value 0
Hello from thread # 78(block #: 0) with value 0
Hello from thread # 86(block #: 0) with value 0
Hello from thread # 79(block #: 0) with value 0
Hello from thread # 87(block #: 0) with value 0
Hello from thread # 88(block #: 0) with value 0
Hello from thread # 89(block #: 0) with value 0
Hello from thread # 90(block #: 0) with value 0
Hello from thread # 91(block #: 0) with value 0
Hello from thread # 92(block #: 0) with value 0
Hello from thread # 93(block #: 0) with value 0
Hello from thread # 94(block #: 0) with value 2020893505
Hello from thread # 95(block #: 0) with value 32
no error
Press any key to continue . . .

I have tried with doubles and I also get some random results. Does anybody know why?

Upvotes: 0

Views: 112

Answers (1)

m.s.
m.s.

Reputation: 16354

This line causes the problem:

cudaError_t error = cudaMemcpy(d_fooArray, &fooArray, bytes, cudaMemcpyHostToDevice);

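You are passing `&fooArray`, the address of the host pointer variable, as the second (source) argument. `cudaMemcpy` therefore copies `bytes` bytes starting at the pointer variable itself rather than the array it points to, which is why the kernel prints garbage even though the call reports no error. The source must be the pointer itself: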

cudaError_t error = cudaMemcpy(d_fooArray, fooArray, bytes, cudaMemcpyHostToDevice);

Upvotes: 2

Related Questions