0xC0000005: Access violation reading location 0x0000000000000018 at cudaEventDestroy()

Question

When I am running a CUDA program, I met "0xC0000005: Access violation reading location 0x0000000000000018." at cudaEventDestroy().

It is confusing because this exception sometimes appears and sometimes not (if not, the program runs normally without any error).

start & stop is defined as cudaEvent_t start, stop; to compute time elapsed on GPU.

Sometimes the Exception looks like: (at a different location)

Here I attach the whole program code, in which 2 matrices A & B add together to get S on both GPU and CPU, and compare the results.

I am new to CUDA, I will appreciate it a lot if somebody may explain this to me.

Note: the original code is provided by book "professional CUDA C programming".

#include 
#include 
#include "common.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"


void initialData(float *ip, const int size)
{
    for(int i = 0; i < size; i++)
    {
        ip[i] = (float)(rand() & 0xFF) / 10.0f;
    }
}

void sumMatrixOnHost(float *A, float *B, float *C, const int nx, const int ny)
{
    float* ia = A;
    float* ib = B;
    float* ic = C;

    for (int iy = 0; iy < ny; iy++)
    {
        for (int ix = 0; ix < nx; ix++)
        {
            ic[ix] = ia[ix] + ib[ix];
        }
        ia += nx;
        ib += nx;
        ic += nx;
    }
}

void checkResult(float *hostRef, float *gpuRef, const int N)
{
    double epsilon = 1.0E-8;
    bool match = 1;
    for (int i = 0; i < N; i++)
    {
        if (abs(hostRef[i] - gpuRef[i]) > epsilon)
        {
            match = 0;
            std::cout << "host" << hostRef[i] << "gpu" << gpuRef[i] << std::endl;
            break;
        }
    }
    if (match)
        std::cout<<"Arrays match."<> > (d_MatA, d_MatB, d_MatC, nx, ny);//
    CHECK(cudaDeviceSynchronize());// shoule be called after kernel called?
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "sumMatrixOnGPU2D<<<(" << grid.x << grid.y << "), (" << block.x << block.y << ")>>> elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;
    std::cout << "sumMatrixOnGPU2D<<<(" << grid.x << grid.y << "), (" << block.x << block.y << ")>>> elapsed " << cpu_time_used << " sec on CPU" << std::endl;

    // check kernel error
    startc = clock();
    cudaEventRecord(start, 0);
    CHECK(cudaGetLastError());//
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "cudaGetLastError() elapsed " << cpu_time_used << " sec on CPU" << std::endl;
    std::cout << "cudaGetLastError() elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;

    // copy kernel result back to host side
    startc = clock();
    cudaEventRecord(start, 0);
    CHECK(cudaMemcpy(d_S, d_MatC, nBytes, cudaMemcpyDeviceToHost));//
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "cudaMemcpyDeviceToHost elapsed " << cpu_time_used << " sec on CPU" << std::endl;
    std::cout << "cudaMemcpyDeviceToHost elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;

    // check device results
    startc = clock();
    cudaEventRecord(start, 0);
    checkResult(h_S, d_S, nxy);//
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "check results elapsed " << cpu_time_used << " sec on CPU" << std::endl;
    std::cout << "check results elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;

    // free device global memory
    startc = clock();
    cudaEventRecord(start, 0);
    CHECK(cudaFree(d_MatA));//
    CHECK(cudaFree(d_MatB));//
    CHECK(cudaFree(d_MatC));//
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "cudaFree() elapsed " << cpu_time_used << " sec on CPU" << std::endl;
    std::cout << "cudaFree() elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;

    // free host memory
    startc = clock();
    cudaEventRecord(start, 0);
    free(h_A);//
    free(h_B);//
    free(h_S);//
    free(d_S);//
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&gpu_time_used, start, stop);
    std::cout << "free() elapsed " << cpu_time_used << " sec on CPU" << std::endl;
    std::cout << "free() elapsed " << 1e-3 * gpu_time_used << " sec on GPU" << std::endl;

    // reset device
    startc = clock();
    CHECK(cudaDeviceReset());// cuda reset
    stopc = clock(); cpu_time_used = ((double)(stopc - startc)) / CLOCKS_PER_SEC;
    std::cout << "cudaDeviceReset() elapsed " << cpu_time_used << " sec on CPU" << std::endl;

    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    return 0;//
}

And the "common.h" simply contains the #define of CHECK as below.

#define CHECK(call)                                                                         \
{                                                                                           \
    const cudaError_t error = call;                                                         \
    if (error != cudaSuccess)                                                               \
    {                                                                                       \
        fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                              \
        fprintf(stderr, "code: %d, reason: %s
", error,                                    \
            cudaGetErrorString(error));                                                     \
        exit(1);                                                                            \
    }                                                                                       \
}                                                                                           \

0xC0000005: Access violation reading location 0x0000000000000018 at cudaEventDestroy()

Answers (1)

Related Questions