invalid device ordinal on cudaMemPrefetchAsync

Question

I'm running a toy CUDA sample on my GeForce 1080 Ti (Pascal) on windows 10 and CUDA 9.2.

Goal is to test cudaMemPrefetchAsync to the CPU, as it's supposed to work.

However, I get a CUDA error (invalid device ordinal) on this particular line.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include 
#include 

void fill(int* a, int val, int N) {
    for (int k = 0; k < N; ++k) {
        a[k] = val;
    }
}

__global__ void add(int* a, int* b, int N)
{
    for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < N; i += blockDim.x * gridDim.x) {
        a[i] += b[i];
    }
}

inline void check(cudaError_t err, const char* file, int line) {
    if (err != cudaSuccess) {
        ::fprintf(stderr, "ERROR at %s[%d] : %s
", file, line, cudaGetErrorString(err));
        abort();
    }
}

#define CUDA_CHECK(err) do { check(err, __FILE__, __LINE__); } while(0)

int main()
{
    int deviceId;
    CUDA_CHECK(cudaGetDevice(&deviceId));
    const int N = 1024*1024*32;
    int *a, *b;
    CUDA_CHECK(cudaMallocManaged(&a, N * sizeof(int)));
    CUDA_CHECK(cudaMallocManaged(&b, N * sizeof(int)));

    CUDA_CHECK(cudaMemPrefetchAsync(a, N * sizeof(int), cudaCpuDeviceId)); // program breaks here
    CUDA_CHECK(cudaMemPrefetchAsync(b, N * sizeof(int), cudaCpuDeviceId));
    fill(a, 1, N);
    fill(a, 2, N);

    CUDA_CHECK(cudaMemPrefetchAsync(a, N * sizeof(int), deviceId));
    CUDA_CHECK(cudaMemPrefetchAsync(b, N * sizeof(int), deviceId));

    add<<<32, 256>>>(a, b, N);

    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    return 0;
}

Is that a hardware/driver/OS limitation? Can I simply ignore the error?

talonmies · Accepted Answer

Is that a hardware/driver/OS limitation?

Yes, the latter. Quoting from the documentation

GPUs with SM architecture 6.x or higher (Pascal class or newer) provide additional Unified Memory features such as on-demand page migration and GPU memory oversubscription that are outlined throughout this document. Note that currently these features are only supported on Linux operating systems.

So asynchronous page migration is not supported in Windows at the moment and that it why you get an error when you try to enable it.

invalid device ordinal on cudaMemPrefetchAsync

Answers (1)

Related Questions