Regis Portalez
Regis Portalez

Reputation: 4860

invalid device ordinal on cudaMemPrefetchAsync

I'm running a toy CUDA sample on my GeForce 1080 Ti (Pascal) on Windows 10 with CUDA 9.2.

The goal is to test cudaMemPrefetchAsync with the CPU as the destination, which is supposed to work.

However, I get a CUDA error (invalid device ordinal) on the first cudaMemPrefetchAsync call (marked with a comment in the code below).

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <cstdio>
#include <cstdlib>

// Host-side helper: stores `val` into a[0] .. a[N-1].
// Does nothing when N is zero or negative.
void fill(int* a, int val, int N) {
    int idx = 0;
    while (idx < N) {
        a[idx] = val;
        ++idx;
    }
}

// Element-wise in-place addition kernel: a[i] += b[i] for i in [0, N).
// Each thread starts at threadIdx.x + blockIdx.x * blockDim.x and then
// advances by blockDim.x * gridDim.x (the total thread count of a 1D
// launch) until it runs past N.
__global__ void add(int* a, int* b, int N)
{
    // Kept unsigned so the index update uses the same arithmetic as the
    // built-in dimension variables it is derived from.
    const unsigned int step = blockDim.x * gridDim.x;
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    while (i < N) {
        a[i] = a[i] + b[i];
        i += step;
    }
}

// Reports a failed CUDA runtime call and terminates the program.
//   err  - status code returned by the call being checked
//   file - source file name to include in the message
//   line - source line number to include in the message
// Returns normally only when err == cudaSuccess; otherwise prints the
// error string to stderr and calls abort().
inline void check(cudaError_t err, const char* file, int line) {
    if (err == cudaSuccess) {
        return;
    }
    ::fprintf(stderr, "ERROR at %s[%d] : %s\n", file, line, cudaGetErrorString(err));
    abort();
}

// Passes a CUDA status code to check() together with the file and line of
// the macro invocation. The do/while(0) wrapper makes it usable as a single
// statement.
#define CUDA_CHECK(err)                      \
    do {                                     \
        check((err), __FILE__, __LINE__);    \
    } while (0)

// Toy unified-memory sample: allocates two managed int arrays, fills them on
// the host, adds b into a on the GPU, and optionally uses
// cudaMemPrefetchAsync to migrate the data ahead of each access.
//
// Prefetching is treated as a pure performance hint: it is only issued when
// the current device reports cudaDevAttrConcurrentManagedAccess. On platforms
// where that attribute is 0 (the documentation quoted with this sample says
// these Pascal unified-memory features are Linux-only), the prefetch call
// fails with "invalid device ordinal", so it is skipped and the program
// relies on the default managed-memory migration instead.
//
// Returns 0 on success; any CUDA failure aborts via CUDA_CHECK.
int main()
{
    int deviceId = 0;
    CUDA_CHECK(cudaGetDevice(&deviceId));

    // Non-zero when the device can access managed memory concurrently with
    // the CPU, which is the precondition for prefetching.
    int canPrefetch = 0;
    CUDA_CHECK(cudaDeviceGetAttribute(&canPrefetch, cudaDevAttrConcurrentManagedAccess, deviceId));

    const int N = 1024 * 1024 * 32;
    const size_t bytes = N * sizeof(int);
    int* a = NULL;
    int* b = NULL;
    CUDA_CHECK(cudaMallocManaged(&a, bytes));
    CUDA_CHECK(cudaMallocManaged(&b, bytes));

    // Hint: bring both buffers to host memory before the CPU writes them.
    if (canPrefetch) {
        CUDA_CHECK(cudaMemPrefetchAsync(a, bytes, cudaCpuDeviceId));
        CUDA_CHECK(cudaMemPrefetchAsync(b, bytes, cudaCpuDeviceId));
    }
    fill(a, 1, N);
    fill(b, 2, N);  // was fill(a, ...): b must be initialised before the kernel reads it

    // Hint: move both buffers to the GPU before the kernel touches them.
    if (canPrefetch) {
        CUDA_CHECK(cudaMemPrefetchAsync(a, bytes, deviceId));
        CUDA_CHECK(cudaMemPrefetchAsync(b, bytes, deviceId));
    }

    add<<<32, 256>>>(a, b, N);

    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // Release the managed allocations once the device is idle.
    CUDA_CHECK(cudaFree(a));
    CUDA_CHECK(cudaFree(b));

    return 0;
}

Is that a hardware/driver/OS limitation? Can I simply ignore the error?

Upvotes: 1

Views: 1147

Answers (1)

talonmies
talonmies

Reputation: 72348

Is that a hardware/driver/OS limitation?

Yes, the latter: it is an OS limitation. Quoting from the documentation:

GPUs with SM architecture 6.x or higher (Pascal class or newer) provide additional Unified Memory features such as on-demand page migration and GPU memory oversubscription that are outlined throughout this document. Note that currently these features are only supported on Linux operating systems.

So asynchronous page migration is not supported on Windows at the moment, and that is why you get an error when you try to enable it.

Upvotes: 5

Related Questions