Daniel
Daniel

Reputation: 385

Set CUDA device by UUID

Is there any way to select the CUDA device for a program by UUID? I use Multi-Instance GPU (MIG) to partition the GPU logically, and the resulting virtual GPUs each have a different UUID.

Upvotes: 1

Views: 2787

Answers (1)

Robert Crovella
Robert Crovella

Reputation: 152173

In CUDA, the way to select a device to use is via cudaSetDevice(), and this requires an integer argument.

However, UUID is part of the device property structure, so you could retrieve those (e.g. using cudaGetDeviceProperties() similar to the demonstration in the deviceQuery sample code) and then select a device based on the mapping between device index/ordinal and device UUID.

Here is an example:

$ cat t1872.cu
#include <iostream>
#include <iomanip>
#include <cstddef>
#include <vector>
#include <tuple>

// Byte-wise comparison of two device UUIDs.
// Returns true only when every element of a.bytes equals the element at the
// same position in b.bytes.
bool uuid_equal(cudaUUID_t a, cudaUUID_t b){
  const int n_bytes = static_cast<int>(sizeof(a.bytes));
  int idx = 0;
  // Walk the common prefix; reaching the end means no byte differed.
  while (idx < n_bytes && a.bytes[idx] == b.bytes[idx]) ++idx;
  return idx == n_bytes;
}

// Writes a device UUID to std::cout as "GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
// (lower-case hex, two digits per byte) followed by a newline.
// The stream's formatting flags and fill character are restored before
// returning, so integers printed by the caller afterwards are unaffected.
void uuid_print(cudaUUID_t a){
  // Byte offsets delimiting the five dash-separated groups (4-2-2-2-6 bytes).
  static const int bounds[] = {0, 4, 6, 8, 10, 16};
  const int n_groups = static_cast<int>(sizeof(bounds) / sizeof(bounds[0])) - 1;

  // std::hex and std::setfill are sticky: without saving and restoring them,
  // every later integer written to std::cout would be printed in hexadecimal.
  const std::ios_base::fmtflags saved_flags = std::cout.flags();
  const char saved_fill = std::cout.fill();

  std::cout << "GPU" << std::hex << std::setfill('0');
  for (int g = 0; g < n_groups; g++){
    std::cout << "-";
    for (int i = bounds[g]; i < bounds[g + 1]; i++)
      // Go through unsigned char first so bytes >= 0x80 are not sign-extended.
      std::cout << std::setw(2) << (unsigned)(unsigned char)a.bytes[i];
  }
  std::cout << std::endl;

  std::cout.flags(saved_flags);
  std::cout.fill(saved_fill);
}

// Enumerates all CUDA devices, prints each device's UUID, and makes the device
// whose UUID equals desired_UUID the current device.
// Returns 0 when the device was found and selected; returns 1 when a CUDA
// runtime call fails or no device has the desired UUID.
int main(){

  // Reports a failed CUDA runtime call on stderr; yields true on success.
  auto cuda_ok = [](cudaError_t status, const char *what) -> bool {
    if (status == cudaSuccess) return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
  };

  // Initialised so the loop below is well-defined even if the count query fails.
  int num_dev = 0;
  if (!cuda_ok(cudaGetDeviceCount(&num_dev), "cudaGetDeviceCount")) return 1;

  // Hard-coded example UUID to look for, given as its 16 raw bytes.
  cudaUUID_t desired_UUID = {(char)0x26U, (char)0x7cU, (char)0xe6U, (char)0x89U, (char)0xf4U, (char)0xf6U, (char)0x87U, (char)0x17U, (char)0x51U, (char)0x5eU, (char)0x84U, (char)0xaaU, (char)0xe8U, (char)0x93U, (char)0xd3U, (char)0x12U};
  int desired_index = -1;
  for (int i = 0; i < num_dev; i++){
    cudaDeviceProp p;
    if (!cuda_ok(cudaGetDeviceProperties(&p,i), "cudaGetDeviceProperties")) return 1;
    if (uuid_equal(p.uuid, desired_UUID)){desired_index = i;}
    // std::dec guards against a hex base left set on std::cout by earlier output.
    std::cout << "dev: " << std::dec << i << " UUID: ";
    uuid_print(p.uuid);
  }
  if (desired_index < 0){
    std::cout << "Desired UUID not found! " << std::endl;
    return 1;
  }
  if (!cuda_ok(cudaSetDevice(desired_index), "cudaSetDevice")) return 1;
  std::cout << "set device to: " << std::dec << desired_index << std::endl;
  return 0;
}

$ nvcc -o t1872 t1872.cu -std=c++11
$ compute-sanitizer ./t1872
========= COMPUTE-SANITIZER
dev: 0 UUID: GPU-267ce689-f4f6-8717-515e-84aae893d312
dev: 1 UUID: GPU-909fbaf6-cabf-faae-137a-b75aeaa0fa31
dev: 2 UUID: GPU-d05dc1d5-d090-6176-7236-6a42d73b311d
dev: 3 UUID: GPU-49fa3a59-ba1b-eb51-dc8b-9a0254300a38
set device to: 0
========= ERROR SUMMARY: 0 errors
$

Note that cudaUUID_t is just a struct wrapping an array of 16 char (its bytes member). My UUID print routine is just designed to match the output from nvidia-smi -a.

Upvotes: 5

Related Questions