Reputation: 689
I am implementing an example program with nvml
library as shown at https://devtalk.nvidia.com/default/topic/504951/how-to-call-nvml-apis-/
The program is as follows:
#include <stdio.h>
#include <nvidia/gdk/nvml.h>
const char * convertToComputeModeString(nvmlComputeMode_t mode)
{
switch (mode)
{
case NVML_COMPUTEMODE_DEFAULT:
return "Default";
case NVML_COMPUTEMODE_EXCLUSIVE_THREAD:
return "Exclusive_Thread";
case NVML_COMPUTEMODE_PROHIBITED:
return "Prohibited";
case NVML_COMPUTEMODE_EXCLUSIVE_PROCESS:
return "Exclusive Process";
default:
return "Unknown";
}
}
int main()
{
nvmlReturn_t result;
unsigned int device_count, i;
// First initialize NVML library
result = nvmlInit();
if (NVML_SUCCESS != result)
{
printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
printf("Press ENTER to continue...\n");
getchar();
return 1;
}
result = nvmlDeviceGetCount(&device_count);
if (NVML_SUCCESS != result)
{
printf("Failed to query device count: %s\n", nvmlErrorString(result));
goto Error;
}
printf("Found %d device%s\n\n", device_count, device_count != 1 ? "s" : "");
printf("Listing devices:\n");
for (i = 0; i < device_count; i++)
{
nvmlDevice_t device;
char name[64];
nvmlPciInfo_t pci;
nvmlComputeMode_t compute_mode;
// Query for device handle to perform operations on a device
// You can also query device handle by other features like:
// nvmlDeviceGetHandleBySerial
// nvmlDeviceGetHandleByPciBusId
result = nvmlDeviceGetHandleByIndex(i, &device);
if (NVML_SUCCESS != result)
{
printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
result = nvmlDeviceGetName(device, name, sizeof(name)/sizeof(name[0]));
if (NVML_SUCCESS != result)
{
printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
// pci.busId is very useful to know which device physically you're talking to
// Using PCI identifier you can also match nvmlDevice handle to CUDA device.
result = nvmlDeviceGetPciInfo(device, &pci);
if (NVML_SUCCESS != result)
{
printf("Failed to get pci info for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
printf("%d. %s [%s]\n", i, name, pci.busId);
// This is a simple example on how you can modify GPU's state
result = nvmlDeviceGetComputeMode(device, &compute_mode);
if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t This is not CUDA capable device\n");
else if (NVML_SUCCESS != result)
{
printf("Failed to get compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
// try to change compute mode
printf("\t Changing device's compute mode from '%s' to '%s'\n",
convertToComputeModeString(compute_mode),
convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED));
result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED);
if (NVML_ERROR_NO_PERMISSION == result)
printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result));
else if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t\t Compute mode prohibited not supported. You might be running on\n"
"\t\t windows in WDDM driver model or on non-CUDA capable GPU.\n");
else if (NVML_SUCCESS != result)
{
printf("\t\t Failed to set compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
printf("\t Restoring device's compute mode back to '%s'\n",
convertToComputeModeString(compute_mode));
result = nvmlDeviceSetComputeMode(device, compute_mode);
if (NVML_SUCCESS != result)
{
printf("\t\t Failed to restore compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
}
}
}
result = nvmlShutdown();
if (NVML_SUCCESS != result)
printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));
printf("All done.\n");
printf("Press ENTER to continue...\n");
getchar();
return 0;
Error:
result = nvmlShutdown();
if (NVML_SUCCESS != result)
printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));
printf("Press ENTER to continue...\n");
getchar();
return 1;
}
makefile as follows:
ARCH := $(shell getconf LONG_BIT)
ifeq (${ARCH},32)
NVML_LIB := ../lib/
else ifeq (${ARCH},64)
NVML_LIB := /usr/lib/nvidia-340/
else
$(error Unknown architecture!)
endif
CFLAGS := -I ../inc
LDFLAGS := -lnvidia-ml -L $(NVML_LIB)
example: example.o
$(CC) $(LDFLAGS) $< -o $@
clean:
-@rm -f example.o
-@rm -f example
And the error I get is:
cc -lnvidia-ml -L /usr/src/gdk/nvml/lib/ example.o -o example
example.o: In function `main':
example.c:(.text+0x5f): undefined reference to `nvmlInit_v2'
example.c:(.text+0x7b): undefined reference to `nvmlErrorString'
example.c:(.text+0xb5): undefined reference to `nvmlDeviceGetCount_v2'
example.c:(.text+0xd1): undefined reference to `nvmlErrorString'
example.c:(.text+0x149): undefined reference to `nvmlDeviceGetHandleByIndex_v2'
example.c:(.text+0x165): undefined reference to `nvmlErrorString'
example.c:(.text+0x19f): undefined reference to `nvmlDeviceGetName'
example.c:(.text+0x1bb): undefined reference to `nvmlErrorString'
example.c:(.text+0x1f3): undefined reference to `nvmlDeviceGetPciInfo_v2'
example.c:(.text+0x20f): undefined reference to `nvmlErrorString'
example.c:(.text+0x269): undefined reference to `nvmlDeviceGetComputeMode'
example.c:(.text+0x29d): undefined reference to `nvmlErrorString'
example.c:(.text+0x2ff): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x31b): undefined reference to `nvmlErrorString'
example.c:(.text+0x360): undefined reference to `nvmlErrorString'
example.c:(.text+0x3b5): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x3d1): undefined reference to `nvmlErrorString'
example.c:(.text+0x40c): undefined reference to `nvmlShutdown'
example.c:(.text+0x428): undefined reference to `nvmlErrorString'
example.c:(.text+0x45f): undefined reference to `nvmlShutdown'
example.c:(.text+0x47b): undefined reference to `nvmlErrorString'
collect2: error: ld returned 1 exit status
make: *** [example] Error 1
pranjal@PCL:~/nvidia$ make
cc -lnvidia-ml -L /usr/lib/nvidia-340/ example.o -o example
example.o: In function `main':
example.c:(.text+0x5f): undefined reference to `nvmlInit_v2'
example.c:(.text+0x7b): undefined reference to `nvmlErrorString'
example.c:(.text+0xb5): undefined reference to `nvmlDeviceGetCount_v2'
example.c:(.text+0xd1): undefined reference to `nvmlErrorString'
example.c:(.text+0x149): undefined reference to `nvmlDeviceGetHandleByIndex_v2'
example.c:(.text+0x165): undefined reference to `nvmlErrorString'
example.c:(.text+0x19f): undefined reference to `nvmlDeviceGetName'
example.c:(.text+0x1bb): undefined reference to `nvmlErrorString'
example.c:(.text+0x1f3): undefined reference to `nvmlDeviceGetPciInfo_v2'
example.c:(.text+0x20f): undefined reference to `nvmlErrorString'
example.c:(.text+0x269): undefined reference to `nvmlDeviceGetComputeMode'
example.c:(.text+0x29d): undefined reference to `nvmlErrorString'
example.c:(.text+0x2ff): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x31b): undefined reference to `nvmlErrorString'
example.c:(.text+0x360): undefined reference to `nvmlErrorString'
example.c:(.text+0x3b5): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x3d1): undefined reference to `nvmlErrorString'
example.c:(.text+0x40c): undefined reference to `nvmlShutdown'
example.c:(.text+0x428): undefined reference to `nvmlErrorString'
example.c:(.text+0x45f): undefined reference to `nvmlShutdown'
example.c:(.text+0x47b): undefined reference to `nvmlErrorString'
collect2: error: ld returned 1 exit status
make: *** [example] Error 1
Any help would be appreciated. Thank you.
Upvotes: 1
Views: 6798
Reputation: 151849
Here's what I did on a linux CUDA 7.5 setup:
Update the GPU driver to 352.79. In my case, this was done via the runfile installer here. If you have previously installed the GPU driver via the package manager method (e.g. .deb) then you don't want to use the runfile installer method.
get the latest version of the GDK (see note below), which at this time happens to target 352.79, and includes nvml:
wget --no-check-certificate http://developer.download.nvidia.com/compute/cuda/7.5/Prod/gdk/gdk_linux_amd64_352_79_release.run
install the GDK:
sh gdk_linux_amd64_352_79_release.run
verify that the appropriate libraries were updated:
ls /usr/lib64/libnv*
(and you should see libnvidia-ml.so.352.79
etc.)
compile the example file:
g++ -I./gdk352_79/usr/include -L/usr/lib64 -lnvidia-ml example.c -o example
When I run the example
executable, I get:
$ ./example
Found 2 devices
Listing devices:
0. Quadro 5000 [0000:02:00.0]
Changing device's compute mode from 'Default' to 'Prohibited'
Need root privileges to do that: Insufficient Permissions
1. GeForce GT 640 [0000:03:00.0]
Changing device's compute mode from 'Default' to 'Prohibited'
Need root privileges to do that: Insufficient Permissions
All done.
Press ENTER to continue...
$
Hopefully this will get you going. I am assuming you don't need help making any Makefile
changes if needed. If your Makefile
is not working, keep modifying it until you get the exact compile command I list in step 5.
NOTE: As of CUDA 8.0, the GDK is not a separate entity but is installed with CUDA 8.0 toolkit. It should not be necessary to install the GDK separately.
Upvotes: 1