Reputation: 11
I'm trying to compile the cublas example from the CUDA documentation
//Example 2. Application Using C and CUBLAS: 0-based indexing
//-----------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
// Matrix dimensions used throughout this example: M rows by N columns.
#define M 6
#define N 5
// Maps a 0-based (row i, column j) pair to a linear offset in column-major
// storage; ld is the leading dimension, i.e. the number of elements between
// the starts of two consecutive columns.
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
// Scales two slices of the column-major matrix m in place, both starting at
// the 0-based element (p,q):
//   - row p, columns q..n-1, is multiplied by alpha (stride ldm steps along a row);
//   - column q, rows p..ldm-1, is multiplied by beta (stride 1 steps down a column).
// Element (p,q) is covered by both calls and therefore ends up scaled by
// alpha*beta.
//
// handle : cuBLAS context used for both scaling calls
// m      : matrix storage passed to cublasSscal (main passes a device buffer)
// ldm    : leading dimension of m (also used as its row count here)
// n      : number of columns of m
// p, q   : 0-based row and column of the starting element
//
// The cublasSscal status codes are discarded because this helper returns void.
static __inline__ void modify (cublasHandle_t handle, float *m, int ldm, int n, int p, int q, float alpha, float beta){
    // A row that starts at column q holds n-q elements. Counting n-p instead
    // would step past the last column (out-of-bounds write) whenever p < q.
    cublasSscal (handle, n-q, &alpha, &m[IDX2C(p,q,ldm)], ldm);
    cublasSscal (handle, ldm-p, &beta, &m[IDX2C(p,q,ldm)], 1);
}
// Builds an M x N column-major matrix on the host, copies it to the device,
// scales part of it with modify(), copies it back and prints it.
//
// Returns EXIT_SUCCESS when every step succeeds and EXIT_FAILURE otherwise.
// Every exit goes through the single cleanup section at the end so that the
// host buffer, the device buffer and the cuBLAS handle are released on error
// paths as well as on success.
int main (void){
    cudaError_t cudaStat;
    cublasStatus_t stat;
    cublasHandle_t handle;
    int handleCreated = 0;          // set once cublasCreate has succeeded
    int exitCode = EXIT_FAILURE;    // flipped to EXIT_SUCCESS only at the very end
    int i, j;
    float* devPtrA = 0;
    float* a = 0;

    a = (float *)malloc (M * N * sizeof (*a));
    if (!a) {
        printf ("host memory allocation failed");
        goto cleanup;
    }
    // Fill the host matrix column by column.
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            a[IDX2C(i,j,M)] = (float)(i * M + j + 1);
        }
    }
    cudaStat = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*a));
    if (cudaStat != cudaSuccess) {
        printf ("device memory allocation failed");
        devPtrA = 0;                // do not hand an unspecified pointer to cudaFree
        goto cleanup;
    }
    stat = cublasCreate(&handle);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("CUBLAS initialization failed\n");
        goto cleanup;
    }
    handleCreated = 1;
    // Host -> device copy of the whole matrix.
    stat = cublasSetMatrix (M, N, sizeof(*a), a, M, devPtrA, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("data download failed");
        goto cleanup;
    }
    modify (handle, devPtrA, M, N, 1, 2, 16.0f, 12.0f);
    // Device -> host copy of the modified matrix.
    stat = cublasGetMatrix (M, N, sizeof(*a), devPtrA, M, a, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf ("data upload failed");
        goto cleanup;
    }
    // One output line per column index j, listing that column's M entries.
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            printf ("%7.0f", a[IDX2C(i,j,M)]);
        }
        printf ("\n");
    }
    exitCode = EXIT_SUCCESS;

cleanup:
    // Release only what was actually acquired; the guards keep the early
    // failure paths from touching CUDA/cuBLAS at all.
    if (devPtrA) {
        cudaFree (devPtrA);
    }
    if (handleCreated) {
        cublasDestroy(handle);
    }
    free(a);                        // free(NULL) is a no-op
    return exitCode;
}
I saved this file into "cudaexample.c" and am trying to compile with gcc cudaexample.c -I/usr/local/cuda/include -L/usr/local/cuda/lib -lcuda -lcublas
I get an undefined symbols error:
Undefined symbols for architecture x86_64:
"_cudaFree", referenced from:
_main in ccpPWjbO.o
"_cudaMalloc", referenced from:
_main in ccpPWjbO.o
ld: symbol(s) not found for architecture x86_64
collect2: error: ld returned 1 exit status
It seems like I've specified the flags properly, since other symbols (e.g. cublasCreate) are found. Why are cudaFree and cudaMalloc not found?
Relevant details: OSX: 10.10.2 gcc: 4.8.4 (target: x86_64-apple-darwin14) Graphics: NVIDIA GeForce GT 650M 1024 MB
I downloaded and installed the CUDA-6.5 toolkit
Upvotes: 1
Views: 713
Reputation: 151799
Those API functions (e.g. cudaMalloc) are contained in the CUDA runtime library. You are not linking against that library, so those symbols aren't found during the link phase.
Add -lcudart to your link flags:
-I/usr/local/cuda/include -L/usr/local/cuda/lib -lcuda -lcublas -lcudart
and it should fix that issue for you.
(-lcuda is only needed if you are using CUDA driver API functions. You can remove it if you wish.)
Upvotes: 1