Reputation: 638
I am trying to compile some code which allows some CPU routines to call a function which uses the GPU to speed up some calculations. The GPU code uses Thrust, specifically reduce and device_ptr. When I build the GPU code as a standalone using nvcc, there are no problems. However, attempting to integrate the GPU code with CPU C++ code causes the following compiler error when compiling the final "wrapper":
nvcc -O2 -c NLC_2D_TFIM.cpp -lcuda -lcudart -lcublas -lcusparse -L../CUDA/Lanczos/sort/sort/gnu/release -lmgpusort
In file included from /usr/local/cuda/bin/../include/thrust/pair.h:265:0,
from /usr/local/cuda/bin/../include/thrust/tuple.h:35,
from /usr/local/cuda/bin/../include/thrust/detail/functional/actor.h:29,
from /usr/local/cuda/bin/../include/thrust/detail/functional/placeholder.h:20,
from /usr/local/cuda/bin/../include/thrust/functional.h:26,
from /usr/local/cuda/bin/../include/thrust/system/detail/error_category.inl:22,
from /usr/local/cuda/bin/../include/thrust/system/error_code.h:516,
from /usr/local/cuda/bin/../include/thrust/system/cuda_error.h:26,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/malloc.inl:26,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/malloc.h:50,
from /usr/local/cuda/bin/../include/thrust/detail/backend/dispatch/malloc.h:22,
from /usr/local/cuda/bin/../include/thrust/detail/device_malloc.inl:23,
from /usr/local/cuda/bin/../include/thrust/device_malloc.h:102,
from /usr/local/cuda/bin/../include/thrust/detail/backend/internal_allocator.h:22,
from /usr/local/cuda/bin/../include/thrust/detail/uninitialized_array.h:23,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/copy_cross_space.inl:20,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/copy_cross_space.h:57,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/dispatch/copy.h:23,
from /usr/local/cuda/bin/../include/thrust/detail/backend/cuda/copy.h:21,
from /usr/local/cuda/bin/../include/thrust/detail/backend/dispatch/copy.h:24,
from /usr/local/cuda/bin/../include/thrust/detail/backend/copy.inl:20,
from /usr/local/cuda/bin/../include/thrust/detail/backend/copy.h:44,
from /usr/local/cuda/bin/../include/thrust/detail/copy.inl:20,
from /usr/local/cuda/bin/../include/thrust/detail/copy.h:39,
from /usr/local/cuda/bin/../include/thrust/detail/reference_base.inl:18,
from /usr/local/cuda/bin/../include/thrust/detail/reference_base.h:138,
from /usr/local/cuda/bin/../include/thrust/device_reference.h:27,
from /usr/local/cuda/bin/../include/thrust/detail/device_ptr.inl:23,
from /usr/local/cuda/bin/../include/thrust/device_ptr.h:181,
from ../CUDA/Lanczos/hamiltonian.h:32,
from ../CUDA/Lanczos/lanczos.h:8,
from NLC_2D_TFIM.cpp:17:
/usr/local/cuda/bin/../include/thrust/detail/pair.inl: In function ‘bool thrust::operator<(const thrust::pair<T1, T2>&, const thrust::pair<T1, T2>&)’:
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:22: error: ‘.’ cannot appear in a constant-expression
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:46: error: ‘.’ cannot appear in a constant-expression
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:36: error: parse error in template argument list
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:36: error: ‘.’ cannot appear in a constant-expression
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:58: error: ‘.’ cannot appear in a constant-expression
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:69: error: ‘.’ cannot appear in a constant-expression
/usr/local/cuda/bin/../include/thrust/detail/pair.inl:72:12: error: parse error in template argument list
make: *** [NLC_2D_TFIM.o] Error 1
NLC_2D_TFIM works with another module (Graphs) which uses std::pairs, but none of these are passed to the module which talks to the GPU. Every header uses std as its namespace, not thrust. All the parameters I'm passing to the GPU handler are regular C arrays, ints, etc.
The lines referenced above are:
#include"lanczos.h"
Which uses:
#include"hamiltonian.h"
And then from there:
#include<thrust/device_ptr.h>
In NLC_2D_TFIM.cu, the "wrapper", the code is:
ReadGraphsFromFile(fileGraphs, "rectanglegraphs.dat", TypeFlag); //graphs the information generated by the Graphs module
double J=1.;
for(int hh=1; hh<10; hh++) {
h = hh;
//Create some storage for things to be used in GPU functions
d_hamiltonian* HamilLancz = (d_hamiltonian*)malloc(HowMany*sizeof(d_hamiltonian));
parameters* data = (parameters*)malloc(HowMany*sizeof(parameters));
double** groundstates = (double**)malloc(HowMany*sizeof(double*));
double** eigenvalues = (double**)malloc(HowMany*sizeof(double*));
int* NumElem = (int*)malloc(HowMany*sizeof(int));
int** Bonds = (int**)malloc(HowMany*sizeof(int*));
//Go through each graph we read in earlier
unsigned int i = 1;
while ( i<fileGraphs.size() && fileGraphs.at(i)->Order < 14) { //skip the zeroth graph
//CPU gets the energy for smaller graphs
GENHAM HV(fileGraphs.at(i)->Order, J, h, fileGraphs.at(i)->AdjacencyList, TypeFlag);
LANCZOS lancz(HV.Vdim); //dimension of reduced Hilbert space (Sz sector)
HV.SparseHamJQ(); //generates sparse matrix Hamiltonian for Lanczos
energy = lancz.Diag(HV, 1, prm.valvec_, eVec);
i++;
}
if( argv[0] == "--gpu" || argv[0] == "-g" )
{
while ( i < fileGraphs.size() )
{
i += 30;
for( int j = 0; j < HowMany; j++)
{
Bonds[ j ] = (int*)malloc(sizeof(int)*3*fileGraphs.at(i - j)->Order);
for(unsigned int k = 0; k < fileGraphs.at(i - j)->Order; k++)
{
Bonds[ j ][ k ] = k;
Bonds[ j ][ k + fileGraphs.at(i - j)->Order ] = fileGraphs.at(i - j)->AdjacencyList.at(2*k).second;
Bonds[ j ][ k + 2*fileGraphs.at(i - j)->Order ] = fileGraphs.at(i - j)->AdjacencyList.at(2*k + 1).second;
}
data[ j ].Sz = 0;
data[ j ].dimension = 2;
data[ j ].J1 = J;
data[ j ].J2 = h;
data[ j ].modelType = 2;
eigenvalues[ j ] = (double*)malloc(sizeof(double));
}
//Calls the CPU functions which will talk to the GPU, including Thrust
ConstructSparseMatrix(HowMany, Bonds, HamilLancz, data, NumElem, 1);
lanczos(HowMany, NumElem, HamilLancz, groundstates, eigenvalues, 200, 1, 1e-12);
So there's nothing with an std::pair that's getting passed to the GPU. Here are the thrust calls:
for(int i = 0; i < howMany; i++)
{
thrust::device_ptr<int> red_ptr(d_H[i].set);
numElem[i] = thrust::reduce(red_ptr, red_ptr + rawSize[i]);
}
Upvotes: 1
Views: 791
Reputation: 638
It turned out that the problem was that I was linking against code using Blitz. Removing all the Blitz data structures and the include statements for it cleared up my compilation problem. Blitz uses its own namespace, so perhaps something in there was conflicting with thrust, or there is a missing } or > somewhere.
Upvotes: 1
Reputation: 317
I'm not sure this is the right answer, but if your file extension is .cpp, doesn't nvcc just pass it to the regular C++ compiler? What happens if you rename the file to .cu?
(Also, I am not sure whether having -c and all the libraries in the same compile command is needed; -c usually suggests no linking is done.)
Upvotes: 1