Jan Zapletal
Jan Zapletal

Reputation: 41

calling a __host__ function from a __host__ __device__ function

When compiling the MWE

#include <iostream>
#include "cuda.h"

// Minimal type for reproducing the nvcc diagnostic: two user-provided
// constructors without any execution-space annotation, plus one member
// function that is annotated __host__ __device__.
struct Foo{
// Toggle: changing the next line to a plain "/*" comments out both
// constructors up to the matching "//*/" line below.
///*
    // Default constructor: no __host__/__device__ annotation, so it is a
    // host function; it writes to std::cout.
    Foo( ){
      std::cout << "Construct" << std::endl;
    }

    // Copy constructor: likewise unannotated; it only logs and does not
    // read anything from `that`.
    Foo( const Foo & that ){
      std::cout << "Copy construct" << std::endl;
    }
//*/
   // Callable from both host and device code; always returns 0.
   __host__ __device__
   int bar( ) const {
     return 0;
   }
};

// Generic kernel: receives a callable by value and has every thread of the
// launch invoke it once with no arguments. Any result of the call is ignored.
template<typename CopyBody>
__global__ 
void kernel( CopyBody cBody ){
  cBody( );
}

// Host-side helper that launches `kernel` with one block of 32 threads,
// passing the given callable as the kernel argument (it is copied for the
// launch). Progress messages go to std::cout.
//
// A kernel launch does not return a status, so the launch result is fetched
// with cudaGetLastError(). On failure the error is reported on std::cerr and
// "kernel enqued" is NOT printed, since nothing was enqueued.
// The function does not synchronize; the caller is expected to do so.
template <typename CopyBody>
void wrapper( CopyBody && cBody ){
  std::cout << "enquing kernel" << std::endl;
  kernel<<<1,32>>>( cBody );

  // Reads and clears the error state left by the launch above.
  const cudaError_t launchStatus = cudaGetLastError( );
  if( launchStatus != cudaSuccess ){
    std::cerr << "kernel launch failed: "
              << cudaGetErrorString( launchStatus ) << std::endl;
    return;
  }

  std::cout << "kernel enqued" << std::endl;
}

// Entry point of the reproducer. Constructs a Foo on the host, then runs a
// device lambda that captures it by value twice: once through a direct
// launch of `kernel`, once through `wrapper`.
//
// Every CUDA call is checked. Returns 0 on success and 1 as soon as a launch
// or a synchronization reports an error (the error text goes to std::cerr).
int main(int argc, char** argv) {

  // Reports a failed CUDA status on std::cerr; returns true when it failed.
  const auto failed = [ ]( cudaError_t status, const char * what ){
    if( status == cudaSuccess ){
      return false;
    }
    std::cerr << what << " failed: " << cudaGetErrorString( status ) << std::endl;
    return true;
  };

  Foo foo;

  std::cout << "enquing kernel" << std::endl;
  kernel<<<1,32>>>( [=] __device__ ( ) { foo.bar( ); } );
  // Launches return no status; configuration errors surface here.
  if( failed( cudaGetLastError( ), "kernel launch" ) ){
    return 1;
  }
  std::cout << "kernel enqued" << std::endl;
  // Errors raised while the kernel executes surface at the synchronization.
  if( failed( cudaDeviceSynchronize( ), "cudaDeviceSynchronize" ) ){
    return 1;
  }

  wrapper( [=] __device__ ( ) { foo.bar( ); } );
  if( failed( cudaDeviceSynchronize( ), "cudaDeviceSynchronize" ) ){
    return 1;
  }
  
  return 0;
}

with CUDA 10.1 (nvcc --expt-extended-lambda test.cu -o test) the compiler warns about test.cu(16): warning: calling a __host__ function("Foo::Foo") from a __host__ __device__ function("") is not allowed. However, the copy constructor is never called on the device. CUDA 9.1 does not produce this warning.

The given MWE is based on a larger project, where the wrapper decides whether to use a __device__ or __host__ lambda. The constructors/destructors cannot be marked as __host__ __device__ since they need to be called on the CPU only (they (de)allocate CUDA memory). Otherwise, either marking them this way or removing the user-defined constructors/destructor (and letting the compiler create the default __host__ and __device__ versions) would help.

Upvotes: 4

Views: 1471

Answers (1)

Laszlo
Laszlo

Reputation: 801

With the following modifications I don't get the mentioned warning (I used CUDA 10.1 on Windows 10):

#include <stdio.h>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Base class that carries the user-provided constructors. Neither has a
// __host__/__device__ annotation; each one only logs the name of the
// executing function (via __FUNCTION__) followed by a fixed message.
struct Baz {

   // Default constructor: logs "<function>: Construct".
   Baz() {
      printf("%s: Construct\n", __FUNCTION__);
   }

   // Copy constructor: logs "<function>: Copy Construct"; nothing is read
   // from `that`.
   Baz(const Baz & that) {
      printf("%s: Copy Construct\n", __FUNCTION__);
   }

};

// Derives publicly from Baz and declares no constructors of its own, so its
// default and copy constructors are the implicitly declared ones (which in
// turn invoke the Baz constructors).
struct Foo: 
   public Baz {

   // Callable from both host and device code; always returns 0.
   __host__ __device__ 
   int bar() const  {
      return 0;
   } 
};

// Generic kernel: receives a callable by value and has every thread of the
// launch invoke it once with no arguments. Any result of the call is ignored.
template<typename CopyBody>
__global__
void kernel(CopyBody cBody) {
   cBody();
}

// Host-side helper that launches `kernel` with one block of 32 threads,
// passing the given callable as the kernel argument (it is copied for the
// launch). Progress messages are prefixed with the function name.
//
// A kernel launch does not return a status, so the launch result is fetched
// with cudaGetLastError(). On failure the error is written to stderr and the
// "kernel enqued" message is NOT printed, since nothing was enqueued.
// The function does not synchronize; the caller is expected to do so.
template <typename CopyBody>
void wrapper(CopyBody && cBody) {
   printf("%s: enquing kernel\n",__FUNCTION__);
   kernel<<<1, 32>>>(cBody);

   // Reads and clears the error state left by the launch above.
   const cudaError_t launchStatus = cudaGetLastError();
   if (launchStatus != cudaSuccess) {
      fprintf(stderr, "%s: kernel launch failed: %s\n", __FUNCTION__,
              cudaGetErrorString(launchStatus));
      return;
   }

   printf("%s: kernel enqued\n", __FUNCTION__);
}

// Entry point of the modified example. Constructs a Foo on the host, then
// runs a device lambda that captures it by value twice: once through a
// direct launch of `kernel`, once through `wrapper`.
//
// Every CUDA call is checked. Returns 0 on success and 1 as soon as a launch
// or a synchronization reports an error (the error text goes to stderr).
int main(int argc, char** argv) {

   // Reports a failed CUDA status on stderr; returns true when it failed.
   const auto failed = [](cudaError_t status, const char * what) {
      if (status == cudaSuccess) {
         return false;
      }
      fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
      return true;
   };

   Foo foo;

   printf("%s: enquing kernel\n", __FUNCTION__);   
   kernel<<<1, 32>>>([=] __device__() { foo.bar(); });
   // Launches return no status; configuration errors surface here.
   if (failed(cudaGetLastError(), "kernel launch")) {
      return 1;
   }
   printf("%s: kernel enqued\n", __FUNCTION__);   
   // Errors raised while the kernel executes surface at the synchronization.
   if (failed(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
      return 1;
   }

   wrapper([=] __device__() { foo.bar(); });
   if (failed(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
      return 1;
   }

   return 0;
}

The above code produces the following output:

Foo::Foo: Construct
main: enquing kernel
Foo::Foo: Copy Construct
Foo::Foo: Copy Construct
main: kernel enqued
Foo::Foo: Copy Construct
Foo::Foo: Copy Construct
wrapper: enquing kernel
Foo::Foo: Copy Construct
wrapper: kernel enqued

I replaced <iostream> with <stdio.h> for convenience. printf() works from the kernel.

Upvotes: 1

Related Questions