Reputation: 33

error: taking reference of texture/surface variable not allowed in device/global functions

I have the following code in a .cu file that use CUDA:

#include "gpu_stgauss2.h"
#include "gpu_st.h"
#include "gpu_sampler.h"

static texture<float, 2, cudaReadModeElementType> s_texSRC1;
static texture<float4, 2, cudaReadModeElementType> s_texSRC4;

inline __host__ __device__ texture<float,2>& texSRC1() { return s_texSRC1; }
inline __host__ __device__ texture<float4,2>& texSRC4() { return s_texSRC4; }

static texture<float4, 2, cudaReadModeElementType> s_texST;
inline __host__ __device__ texture<float4,2>& texST() { return s_texST; }

They are later used in the same file as follows:

gpu_image<float> gpu_stgauss2_filter( const gpu_image<float>& src, const gpu_image<float4>& st, 
                                      float sigma, float max_angle, bool adaptive,
                                      bool src_linear, bool st_linear, int order, float step_size,
                                      float precision )
{     
    if (sigma <= 0) return src;
    gpu_image<float> dst(src.size());

    gpu_sampler<float, texSRC1> src_sampler(src, src_linear? cudaFilterModeLinear : cudaFilterModePoint);
    float cos_max = cosf(radians(max_angle));

    if (src.size() == st.size()) {
        gpu_sampler<float4, texST> st_sampler(st, st_linear? cudaFilterModeLinear : cudaFilterModePoint);
        if (order == 1) imp_stgauss2_filter<1,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 2) imp_stgauss2_filter<2,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 4) imp_stgauss2_filter<4,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
    } else {
        float2 s = make_float2((float)st.w() / src.w(), (float)st.h() / src.h());
        gpu_resampler<float4, texST> st_sampler(st, s, st_linear? cudaFilterModeLinear : cudaFilterModePoint);
        if (order == 1) imp_stgauss2_filter<1,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 2) imp_stgauss2_filter<2,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 4) imp_stgauss2_filter<4,float><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
    }
    GPU_CHECK_ERROR();
    return dst;
}


gpu_image<float4> gpu_stgauss2_filter( const gpu_image<float4>& src, const gpu_image<float4>& st, 
                                       float sigma, float max_angle, bool adaptive,
                                       bool src_linear, bool st_linear, int order, float step_size,
                                       float precision )
{     
    if (sigma <= 0) return src;
    gpu_image<float4> dst(src.size());

    gpu_sampler<float4, texSRC4> src_sampler(src, src_linear? cudaFilterModeLinear : cudaFilterModePoint);
    float cos_max = cosf(radians(max_angle));

    if (src.size() == st.size()) {
        gpu_sampler<float4, texST> st_sampler(st, st_linear? cudaFilterModeLinear : cudaFilterModePoint);
        if (order == 1) imp_stgauss2_filter<1,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 2) imp_stgauss2_filter<2,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 4) imp_stgauss2_filter<4,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
    } else {
        float2 s = make_float2((float)st.w() / src.w(), (float)st.h() / src.h());
        gpu_resampler<float4, texST> st_sampler(st, s, st_linear? cudaFilterModeLinear : cudaFilterModePoint);
        if (order == 1) imp_stgauss2_filter<1,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 2) imp_stgauss2_filter<2,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
        else if (order == 4) imp_stgauss2_filter<4,float4><<<dst.blocks(), dst.threads()>>>(dst, src_sampler, st_sampler, sigma, cos_max, adaptive, step_size, precision);
    }
    GPU_CHECK_ERROR();
    return dst;
}

However, it will cause the following error:

error : taking reference of texture/surface variable not allowed in __device__/__global__ functions

I have few experience in CUDA. Can anyone help on how to fix it? Thanks.

Upvotes: 3

Answers (5)

Dean Zhou

Reputation: 1

I encountered the same problem when trying to compile the exactly same code. It turns out returning reference here is not necessary after all, using the trick in 'gpu_stbf2.cu' as suggested by ennetws.

These 3 functions are actually only called within this file, so move the struct definiton in gpu_sampler.h back to here, and instead of get the texture by calling these fuction, you can just use it directly. I've put the code on github here.

Upvotes: 0

ennetws

Reputation: 153

For anyone having the same problem, which in this case is from a GPU library found here, I managed to solve it by adapting the same strategy used elsewhere, e.g. "gpu_stbf2.cu". I managed to successfully compile with Cuda 6.0 and Visual Studio 2012 x64.

Upvotes: 0

herohuyongtao

Reputation: 50667

Try to downgrade your CUDA to 4.0. It will be OK in CUDA 4.0 for such code syntax. I once encountered a similar problem and CUDA 4.0 works for me.

Upvotes: 0

TripleS

Reputation: 1246

I'd highly suggest using bindless texture in cc 3.0 ++, because the unbind texture command would not have to synchronize the host thread

2nd, you shall consider using the new cash memory that was proposed in CC 3.0 + , In order to do so please simple specify the memory as

 const float* pArray;

3rd, In case you insist of using the old fashion texture , which is powerful for interpolation operations. In the global scope:

   texture     <float, cudaTextureType1D> textureFloat32_1D;

In your code bind the texture

cudaBindTexture ( NULL , textureFloat32_1D , ... ) ;

Inside the kernel use the texture as you wish...

float a = tex1Dfatch(textureFloat32_1D , location) ;

Outside of the kernel

cudaUnbindTexture(textureFloat32_1D );

Please note that multithreading applications using CUDA code shall have problems using the same texture variable as mention in case three (it's not protected ! )

Upvotes: 1

anderas

Reputation: 5854

The compiler error says it all: you aren't allowed to do what you tried. I would propose either using the variables directly (instead of accessing them via texSRC1() etc, or returning pointers instead of references.

Upvotes: 2

error: taking reference of texture/surface variable not allowed in __device__/__global__ functions

Answers (5)

Related Questions

error: taking reference of texture/surface variable not allowed in device/global functions