Reputation: 406
I'm trying to optimize a CUDA kernel by breaking it into four separate kernels. I've prototyped all these kernels at the beginning of my code.
// Forward declarations for the flux kernels and the device-side driver.
// The kernel names must match the definitions below exactly (a single
// underscore after "flux"); with a mismatching name the launches inside
// calcFlux would refer to kernels that have not been declared yet.
// Each kernel takes (fluxes, conc) in that order.
__global__ void knowles_flux_oligomers(double*, double*);
__global__ void knowles_flux_nucleus(double*, double*);
__global__ void knowles_flux_fibrils(double*, double*);
__global__ void knowles_flux_maxlength(double*, double*);
__device__ void calcFlux(double*, double*, double*);
... Code ...
// Device-side driver: launches the four flux kernels from device code so that
// `fluxes` is filled in from `concs`.
//   concs  - concentration array; handed to every kernel as its SECOND argument
//   fluxes - output array; handed to every kernel as its FIRST argument
//   dt     - not referenced anywhere in this body
// numBlocks, numThreads and nc are not declared here; they come from elsewhere
// in the file.
// NOTE(review): every thread that calls this function issues its own set of
// launches — confirm the caller restricts it to a single thread.
__device__ void calcFlux(double* concs, double* fluxes, double* dt)
{
// Pass 1: the fibril kernel over the whole numBlocks x numThreads launch grid.
knowles_flux_fibrils<<< numBlocks, numThreads >>>(fluxes, concs);
// Wait for pass 1, because the kernels below write entries of fluxes that
// pass 1 also writes.
// NOTE(review): device-side cudaDeviceSynchronize() is, as far as I know,
// deprecated/removed in recent CUDA toolkits — confirm against the target toolkit.
cudaDeviceSynchronize();
// Pass 2: one block of nc-1 threads zeroes fluxes[1] .. fluxes[nc-1].
knowles_flux_oligomers<<< 1, nc-1 >>>(fluxes, concs);
// Single thread: writes fluxes[nc-1], the same element the previous launch
// zeroed last.
// NOTE(review): this relies on the launches running in issue order — confirm.
knowles_flux_nucleus<<< 1, 1 >>>(fluxes, concs);
// Single thread: writes fluxes[maxlength-1].
knowles_flux_maxlength<<< 1, 1 >>>(fluxes, concs);
// Wait for the pass-2 kernels before returning to the caller.
cudaDeviceSynchronize();
}
// Zeroes one element of the flux array per thread. The thread with global
// index t writes element t + 1, so element 0 is never touched by this kernel.
//   fluxes - array receiving the zeros
//   conc   - not used by this kernel
// There is no bounds check: the launch configuration alone decides which
// elements are written.
__global__ void knowles_flux_oligomers(double *fluxes, double *conc)
{
    const int species = blockIdx.x * blockDim.x + threadIdx.x + 1;
    fluxes[species] = 0;
}
// Computes the flux for the element at offset nc - 1 from the thread's global
// index (element nc - 1 itself for thread 0 of block 0).
//   fluxes - output array; exactly one element is written per thread
//   conc   - input concentrations; conc[0] and every element after the
//            target up to maxlength - 1 are read
// nc, maxlength, kn, km and ka are defined outside this kernel.
__global__ void knowles_flux_nucleus(double *fluxes, double *conc)
{
    const int nucleusIdx = blockIdx.x * blockDim.x + threadIdx.x + nc - 1;

    // Sum of the concentrations of every element after the target one.
    double longerSum = 0;
    int s = nucleusIdx + 1;
    while (s < (maxlength))
    {
        longerSum += conc[s];
        s++;
    }

    // Kept as a single expression so the arithmetic order is unchanged.
    fluxes[nucleusIdx] = (kn)*pow(conc[0],(nc)) + 2*(km)*longerSum - 2*(ka)*conc[nucleusIdx]*conc[0];
}
// Computes the flux for the element at the thread's global index.
//   fluxes - output array; fluxes[idx] is written for every idx < maxlength
//   conc   - input concentrations; conc[0], conc[idx - 1], conc[idx] and all
//            elements after idx up to maxlength - 1 are read
// maxlength, km and ka are defined outside this kernel.
//
// Two guards were added relative to the earlier version:
//   * threads whose index is >= maxlength return immediately, so a launch
//     grid larger than the data no longer reads or writes past the end;
//   * thread 0 no longer reads conc[-1]; the predecessor term is taken as
//     zero there.
// NOTE(review): whether element 0 should be written by this kernel at all
// depends on the model — confirm with the caller.
__global__ void knowles_flux_fibrils(double *fluxes, double *conc)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= (maxlength))
    {
        return;
    }

    // Sum of the concentrations of every element after idx.
    double frag_term = 0;
    for (int s = idx+1; s < (maxlength); s++)
    {
        frag_term += conc[s];
    }

    // Element 0 has no predecessor; avoid the out-of-bounds read of conc[-1].
    const double prev_conc = (idx > 0) ? conc[idx-1] : 0.0;

    fluxes[idx] = -(km)*(idx)*conc[idx] + 2*(km)*frag_term - 2*(ka)*conc[idx]*conc[0] + 2*(ka)*prev_conc*conc[0];
}
// Computes the flux for the element at offset maxlength - 1 from the thread's
// global index (element maxlength - 1 itself for thread 0 of block 0).
//   fluxes - output array; exactly one element is written per thread
//   conc   - input concentrations; conc[0], the target element and its
//            predecessor are read
// maxlength, km and ka are defined outside this kernel.
__global__ void knowles_flux_maxlength(double *fluxes, double *conc)
{
    const int topIdx = blockIdx.x * blockDim.x + threadIdx.x + maxlength - 1;

    // Kept as a single expression so the arithmetic order is unchanged.
    fluxes[topIdx] = -km*(topIdx)*conc[topIdx]+2*(ka)*conc[topIdx-1]*conc[0];
}
Compiling this leads to the error "fatbinary : fatal error : 'Multiple Flux Kernel).sm_35.cubin'is not in 'keyword=value' format", where the 'Multiple Flux Kernel)' part is the tail end of the name of the source file I am trying to compile.
Maybe my google-fu is weak, but I'm not coming up with anything for this kind of error.
Upvotes: 0
Views: 1009
Reputation: 406
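OK, the problem had nothing to do with the code. The error was caused by the filename of the source file I was trying to compile. The filename originally was "GPU RKF45 (Variable Step Size, Multiple Flux Kernel).cu". The comma in the filename caused the problem: presumably nvcc hands the intermediate file names to fatbinary as a comma-separated list of 'keyword=value' options, so the comma split the name and left 'Multiple Flux Kernel).sm_35.cubin' as a stray item. Removing the comma from the filename let it compile.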
Upvotes: 1