Reputation: 1
I'm trying to invert a matrix of complex numbers, starting from the real-valued matrix inversion code posted by 'user' in the following link: cuda matrix inverse gaussian jordan
The code compiles and runs without errors, but the output is wrong, and I can't see where I went wrong. Can anyone please help? Thank you in advance!
Here is the complete code:
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#pragma comment(lib, "cuda.lib")
#pragma comment(lib, "cudart.lib")
#include <cuda.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include "device_launch_parameters.h"
#include <cublas_v2.h>
#include "cuComplex.h"
#include <complex>
__device__ __host__ cuDoubleComplex operator*(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a,b); }
__device__ __host__ cuDoubleComplex operator+(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a,b); }
__device__ __host__ cuDoubleComplex operator/(cuDoubleComplex a, cuDoubleComplex b) { return cuCdiv(a,b); }
__device__ __host__ cuDoubleComplex operator-(cuDoubleComplex a, cuDoubleComplex b) { return cuCsub(a,b); }
using namespace std;
__global__ void gaussjordan(cuDoubleComplex *A, cuDoubleComplex *I, int n, int i)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    cuDoubleComplex P;
    if (x < n && y < n)
        if (x > i) {
            P = A[x*n+i] / A[i*n+i];
            I[x*n+y] = I[x*n+y] - I[i*n+y]*P;
            if (y >= i) {
                A[x*n+y] = A[x*n+y] - A[i*n+y]*P;
            }
        }
}
__global__ void dev(cuDoubleComplex *d_A, cuDoubleComplex *dI, int h)
{
    cuDoubleComplex temp = make_cuDoubleComplex(0, 0);
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < h && y < h)
        if (cuCimag(d_A[x*h+x]) != cuCimag(temp)) {
            if (cuCreal(d_A[x*h+x]) != cuCreal(temp)) {
                dI[x*h+y] = dI[x*h+y] / d_A[x*h+x];
                d_A[x*h+y] = d_A[x*h+y] / d_A[x*h+x];
            }
        }
    __syncthreads();
}
int main()
{
int const n = 3;
// creating input
cuDoubleComplex iL[n*n],L[n*n], I[n*n];
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j ) L[i*n+j] =make_cuDoubleComplex(0,1);
else L[i*n+j] = make_cuDoubleComplex(0,0);
printf("%.2f ", cuCimag(L[i*n+j]));
}
printf("\n");
}
printf("\n");
cuDoubleComplex *d_A, *d_L, *dI;
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
int ddsize = n*n*sizeof(cuDoubleComplex);
dim3 threadsPerBlock(n/16,n/16); //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
dim3 numBlocks(16,16); //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// memory allocation
cudaMalloc( (void**) &d_A, ddsize);
cudaMalloc( (void**) &dI, ddsize);
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j) I[i*n+i]=make_cuDoubleComplex(1,0);
else I[i*n+j]=make_cuDoubleComplex(0,0);
}
}
//copy data from CPU to GPU
cudaMemcpy( d_A, L, ddsize, cudaMemcpyHostToDevice);
cudaMemcpy( dI, I, ddsize, cudaMemcpyHostToDevice);
//timer start
cudaEventRecord( start, 0);
// L^(-1)
for(int i=0;i<n;i++){
gaussjordan<<<numBlocks,threadsPerBlock>>>(d_A, dI, n, i);
}
dev<<<numBlocks, threadsPerBlock>>>(d_A, dI, n);
cudaMemcpy(iL, dI, ddsize, cudaMemcpyDeviceToHost );
cudaMemcpy(L, d_A, ddsize, cudaMemcpyDeviceToHost );
cudaEventRecord( stop, 0 );
cudaEventSynchronize( stop );
cudaEventElapsedTime( &time, start, stop );
cudaEventDestroy( start );
cudaEventDestroy( stop );
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
printf("%.2f ", cuCimag(iL[i*n+j]));
}
printf("\n");
}
printf("\n");
std::cout<<"Cuda Time - inverse: "<< time <<"ms\n";
cudaFree(d_A);
cudaFree(dI);
system("Pause");
return 0;
}
Thank you @RobertCrovella for your fast and very insightful suggestion! Regarding your answer to my question: I changed to threadsPerBlock(4,4) and numBlocks(1,1), so I'm using one block with 16 threads for my 4x4 matrix. My input matrix is the following:
1 0 0 0
0 2 0 0
0 0 3 0
0 0 0 4
All the numbers here are real, so the expected inverse should look like:
1 0 0 0
0 1/2 0 0
0 0 1/3 0
0 0 0 1/4
but I'm not getting this at all. I ran the cuda-memcheck tool to check whether my kernels were failing to launch, but it didn't show any error messages. I started learning CUDA very recently and don't have much experience. Can anyone give a more detailed response? Thank you!
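For what it's worth, here is a small host-side check I plan to use to verify whatever the GPU returns (a minimal sketch of my own, not part of the original code; it assumes the same row-major cuDoubleComplex layout and the cuComplex.h / math.h headers used in the code below). It multiplies the original matrix by the computed inverse and tests the product against the identity:
// Host-side sanity check (my own sketch): multiply the original matrix L by the
// computed inverse iL and verify the product is (close to) the identity matrix.
// Assumes row-major storage and that cuComplex.h and math.h are included, as below.
bool checkInverse(const cuDoubleComplex *L, const cuDoubleComplex *iL, int n, double tol = 1e-9)
{
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            cuDoubleComplex acc = make_cuDoubleComplex(0, 0);
            for (int k = 0; k < n; k++)
                acc = cuCadd(acc, cuCmul(L[i*n + k], iL[k*n + j]));   // (L * iL)[i][j]
            double expected = (i == j) ? 1.0 : 0.0;                   // identity entry
            if (fabs(cuCreal(acc) - expected) > tol || fabs(cuCimag(acc)) > tol)
                return false;
        }
    }
    return true;
}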
Here is my modified code:
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#pragma comment(lib, "cuda.lib")
#pragma comment(lib, "cudart.lib")
#include <cuda.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include "device_launch_parameters.h"
#include <cublas_v2.h>
#include "cuComplex.h"
#include <complex>
__device__ __host__ cuDoubleComplex operator*(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a,b); }
__device__ __host__ cuDoubleComplex operator+(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a,b); }
__device__ __host__ cuDoubleComplex operator/(cuDoubleComplex a, cuDoubleComplex b) { return cuCdiv(a,b); }
__device__ __host__ cuDoubleComplex operator-(cuDoubleComplex a, cuDoubleComplex b) { return cuCsub(a,b); }
using namespace std;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void gaussjordan(cuDoubleComplex *A, cuDoubleComplex *I, int n, int i)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    cuDoubleComplex P;
    if (x < n && y < n)
        if (x > i) {
            P = A[x*n+i] / A[i*n+i];
            I[x*n+y] = I[x*n+y] - I[i*n+y]*P;
            if (y >= i) {
                A[x*n+y] = A[x*n+y] - A[i*n+y]*P;
            }
        }
}
__global__ void dev(cuDoubleComplex *d_A, cuDoubleComplex *dI, int h)
{
    cuDoubleComplex temp = make_cuDoubleComplex(0, 0);
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < h && y < h)
        if (cuCimag(d_A[x*h+x]) != 0) {
            if (cuCreal(d_A[x*h+x]) != 0) {
                dI[x*h+y] = dI[x*h+y] / d_A[x*h+x];
                d_A[x*h+y] = d_A[x*h+y] / d_A[x*h+x];
            }
        }
    __syncthreads();
}
int main()
{
int const n= 4;
// creating input
cuDoubleComplex iL[n*n],L[n*n], I[n*n];
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j ) L[i*n+j] =make_cuDoubleComplex(i+1,0);
else L[i*n+j] = make_cuDoubleComplex(0,0);
printf("%.2f ", cuCreal(L[i*n+j]));
}
printf("\n");
}
printf("\n");
cuDoubleComplex *d_A, *dI;
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
int ddsize = n*n*sizeof(cuDoubleComplex);
dim3 threadsPerBlock(n,n); //!!!!!!!!!!!!!!!!!!
dim3 numBlocks(1,1); //!!!!!!!!!!!!!!!!!!
// memory allocation
cudaMalloc( (void**) &d_A, ddsize);
cudaMalloc( (void**) &dI, ddsize);
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j) I[i*n+i]=make_cuDoubleComplex(1,0);
else I[i*n+j]=make_cuDoubleComplex(0,0);
}
}
//copy data from CPU to GPU
cudaMemcpy( d_A, L, ddsize, cudaMemcpyHostToDevice);
cudaMemcpy( dI, I, ddsize, cudaMemcpyHostToDevice);
//timer start
cudaEventRecord( start, 0);
// L^(-1)
for(int i=0;i<n;i++){
gaussjordan<<<numBlocks,threadsPerBlock>>>(d_A, dI, n, i);
gpuErrchk( cudaPeekAtLastError() );
}
dev<<<numBlocks, threadsPerBlock>>>(d_A, dI, n);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk(cudaMemcpy(iL, dI, ddsize, cudaMemcpyDeviceToHost ));
gpuErrchk(cudaMemcpy(L, d_A, ddsize, cudaMemcpyDeviceToHost ));
cudaEventRecord( stop, 0 );
cudaEventSynchronize( stop );
cudaEventElapsedTime( &time, start, stop );
cudaEventDestroy( start );
cudaEventDestroy( stop );
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
printf("%.2f ", cuCreal(iL[i*n+j]));
}
printf("\n");
}
printf("\n");
std::cout<<"Cuda Time - inverse: "<< time <<"ms\n";
cudaFree(d_A);
cudaFree(dI);
system("Pause");
return 0;
}
Upvotes: 0
Views: 1679
Reputation: 151799
DISCLAIMER: I am not an expert on matrix inversion. I have not worked through the details of the differences between real matrix inversion and complex matrix inversion (there shouldn't be many differences, I think). As suggested already, there are probably better/faster ways to invert matrices.
The immediate problem seems to be in your dev kernel, particularly here:
if( cuCimag(d_A[x*h+x]) != cuCimag(temp)){
if( cuCreal(d_A[x*h+x]) != cuCreal(temp)){
This requires that both the real and the imaginary part of the d_A matrix element in question be non-zero in order for the dev kernel to do any work. However, I don't think this condition should be necessary. For division, we probably only require that either the real or the imaginary part be non-zero. In the complex domain we are actually dividing by zero only if both the real and imaginary parts are zero. If you inspect the cuCdiv function provided in cuComplex.h, you can ascertain for yourself under what conditions it will "blow up" and therefore what conditions need to be tested for and avoided. I'm confident your test is not correct.
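To spell that out (this is my own sketch, not code taken from cuComplex.h): complex division (a+bi)/(c+di) effectively multiplies through by the conjugate, so the denominator becomes c^2+d^2, which is zero only when c and d are both zero. A pivot test therefore only needs to reject the all-zero case, for example with a hypothetical helper like this:
// Hypothetical helper, not part of cuComplex.h: a cuDoubleComplex is a usable
// divisor as long as either its real or its imaginary part is non-zero.
__device__ __host__ inline bool isNonZero(cuDoubleComplex z)
{
    return (cuCreal(z) != 0.0) || (cuCimag(z) != 0.0);
}

// The pivot check in the dev kernel would then read:
//   if (x < h && y < h)
//       if (isNonZero(d_A[x*h+x])) { /* divide row x by the pivot */ }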
The following modified code works correctly for me, for your simple test case:
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <math.h>
#include "cuComplex.h"
#include <complex>
__device__ __host__ cuDoubleComplex operator*(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a,b); }
__device__ __host__ cuDoubleComplex operator+(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a,b); }
__device__ __host__ cuDoubleComplex operator/(cuDoubleComplex a, cuDoubleComplex b) { return cuCdiv(a,b); }
__device__ __host__ cuDoubleComplex operator-(cuDoubleComplex a, cuDoubleComplex b) { return cuCsub(a,b); }
using namespace std;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void gaussjordan(cuDoubleComplex *A, cuDoubleComplex *I, int n, int i)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    cuDoubleComplex P;
    if (x < n && y < n)
        if (x > i) {
            P = A[x*n+i] / A[i*n+i];
            I[x*n+y] = I[x*n+y] - I[i*n+y]*P;
            if (y >= i) {
                A[x*n+y] = A[x*n+y] - A[i*n+y]*P;
            }
        }
}
__global__ void dev(cuDoubleComplex *d_A, cuDoubleComplex *dI, int h)
{
    cuDoubleComplex temp = make_cuDoubleComplex(0, 0);
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < h && y < h)
        if ((cuCimag(d_A[x*h+x]) != 0) || (cuCreal(d_A[x*h+x]) != 0)) {
            dI[x*h+y] = dI[x*h+y] / d_A[x*h+x];
            d_A[x*h+y] = d_A[x*h+y] / d_A[x*h+x];
        }
    __syncthreads();
}
int main()
{
int const n= 4;
// creating input
cuDoubleComplex iL[n*n],L[n*n], I[n*n];
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j ) L[i*n+j] =make_cuDoubleComplex(i+1,0);
else L[i*n+j] = make_cuDoubleComplex(0,0);
printf("%.2f ", cuCreal(L[i*n+j]));
}
printf("\n");
}
printf("\n");
cuDoubleComplex *d_A, *dI;
float time;
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
int ddsize = n*n*sizeof(cuDoubleComplex);
dim3 threadsPerBlock(n,n); //!!!!!!!!!!!!!!!!!!
dim3 numBlocks(1,1); //!!!!!!!!!!!!!!!!!!
// memory allocation
cudaMalloc( (void**) &d_A, ddsize);
cudaMalloc( (void**) &dI, ddsize);
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
if(i==j) I[i*n+i]=make_cuDoubleComplex(1,0);
else I[i*n+j]=make_cuDoubleComplex(0,0);
}
}
//copy data from CPU to GPU
cudaMemcpy( d_A, L, ddsize, cudaMemcpyHostToDevice);
cudaMemcpy( dI, I, ddsize, cudaMemcpyHostToDevice);
//timer start
cudaEventRecord( start, 0);
// L^(-1)
for(int i=0;i<n;i++){
gaussjordan<<<numBlocks,threadsPerBlock>>>(d_A, dI, n, i);
gpuErrchk( cudaPeekAtLastError() );
}
dev<<<numBlocks, threadsPerBlock>>>(d_A, dI, n);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk(cudaMemcpy(iL, dI, ddsize, cudaMemcpyDeviceToHost ));
gpuErrchk(cudaMemcpy(L, d_A, ddsize, cudaMemcpyDeviceToHost ));
cudaEventRecord( stop, 0 );
cudaEventSynchronize( stop );
cudaEventElapsedTime( &time, start, stop );
cudaEventDestroy( start );
cudaEventDestroy( stop );
for(int i=0;i<n;i++){
for(int j=0;j<n;j++){
printf("%.2f ", cuCreal(iL[i*n+j]));
}
printf("\n");
}
printf("\n");
std::cout<<"Cuda Time - inverse: "<< time <<"ms\n";
cudaFree(d_A);
cudaFree(dI);
return 0;
}
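As mentioned at the top, there are probably better/faster ways to invert matrices. For reference, one alternative is cuBLAS, which provides batched LU factorization and inversion for double-complex matrices (cublasZgetrfBatched / cublasZgetriBatched). The following is only a minimal sketch under my assumptions (a single non-singular matrix treated as a batch of one, minimal error checking), not a drop-in replacement for your code:
// Minimal sketch: invert one n x n cuDoubleComplex matrix with the cuBLAS batched
// LU routines. Assumptions: the matrix is non-singular, batch size is 1, and most
// error checking is omitted for brevity. Link against cuBLAS (-lcublas).
// Note: cuBLAS stores matrices column-major; for this diagonal test case the
// layout makes no difference.
#include <cstdio>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>

int main()
{
    const int n = 4;
    cuDoubleComplex hA[n*n], hC[n*n];
    for (int i = 0; i < n; i++)
        for (int j = 0; j < n; j++)
            hA[i*n+j] = (i == j) ? make_cuDoubleComplex(i+1, 0)
                                 : make_cuDoubleComplex(0, 0);

    cuDoubleComplex *dA, *dC;
    cudaMalloc((void**)&dA, n*n*sizeof(cuDoubleComplex));
    cudaMalloc((void**)&dC, n*n*sizeof(cuDoubleComplex));
    cudaMemcpy(dA, hA, n*n*sizeof(cuDoubleComplex), cudaMemcpyHostToDevice);

    // The batched routines take a device array of device pointers (batch of 1 here).
    cuDoubleComplex **dAarray, **dCarray;
    cudaMalloc((void**)&dAarray, sizeof(cuDoubleComplex*));
    cudaMalloc((void**)&dCarray, sizeof(cuDoubleComplex*));
    cudaMemcpy(dAarray, &dA, sizeof(cuDoubleComplex*), cudaMemcpyHostToDevice);
    cudaMemcpy(dCarray, &dC, sizeof(cuDoubleComplex*), cudaMemcpyHostToDevice);

    int *dPivot, *dInfo;
    cudaMalloc((void**)&dPivot, n*sizeof(int));
    cudaMalloc((void**)&dInfo, sizeof(int));

    cublasHandle_t handle;
    cublasCreate(&handle);
    cublasZgetrfBatched(handle, n, dAarray, n, dPivot, dInfo, 1);             // LU factorize in place
    cublasZgetriBatched(handle, n, dAarray, n, dPivot, dCarray, n, dInfo, 1); // inverse into dC

    cudaMemcpy(hC, dC, n*n*sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) printf("%.2f ", cuCreal(hC[i*n+j]));
        printf("\n");
    }
    cublasDestroy(handle);
    cudaFree(dA); cudaFree(dC); cudaFree(dAarray); cudaFree(dCarray);
    cudaFree(dPivot); cudaFree(dInfo);
    return 0;
}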
FINAL DISCLAIMER: I'm not saying this is a fully-validated approach to inversion of matrices of arbitrary dimensions. I'm simply pointing out a critical bug that seems to make it fail for your simple test case. I also expressed some reservations in the previous question you linked.
Upvotes: 1