Reputation: 808

CUDA Image Rotation

I am having trouble implementing image rotation in CUDA. I have a very simple Rotate function working as follows:

// Returns the pixel stored at column x, row y of a row-major float image
// whose rows are ImgWidth elements long. No bounds checking is performed;
// the caller must pass coordinates that lie inside the buffer.
__device__ float readPixVal( float* ImgSrc,int ImgWidth,int x,int y)
{
    const int offset = y*ImgWidth + x;  // flattened row-major index
    return ImgSrc[offset];
}
// Stores floatVal at column x, row y of a row-major float image whose rows
// are ImgWidth elements long. No bounds checking is performed; the caller
// must pass coordinates that lie inside the buffer.
__device__ void putPixVal( float* ImgSrc,int ImgWidth,int x,int y, float floatVal)
{
    float* const pixel = ImgSrc + (y*ImgWidth + x);  // address of the target pixel
    *pixel = floatVal;
}

// Forward-mapping rotation about the image origin (0, 0).
//
// Each thread takes one SOURCE pixel (i, j), rotates its coordinates by `deg`
// and scatters the pixel value to the resulting DESTINATION position.
//
//   Source, Destination : row-major float images, sizeX elements per row
//   sizeX, sizeY        : image width and height in pixels
//   deg                 : rotation angle; it is passed straight to cosf/sinf,
//                         so it is interpreted in radians despite its name
//
// Expects a 2D launch in which x covers columns and y covers rows.
// Because this is a scatter, several source pixels can land on the same
// destination pixel while other destination pixels are never written, and
// source pixels whose rotated position falls outside the image are dropped.
__global__ void Rotate(float* Source, float* Destination, int sizeX, int sizeY, float deg)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // source column of this thread
    int j = blockIdx.y * blockDim.y + threadIdx.y;  // source row of this thread

    if(i < sizeX && j < sizeY)
    {
        const float c = cosf(deg);
        const float s = sinf(deg);

        // floorf (not a plain cast) so that coordinates in (-1, 0) are treated
        // as outside the image instead of being truncated to 0.
        const int dstX = (int)floorf(((float)i)*c - ((float)j)*s);
        const int dstY = (int)floorf(((float)i)*s + ((float)j)*c);

        // Rotated coordinates can be negative or exceed the image; writing them
        // unchecked would corrupt memory outside Destination.
        if(dstX >= 0 && dstX < sizeX && dstY >= 0 && dstY < sizeY)
        {
            putPixVal(Destination, sizeX, dstX, dstY, readPixVal(Source, sizeX, i, j));
        }
    }
}

The problem is, I do not know how to do any interpolation. With the above, many pixels are skipped due to integer roundoff. Anyone know how to fix this, or are there any free/opensource implementations of image rotate? I could not find any for CUDA.

Upvotes: 3

Answers (4)

Weixiong Zheng

Reputation: 31

This is basically @James Cotter's answer with a small modification. Since you are reading the pixel value at newX and newY, and newX and newY are calculated from the after-rotation x and y values, they are really the pre-rotation coordinates (I am not sure why "new" is used in the names; they should be "old", right?). I therefore think they should be obtained with the inverse rotation matrix (which is equivalent to applying "-deg" wherever deg is used) instead of the rotation matrix above, i.e.

// Inverse rotation about the centre (xc, yc): maps destination pixel (i, j)
// back to the source position (newX, newY) it should be read from.
newX =  ((float)i-xc)*cos(deg) + ((float)j-yc)*sin(deg) + xc;
newY = -((float)i-xc)*sin(deg) + ((float)j-yc)*cos(deg) + yc;

Hope I understand this right XD

Upvotes: 0

3dhelp

Reputation: 21

// Rotates and scales the real part of a complex image about its centre,
// sampling the source with bilinear interpolation (inverse mapping: every
// thread produces one TARGET pixel).
//
//   trg         : output image, imageWidth * imageHeight elements, row-major
//   src         : input image with the same layout
//   imageWidth  : image width in pixels
//   imageHeight : image height in pixels
//   angle       : rotation angle, passed straight to cosf/sinf (radians)
//   scale       : zoom factor; the source offset is multiplied by 1/scale
//
// Only the real component (.x) is interpolated; the imaginary component (.y)
// of every written target pixel is set to 0. Target pixels whose source
// position falls outside the image are set to (0, 0).
//
// Expects a 2D launch in which x covers columns and y covers rows; threads
// that fall outside the image do nothing.
//
// NOTE(review): no __global__ qualifier is visible on this definition although
// the body reads blockIdx/blockDim/threadIdx -- confirm how it is declared and
// launched in the real project.
void rotateImage_Kernel(cufftComplex* trg, const cufftComplex* src, const unsigned int imageWidth,const unsigned int imageHeight, const float angle, const float scale)
{
    // compute thread dimension
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against grids that are larger than the image; without it the
    // excess threads would write past the end of trg.
    if (x < imageWidth && y < imageHeight) {
        // compute target address
        const unsigned int idx = x + y * imageWidth;

        const int width  = (int)imageWidth;
        const int height = (int)imageHeight;
        const int halfW  = width / 2;
        const int halfH  = height / 2;

        // Target position relative to the image centre (signed arithmetic on
        // purpose: pixels left of / above the centre have negative offsets).
        const float xA = (float)((int)x - halfW);
        const float yA = (float)((int)y - halfH);

        const float c        = cosf(angle);
        const float s        = sinf(angle);
        const float invScale = 1.0f / scale;

        // Keep the source position fractional. Rounding it to an integer here
        // would make both interpolation weights zero and silently turn the
        // bilinear blend below into a nearest-pixel lookup.
        const float src_x = invScale * (xA * c - yA * s) + (float)halfW;
        const float src_y = invScale * (xA * s + yA * c) + (float)halfH;

        if ( src_x >= 0.0f && src_x < (float)width && src_y >= 0.0f && src_y < (float)height) {
            // BI - LINEAR INTERPOLATION
            // The coordinates are non-negative here, so the cast equals floor.
            const int x0 = (int)src_x;
            const int y0 = (int)src_y;
            // Clamp each axis separately so the right/bottom neighbour never
            // wraps onto the next row or runs past the last row.
            const int x1 = min(x0 + 1, width  - 1);
            const int y1 = min(y0 + 1, height - 1);

            // Fractional position inside the 2x2 neighbourhood.
            const float sx = src_x - (float)x0;
            const float sy = src_y - (float)y0;

            const int idx_src00 = x0 + y0 * width;
            const int idx_src10 = x1 + y0 * width;
            const int idx_src01 = x0 + y1 * width;
            const int idx_src11 = x1 + y1 * width;

            trg[idx].y = 0.0f;

            trg[idx].x  = (1.0f-sx)*(1.0f-sy)*src[idx_src00].x;
            trg[idx].x += (     sx)*(1.0f-sy)*src[idx_src10].x;
            trg[idx].x += (1.0f-sx)*(     sy)*src[idx_src01].x;
            trg[idx].x += (     sx)*(     sy)*src[idx_src11].x;
        } else {
            trg[idx].x = 0.0f;
            trg[idx].y = 0.0f;
        }
    }

    // Macro defined outside this snippet; kept outside the guard so that every
    // thread still reaches it, exactly as in the original.
    DEVICE_METHODE_LAST_COMMAND;

}


// Translates a complex image by a whole number of pixels.
//
//   trg         : output image, imageWidth * imageHeight elements, row-major
//   src         : input image with the same layout
//   imageWidth  : image width in pixels
//   imageHeight : image height in pixels
//   tX, tY      : shift in pixels; each is truncated toward zero to an int,
//                 so sub-pixel shifts are ignored
//
// Target pixel (x, y) receives source pixel (x + tX, y + tY); target pixels
// whose source position lies outside the image are set to (0, 0).
//
// Expects a 2D launch in which x covers columns and y covers rows; threads
// that fall outside the image do nothing.
//
// NOTE(review): no __global__ qualifier is visible on this definition although
// the body reads blockIdx/blockDim/threadIdx -- confirm how it is declared and
// launched in the real project.
void translateImage_Kernel(cufftComplex* trg, const cufftComplex* src, const unsigned int imageWidth, const unsigned int imageHeight, const float tX, const float tY)
{
    // compute thread dimension
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against grids that are larger than the image; without it the
    // excess threads would write past the end of trg.
    if (x < imageWidth && y < imageHeight) {
        // compute target address
        const unsigned int idx = x + y * imageWidth;

        const int xB = ((int)x + (int)tX );
        const int yB = ((int)y + (int)tY );

        // Compare as signed ints so the range test does not rely on implicit
        // signed-to-unsigned conversion.
        if ( xB >= 0 && xB < (int)imageWidth && yB >= 0 && yB < (int)imageHeight) {
            trg[idx] = src[xB + yB * imageWidth];
        } else {
            trg[idx].x = 0.0f;
            trg[idx].y = 0.0f;
        }
    }

    // Macro defined outside this snippet; kept outside the guard so that every
    // thread still reaches it, exactly as in the original.
    DEVICE_METHODE_LAST_COMMAND;

}

Upvotes: 2

Martin Beckett

Reputation: 96139

Generally in this sort of image manipulation you loop over all destination pixel positions calculating the corresponding pixel (or interpolating groups of pixels) in the source image.

This ensures that you evenly and uniformly fill the resulting image which is normally what you care about.

Upvotes: 5

James Cotter

Reputation: 808

This seems to do the trick

// Rotation about the image centre using a per-destination-pixel lookup.
//
// Each thread owns one DESTINATION pixel (i, j), rotates its centre-relative
// coordinates by `deg` and copies the source pixel found at the resulting
// position (no interpolation; the position is rounded down to a whole pixel).
//
//   Source, Destination : row-major float images, sizeX elements per row
//   sizeX, sizeY        : image width and height in pixels
//   deg                 : rotation angle; it is passed straight to cosf/sinf,
//                         so it is interpreted in radians despite its name
//
// Expects a 2D launch in which x covers columns and y covers rows.
// Destination pixels whose lookup position lies outside the source image are
// left untouched, so the caller decides the background by pre-filling
// Destination.
__global__ void Rotate(float* Source, float* Destination, int sizeX, int sizeY, float deg)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // destination column
    int j = blockIdx.y * blockDim.y + threadIdx.y;  // destination row

    // The grid rarely matches the image size exactly; excess threads must not
    // write outside Destination.
    if (i >= sizeX || j >= sizeY)
    {
        return;
    }

    // Rotation centre (rounded up for odd sizes).
    int xc = sizeX - sizeX/2;
    int yc = sizeY - sizeY/2;

    const float c  = cosf(deg);
    const float s  = sinf(deg);
    const float dx = (float)i - xc;
    const float dy = (float)j - yc;

    // floorf (not a plain cast) so that positions in (-1, 0) are rejected by
    // the range test below instead of being truncated to 0.
    int newx = (int)floorf(dx*c - dy*s + xc);
    int newy = (int)floorf(dx*s + dy*c + yc);

    if (newx >= 0 && newx < sizeX && newy >= 0 && newy < sizeY)
    {
        putPixVal(Destination, sizeX, i , j, readPixVal(Source, sizeX, newx, newy));
    }
}

Upvotes: 0

CUDA Image Rotation

Answers (4)

Related Questions