Reputation: 79
I'm trying to optimize a smooth function which given an image smooths/blurs the edges by replacing every pixel in the image with the average of the pixels surrounding it(the dimension of the image is a matrix). The code to optimize is as follows:
int i, j, ii, jj;
pixel_sum ps;
for (j = 0; j < dim; j++){
for (i = 0; i < dim; i++){
initialize_pixel_sum(&ps);
for(ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++){
for(jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++){
accumulate_sum(&ps, src[RIDX(ii,jj,dim)]);
}
}
dst[RIDX(i,j,dim)].red = ps.red/ps.num;
dst[RIDX(i,j,dim)].green = ps.green/ps.num;
dst[RIDX(i,j,dim)].blue = ps.blue/ps.num;
}
}
I found an optimized version of which looks like the below:
int i, j, myJ;
//cornors
dst[0].red = (src[0].red+src[1].red+src[dim].red+src[dim+1].red)>>2;
dst[0].blue = (src[0].blue+src[1].blue+src[dim].blue+src[dim+1].blue)>>2;
dst[0].green = (src[0].green+src[1].green+src[dim].green+src[dim+1].green)>>2;
i = dim*2-1;
dst[dim-1].red = (src[dim-2].red+src[dim-1].red+src[i-1].red+src[i].red)>>2;
dst[dim-1].blue = (src[dim-2].blue+src[dim-1].blue+src[i-1].blue+src[i].blue)>>2;
dst[dim-1].green = (src[dim-2].green+src[dim-1].green+src[i-1].green+src[i].green)>>2;
j = dim*(dim-1);
i = dim*(dim-2);
dst[j].red = (src[j].red+src[j + 1].red+src[i].red+src[i + 1].red)>>2;
dst[j].blue = (src[j].blue+src[j + 1].blue+src[i].blue+src[i + 1].blue)>>2;
dst[j].green = (src[j].green+src[j + 1].green+src[i].green+src[i + 1].green)>>2;
j = dim*dim-1;
i = dim*(dim-1)-1;
dst[j].red = (src[j - 1].red+src[j].red+src[i - 1].red+src[i].red)>>2;
dst[j].blue = (src[j - 1].blue+src[j].blue+src[i - 1].blue+src[i].blue)>>2;
dst[j].green = (src[j - 1].green+src[j].green+src[i - 1].green+src[i].green)>>2;
//sides
i = dim - 1;
for (j = 1; j < i; j++)
{
dst[j].red = (src[j].red+src[j-1].red+src[j+1].red+src[j+dim].red+src[j+1+dim].red+src[j-1+dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j+1].green+src[j+dim].green+src[j+1+dim].green+src[j-1+dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j+1].blue+src[j+dim].blue+src[j+1+dim].blue+src[j-1+dim].blue)/6;
}
i = dim*dim-1;
for (j = i - dim + 2; j < i; j++)
{
dst[j].red = (src[j].red+src[j-1].red+src[j+1].red+src[j-dim].red+src[j+1-dim].red+src[j-1-dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j+1].green+src[j-dim].green+src[j+1-dim].green+src[j-1-dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j+1].blue+src[j-dim].blue+src[j+1-dim].blue+src[j-1-dim].blue)/6;
}
for (j = dim+dim-1; j < dim*dim-1; j+=dim)
{
dst[j].red = (src[j].red+src[j-1].red+src[j-dim].red+src[j+dim].red+src[j-dim-1].red+src[j-1+dim].red)/6;
dst[j].green = (src[j].green+src[j-1].green+src[j-dim].green+src[j+dim].green+src[j-dim-1].green+src[j-1+dim].green)/6;
dst[j].blue = (src[j].blue+src[j-1].blue+src[j-dim].blue+src[j+dim].blue+src[j-dim-1].blue+src[j-1+dim].blue)/6;
}
i = i - (dim - 1);
for (j = dim; j < i; j+=dim)
{
dst[j].red = (src[j].red+src[j-dim].red+src[j+1].red+src[j+dim].red+src[j+1+dim].red+src[j-dim+1].red)/6;
dst[j].green = (src[j].green+src[j-dim].green+src[j+1].green+src[j+dim].green+src[j+1+dim].green+src[j-dim+1].green)/6;
dst[j].blue = (src[j].blue+src[j-dim].blue+src[j+1].blue+src[j+dim].blue+src[j+1+dim].blue+src[j-dim+1].blue)/6;
}
myJ = dim;
for (i = 1; i < dim-1; i++)
{
for (j = 1; j < dim-1; j++)
{
myJ ++;
dst[myJ].red = (src[myJ-1].red+src[myJ].red+src[myJ+1].red+src[myJ-dim-1].red+src[myJ-dim].red+src[myJ-dim+1].red+src[myJ+dim-1].red+src[myJ+dim].red+src[myJ+dim+1].red)/9;
dst[myJ].green = (src[myJ-1].green+src[myJ].green+src[myJ+1].green+src[myJ-dim-1].green+src[myJ-dim].green+src[myJ-dim+1].green+src[myJ+dim-1].green+src[myJ+dim].green+src[myJ+dim+1].green)/9;
dst[myJ].blue = (src[myJ-1].blue+src[myJ].blue+src[myJ+1].blue+src[myJ-dim-1].blue+src[myJ-dim].blue+src[myJ-dim+1].blue+src[myJ+dim-1].blue+src[myJ+dim].blue+src[myJ+dim+1].blue)/9;
}
myJ += 2;
}
Can someone explain how this optimization works?
Upvotes: 1
Views: 386
Reputation: 45057
Since you don't show the definition of many things (accumulate_sum
, pixel_sum
, RIDX
, etc), it's hard to tell for certain. It appears that the first version of the code iterates over the data by row and column, and the second version handles the corner points first, then the edges, and then the rest of the data. You're processing a pixel based on the pixels around it. Corners and edges have fewer neighboring pixels, so processing them is a bit easier. Breaking out these special cases allows the code for them to be simplified. The second version then unrolls the innermost two loops while processing the rest of the data, which is only possible because you already eliminated all of the special cases (edges and corners).
Whether or not this change is actually an "optimization" is left as an exercise to the reader. You'd need to run performance tests on both versions to know for sure. Even if the second version ends up being more efficient, the first version is far more readable.
Upvotes: 1