Reputation: 805
I am working on a large application for which I need to perform loop unrolling on subsequent dependent loops for a certain procedure. I have written below a small sample piece of code to replicate the larger version.
Consider the original code:
void main()
{
int a[20] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
int b[20] = {10,9,8,7,6,5,4,3,2,1,20,19,18,17,16,15,14,13,12,11};
int i,j,k,l;
int nab =4, vab =10;
int dimi, dimj, dimij, dimk, diml, dimkl, dimijkl;
int count = 0;
for (i = nab+1; i< nab+vab; i++)
{
dimi = a[i];
for (j = i; j< nab+vab; j++)
{
dimj = b[j];
dimij = dimi*dimj;
count = count +1;
for (k = nab+1; k< nab+vab; k++)
{
dimk = a[k-1];
for (l =k; l< nab+vab; l++)
{
diml = a[l-1];
dimkl = dimk*diml;
dimijkl = dimij * dimkl;
}
}
}
}
printf ("Final dimension:%d \n ", dimijkl);
printf ("Count:%d \n ", count);
}
Now I am unrolling the loop i
by a factor of 2:
for (i = nab+1; i< nab+vab; i+=2)
{
dimi = a[i];
for (j = i; j< nab+vab; j++)
{
dimj = b[j];
dimij = dimi*dimj;
count = count +1;
for (k = nab+1; k< nab+vab; k++)
{
dimk = a[k-1];
for (l =k; l< nab+vab; l++)
{
diml = a[l-1];
dimkl = dimk*diml;
dimijkl = dimij * dimkl;
}
}
}
dimi = a[i+1];
for (j = i+1; j< nab+vab; j++)
{
dimj = b[j];
dimij = dimi*dimj;
count = count +1;
for (k = nab+1; k< nab+vab; k++)
{
dimk = a[k-1];
for (l =k; l< nab+vab; l++)
{
diml = a[l-1];
dimkl = dimk*diml;
dimijkl = dimij * dimkl;
}
}
}
}
printf ("Final dimension:%d \n ", dimijkl);
printf ("Count:%d \n ", count);
Now I wish to unroll the loop i
and j
by a factor of 2, but since loop j
depends on loop i
, I am a bit unsure as to how I should approach writing it. How can I rewrite the code to unroll both i
and j
by a factor of 2.
Also the code will become increasingly clumsier as i increase the unroll factor. Is there a clever way to unroll it manually, without the code becoming too ugly.
I cannot use compiler flags (example:-funroll-loops) in this particular case. I want approach it by manual loop unrolling.
Thank you for your time.
Upvotes: 0
Views: 254
Reputation: 3623
Perhaps not the most elegant solution, but you could use macros:
#define INNER_L_LOOP(l) \
{ \
diml = a[l-1]; \
dimkl = dimk*diml; \
dimijkl = dimij * dimkl; \
}
#define INNER_K_LOOP(k) \
{ \
dimk = a[k-1]; \
for (l =k; l< nab+vab; l++) \
{ \
INNER_L_LOOP(l) \
} \
}
#define INNER_J_LOOP(j) \
{ \
dimj = b[j]; \
dimij = dimi*dimj; \
count = count +1; \
\
for (k = nab+1; k< nab+vab; k += 2) \
{ \
INNER_K_LOOP(k) \
if (k+1 < nab+vab) \
INNER_K_LOOP(k+1) \
} \
}
#define INNER_I_LOOP(i) \
{ \
dimi = a[i]; \
for (j = i; j< nab+vab; j+= 2) \
{ \
INNER_J_LOOP(j) \
if (j+1 < nab+vab) \
INNER_J_LOOP(j+1) \
} \
}
for (i = nab+1; i< nab+vab; i+=2)
{
INNER_I_LOOP(i)
INNER_I_LOOP(i+1)
}
Here I've unrolled the i, j, and k loops by a factor of 2 each, and you'll notice that the innermost loop's code doesn't have to be repeated 8 times (which is what you were trying to avoid, if I understand correctly).
Also you'll notice that for the j and k loops, I'm checking that the index doesn't exceed the array bounds. This check has a slight performance impact, I'll leave it up to you to optimize. ;)
Upvotes: 0