Reputation: 75
I was trying to find maximum element of any sub matrix AXB from a matrix NXM. I implemented sparse tree method. But I was unable to optimise this. Actually I need to solve range queries, so I need to optimize the code. On doing pre-computation, for any N,M values it takes O(NMlog(N)log(M)) time complexity. How can I improve this to (N*M). Here is my code for precomputation
for(int i=0; i<n;i++)
for(int j=0; j<m;j++)
cin>>arr[i][j];
for(int i=0; pow(2,i) <= n; i++)
for(int j=0; pow(2,j) <= m; j++)
for(int x=0; x + pow(2,i)-1 < n; x++)
for(int y = 0; y + pow(2,j) -1 < m; y++)
{
i=(int)i;
j=(int)j;
if (i == 0 && j == 0)
M[x][y][i][j] = arr[x][y];
else if (i == 0)
M[x][y][i][j] = maxi(2,M[x][y][i][j-1], M[x][(int)(y+pow(2,(j-1)))][i][j-1]);
else if (j == 0)
M[x][y][i][j] = maxi(2,M[x][y][i-1][j], M[(int)(x+ pow(2,(i-1)))][y][i-1][j]);
else
M[x][y][i][j] = maxi(4,M[x][y][i-1][j-1], M[(int)(x + pow(2,(i-1)))][y][i-1][j-1], M[x][(int)(y+pow(2,(j-1)))][i-1][j-1], M[(int)(x + pow(2,(i-1)))][(int)(y+pow(2,(j-1)))][i-1][j-1]);
}
For input x,y,x1,y1
k = log(x1 - x + 1);
l = log(y1 - y + 1);
int max_element = max(4,M[x][y][k][l], M[(int)(x1 - pow(2,k) + 1)][y][k][l], M[x][(int)(y1 - pow(2,l) + 1)][k][l], M[(int)(x1 - pow(2,k) + 1)][(int)(y1 - pow(2,l) + 1)][k][l]);
How can Improve performance of this code. Please help.
Upvotes: 3
Views: 418
Reputation: 195
This solution is not better than O(N*M*Log(N)*Log(M))
, but it is better than your implementation.
By seeing your order of execution of for
loops and accessing of array M
, There will be too many Memory jump and cache miss which cause program to run slow.
Example:-
See the time taken by following Loops:
int M[1000][1000][11][11];
for(int i = 0 ; i <= 10 ; i++){
for(int j = 0 ; j <= 10 ; j++){
for(int x = 0 ; x < 1000 ; x++){
for(int y = 0 ; y < 1000 ; y++){
M[x][y][i][j] = 1;
}
}
}
}
Above execution take 1.9 sec.
int M[11][11][1000][1000];
for(int i = 0 ; i <= 10 ; i++){
for(int j = 0 ; j <= 10 ; j++){
for(int x = 0 ; x < 1000 ; x++){
for(int y = 0 ; y < 1000 ; y++){
M[i][j][x][y] = 1;
}
}
}
}
And this one takes only 0.2 sec. So always try to write loops such that there is sequential access of memory.
For more details you can read here.
So if you change your code in following manner it will be much more faster:
M[Log(n)][Log(m)][n][m];
for(int i=0; (1<<i) <= n; i++)
for(int j=0; (1<<j) <= m; j++)
for(int x=0; x + (1<<i)-1 < n; x++)
for(int y = 0; y + (1<<j) -1 < m; y++)
{
i=(int)i;
j=(int)j;
if (i == 0 && j == 0)
M[i][j][x][y] = arr[x][y];
else if (i == 0)
M[i][j][x][y] = maxi(2,M[i][j-1][x][y], M[i][j-1][x][(y+(1<<(j-1)))]);
else if (j == 0)
M[i][j][x][y] = maxi(2,M[i-1][j][x][y], M[i-1][j][(x+ (1<<(i-1)))][y]);
else
M[i][j][x][y] = maxi(4,M[i-1][j-1][x][y], M[i-1][j-1][(x + (1<<(i-1)))][y], M[i-1][j-1][x][(y+(1<<(j-1)))], M[i-1][j-1][(x + (1<<(i-1)))][(y+(1<<(j-1)))]);
}
And one more optimization can be done, if you are calculation log()
too many times (i.e order of 10^5 or greater) than you can use 31-__builtin_clz()
instead of log()
.
k = 31-__builtin_clz(x1 - x + 1);
l = 31-__builtin_clz(y1 - y + 1);
int max_element = max(4,M[k][l][x][y], M[k][l][(x1 - (1<<k) + 1)][y], M[k][l][x][(y1 - (1<<l) + 1)], M[k][l][(x1 - (1<<k) + 1)][(y1 - (1<<l) + 1)]);
Upvotes: 3