Reputation: 31
I'm trying to solve this algorithm problem: finding the biggest square with only one value in a numpy array.
My code is taking too much time. Is there a way to improve the speed?
import numpy as np

answer = 0

def allsame(board):
    memory = board[0,0]
    board = np.matrix(board)
    for i in range(board[0].size):
        for j in range(board[0].size):
            if board[i,j] != memory: return False
    return True

def findLargestSquare(board):
    global answer
    list = []
    a = np.matrix(board)
    if a[0].size == 1 or a[:,1].size==1: return answer
    if a[1].size < a[:,1].size: ran = a[1].size
    else: ran = a[:,1].size
    for i in range(ran+1):
        for j in range(ran+1):
            if a[i:j,i:j].size > 0 and allsame(a[i:j,i:j])==True:
                if a[i:j,i:j].size > answer:
                    list.append(a[i:j,i:j].size)
                    answer = a[i:j,i:j].size
    findLargestSquare(a[1:])
    return findLargestSquare(a[:,1:])
    return answer

#testBoard = [['x','o','g'],['b','a','j'],['u','q','p']]
testBoard = [['X','O','O','O','X'],['X','O','O','O','O'],['X','X','O','O','O'],['X','X','O','O','O'],['X','X','X','X','X']]
print(findLargestSquare(testBoard))
I changed my code to a convolution-style sliding-window method. Can you take a look at which part is wrong?
import numpy as np
import time

answer = 0

def findLargestSquare(board):
    global answer
    a = np.array(board)
    for k in reversed(range(a[0].size + 1)):
        conv_size = k
        for i in range(a[0].size - conv_size + 1):
            num = i
            for j in range(a[0].size - conv_size + 1):
                #print('i:', i, 'j:', j)
                print(a[i:i + conv_size, j:j + conv_size])
                #print('unique: ',np.unique(a[i:i+ conv_size,j:j+conv_size]).size)
                if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
                    #print("returning")
                    return len(a[i:i+ conv_size,j:j+conv_size])**2
                num = num + 1
        print("================")
    return len(a[i:i+ conv_size,j:j+conv_size])**2

# testBoard = [['x','o','g'],['b','a','j'],['u','q','p']]
testBoard = [['X', 'O', 'O', 'O', 'X'], ['X', 'O', 'O', 'O', 'O'], ['X', 'X', 'O', 'O', 'O'], ['X', 'X', 'O', 'O', 'O'],
             ['X', 'X', 'X', 'X', 'X']]
print(findLargestSquare(testBoard))
Upvotes: 3
Views: 2684
Reputation: 3181
Since no one has provided the DP method example, here it is:
def findLargestSquare(M):
    n = len(M)
    S = [[0 for _ in range(n)] for _ in range(n)]
    max_s = 0
    for i in range(n):
        for j in range(n):
            if M[i][j] == 1:
                if i == 0 or j == 0:
                    # cells in the first row/column can only start a 1x1 square
                    S[i][j] = 1
                else:
                    S[i][j] = min(S[i][j-1], S[i-1][j], S[i-1][j-1]) + 1
                if S[i][j] > max_s:
                    max_s = S[i][j]
    return max_s
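This DP works on a 0/1 matrix and returns the side length of the largest square of 1's. For a character board like the one in the question, one possible wrapper (the largest_uniform_square name and the per-value loop below are my own sketch, not part of this answer) binarizes the board once per distinct value and takes the maximum:

def largest_uniform_square(board):
    # run the 0/1 DP once for every distinct value that appears on the board
    values = {cell for row in board for cell in row}
    return max(
        findLargestSquare([[1 if cell == v else 0 for cell in row] for row in board])
        for v in values
    )

testBoard = [['X','O','O','O','X'],['X','O','O','O','O'],['X','X','O','O','O'],['X','X','O','O','O'],['X','X','X','X','X']]
print(largest_uniform_square(testBoard))  # 3: side of the 3x3 block of 'O'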
As a comparison, here's a brute force method which I came up with during a job interview (it's about 100 times slower for a 500x500 matrix):
def findLargestSquare(arr):
    if sum([sum(row) for row in arr]) == 0:
        return 0
    n = len(arr)
    for k in range(n, 1, -1):  # check every kxk square from k=n to k=2
        for i in range(n - k + 1):
            # for every row i check every starting position j from j=0 to j=n-k
            j_next = -1
            for j in range(n - k + 1):
                if j > j_next:
                    # break as soon as an element of this row segment is not 1:
                    count = 0
                    for p in range(j, j + k, 1):
                        if arr[i][p] != 1:
                            j_next = p
                            break
                    else:  # check the rows below:
                        keep_going = True
                        for r in range(i + 1, i + k, 1):
                            if keep_going:
                                for p in range(j, j + k, 1):
                                    if arr[r][p] != 1:
                                        keep_going = False
                                        j_next = p
                                        break
                                else:
                                    count += 1
                        if count == k - 1:  # square is found
                            return k
    return 1  # at least one 1 exists, but no 2x2 square
Upvotes: 0
Reputation: 1722
I fixed up the first answer:
import numpy as np

def get_best_size(a, best_size, i, j, x):
    if i+1 >= a.shape[0] or j+1 >= a.shape[1]:
        return 1
    if not (a[i, j] == a[i+1, j] == a[i, j+1] == x):
        return 1
    min_neighbor_best_size = int(min(best_size[i+1, j], best_size[i, j+1]))
    if a[i, j] == a[i + min_neighbor_best_size, j + min_neighbor_best_size]:
        return min_neighbor_best_size + 1
    else:
        return min_neighbor_best_size

# looking for largest X block in A
def doGetBest(a, x):
    best_size = np.ones(a.shape)
    best = 1
    best_pair = (0, 0)
    for i in range(a.shape[0]-1, -1, -1):
        for j in range(a.shape[1]-1, -1, -1):
            best_size[i, j] = get_best_size(a, best_size, i, j, x)
            if best < best_size[i, j]:
                best = best_size[i, j]
                best_pair = (i, j)
    return best, best_pair
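To drive this for the question's board, one possible wrapper (my own sketch, not part of the answer) calls doGetBest once per distinct character:

testBoard = np.array([['X','O','O','O','X'],
                      ['X','O','O','O','O'],
                      ['X','X','O','O','O'],
                      ['X','X','O','O','O'],
                      ['X','X','X','X','X']])

# best side length and top-left corner, per character
results = {ch: doGetBest(testBoard, ch) for ch in np.unique(testBoard)}
print(results)  # the 'O' entry should report a best size of 3 at position (1, 2)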
Upvotes: 0
Reputation: 7058
import numpy as np
import time

answer = 0

def findLargestSquare(board):
    global answer
    a = np.array(board)
    for k in reversed(range(a[0].size + 1)):
        conv_size = k
        for i in range(a[0].size - conv_size + 1):
            num = i
            for j in range(a[0].size - conv_size + 1):
                #print('i:', i, 'j:', j)
                print(a[i:i + conv_size, j:j + conv_size])
                #print('unique: ',np.unique(a[i:i+ conv_size,j:j+conv_size]).size)
                if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
                    #print("returning")
                    return len(a[i:i+ conv_size,j:j+conv_size])**2
                num = num + 1
        print("================")
    return len(a[i:i+ conv_size,j:j+conv_size])**2
answer is unnecessary and used nowhere; there is certainly no need for a global in the algorithm.
The conv_size = k and num = i assignments are unnecessary. If you want those names, bind them in the for-loop itself; num isn't even used in the rest of the algorithm.
range(a[0].size) creates a new array each time (or a view, I'm not 100% sure), and if a[1] is shorter it will give problems. max_size = min(a.shape) is cleaner.
Instead of the double loop, you can use itertools.product to generate the coordinates of the top-left corner of the sub-square:
for size in reversed(range(2, max_size + 1)):
    k = max_size - size + 1
    for i, j in itertools.product(range(k), repeat=2):
For k = 5, itertools.product(range(k), repeat=2) yields:
[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]
if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
    #print("returning")
    return len(a[i:i+ conv_size,j:j+conv_size])**2
can be done cleaner, without creating the array (or view) twice:
sub_square = a[i:i+size, j:j+size]
if len(np.unique(sub_square)) == 1:
    return len(sub_square) ** 2
# calculations
import itertools
import numpy as np

def findLargestSquare(board):
    a = np.array(board)
    max_size = min(a.shape)
    for size in reversed(range(2, max_size + 1)):
        # print('looking for identical squares of size: ', size)
        k = max_size - size + 1
        for i, j in itertools.product(range(k), repeat=2):
            # print('checking position: ', (i, j))
            sub_square = a[i:i+size, j:j+size]
            count = len(np.unique(sub_square))
            # print(count, ' elements in: \n', sub_square)
            if count == 1:
                return len(sub_square) ** 2
                # if you also want to know the position, you could do
                # return len(sub_square) ** 2 , (i, j)
        # print("================")
    return 1  # no squares found
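As a quick check against the board from the question, this cleaned-up version returns the same area as the original code:

testBoard = [['X','O','O','O','X'],
             ['X','O','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','X','X','X']]
print(findLargestSquare(testBoard))  # 9, i.e. a 3x3 block of 'O'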
Upvotes: 0
Reputation: 7058
There are a few things fundamentally wrong with your code, in the sense that it is not how you should use Python. Try reading some good introductions and examples first; I recommend this book and these scipy lecture notes.
Let's start from the top.
There are multiple ways to check whether an array has all identical elements:
numpy has a method (np.unique) that returns the unique elements; if all elements are identical, the length of this result should be 1.
You could also take the first element, make an array with np.ones, multiply it by this element, and check that the initial array equals this new one.
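A minimal sketch of both checks (the example array is my own):

import numpy as np

a = np.array([[7, 7], [7, 7]])

# 1) all elements identical  <=>  np.unique returns exactly one value
print(np.unique(a).size == 1)

# 2) compare against an array of ones multiplied by the first element
print(np.array_equal(a, np.ones(a.shape) * a[0, 0]))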
What exactly is this method supposed to do?
global answer: why a global?
findLargestSquare(a[1:]) gets called, but you do nothing with the return value; all it possibly does is change the global answer.
What happens with this list? It gets appended to but never used. PS: don't give variables the names of built-ins like list.
Why use a = np.matrix(board)? Just keep your board as a numpy.array at all times; that will be the easiest and most efficient. Also, a board of 0's and 1's will be a lot more efficient than one of 'X' and 'O' characters.
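For instance, a small conversion sketch (choosing 'O' as the value mapped to 1 is my own assumption, not part of the answer):

import numpy as np

testBoard = [['X','O','O','O','X'],
             ['X','O','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','X','X','X']]

# 0/1 integer array: 1 where the board has 'O', 0 elsewhere
binary_board = (np.array(testBoard) == 'O').astype(int)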
If you want to stop the recursion of the algorithm when the size in one of the 2 directions is 1, check for 1 in a.shape.
If you want to find squares of 1's in a numpy.array of 0's, you could use convolution:
import numpy as np

a = np.zeros((10, 10), dtype=int)
ones = ((1, 1), (1, 2), (2, 1), (2, 2), (0, 0), (4, 5))
for point in ones:
    a[point] = 1
This uses convolution to search whether an i-by-i block of ones fits somewhere in the original array:
import scipy.signal

def find_largest_square_helper(a):
    for i in range(2, min(a.shape) + 1):  # changed this to 2, because the 1 is trivial
        kernel = np.ones((i, i), dtype=int)
        if scipy.signal.convolve(a, kernel).max() != i**2:
            return i - 1
    return min(a.shape)  # every tested size matched, so the largest possible square fits
If you expect large squares, you can start at the biggest square and then work your way down like this:
def find_largest_square_helper2(a):
    for i in range(min(a.shape), 0, -1):
        kernel = np.ones((i, i), dtype=int)
        if scipy.signal.convolve(a, kernel).max() == i**2:
            return i
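With the example array above (one 2-by-2 block of ones plus a few isolated ones), both helpers should report 2; a quick check:

print(find_largest_square_helper(a))   # 2
print(find_largest_square_helper2(a))  # 2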
If you look for the largest square not only of 1's in an array of 0's, you can loop over all the possible characters in the array and call this function in a set comprehension, then look for the max in that set:
def find_largest_square(board):
    a = np.array(board)
    max_squares = {find_largest_square_helper(a == char) for char in np.unique(a)}
    return max(max_squares)
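Applied to the board from the question (my own quick check, not part of the original answer), this should give 3, the side of the largest single-character square:

testBoard = [['X','O','O','O','X'],
             ['X','O','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','X','X','X']]
print(find_largest_square(testBoard))  # 3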
Upvotes: 0
Reputation: 6220
You can do it in O(n^2) instead of your current O(n^4) (allsame() is O(n^2) and is called O(n^2) times):
Use a new matrix best_size, such that best_size[i, j] contains the size of the biggest square starting at (i, j) in your original board. Fill this matrix starting from the end, following this rule:
def get_best_size(a, best_size, i, j):
    # TODO Handle boundaries: best_size = 1 there
    if not a[i, j] == a[i+1, j] == a[i, j+1]:
        return 1
    min_neighbor_best_size = min(best_size[i+1, j], best_size[i, j+1])
    if a[i, j] == a[i + min_neighbor_best_size, j + min_neighbor_best_size]:
        return min_neighbor_best_size + 1
    else:
        return min_neighbor_best_size
Just drawing it should show you why this rule works.
Then you just iterate from the end of the array to the start, keeping track of the best value as you go:
best = 0
for i in range(ran, -1, -1):
    for j in range(ran, -1, -1):
        best_size[i, j] = get_best_size(a, best_size, i, j)
        best = max(best, best_size[i, j])
return best
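Putting the pieces together, here is a minimal runnable sketch of this idea (boundary handling filled in, board passed as a numpy array; the largest_square name and loop bounds are my own, not part of the answer):

import numpy as np

def largest_square(board):
    a = np.array(board)
    rows, cols = a.shape
    best_size = np.ones(a.shape, dtype=int)  # cells in the last row/column can only hold a 1x1 square
    best = 1
    # fill from the bottom-right corner towards the top-left
    for i in range(rows - 2, -1, -1):
        for j in range(cols - 2, -1, -1):
            if a[i, j] == a[i+1, j] == a[i, j+1]:
                m = min(best_size[i+1, j], best_size[i, j+1])
                # the square grows by one only if the opposite corner matches too
                best_size[i, j] = m + 1 if a[i + m, j + m] == a[i, j] else m
            best = max(best, best_size[i, j])
    return best

testBoard = [['X','O','O','O','X'],
             ['X','O','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','O','O','O'],
             ['X','X','X','X','X']]
print(largest_square(testBoard))  # 3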
Upvotes: 1