jake eum
jake eum

Reputation: 31

Find the biggest square in numpy array

I'm trying to solve this algorithm problem: finding the biggest square with only one value in a numpy array.

Example Image :

example image

My code taking too much time. Is there way to improve the speed?

import numpy as np
answer = 0
def allsame(board):
    memory = board[0,0]
    board = np.matrix(board)
    for i in range(board[0].size):
        for j in range(board[0].size):
            if board[i,j] != memory: return False
    return True

def findLargestSquare(board):
    global answer
    list = []

    a = np.matrix(board)
    if a[0].size == 1 or a[:,1].size==1: return answer
    if a[1].size < a[:,1].size: ran = a[1].size
    else: ran = a[:,1].size
    for i in range(ran+1):
        for j in range(ran+1):
           if a[i:j,i:j].size > 0 and allsame(a[i:j,i:j])==True:
                    if a[i:j,i:j].size > answer:
                       list.append(a[i:j,i:j].size)
                       answer = a[i:j,i:j].size

    findLargestSquare(a[1:])
    return findLargestSquare(a[:,1:])
    return answer


#testBoard = [['x','o','g'],['b','a','j'],['u','q','p']]
testBoard = [['X','O','O','O','X'],['X','O','O','O','O'],['X','X','O','O','O'],['X','X','O','O','O'],['X','X','X','X','X']]
print(findLargestSquare(testBoard))

I changed My code to self convolution method. can you take a look at which part is wrong?

import numpy as np
import time
answer = 0


def findLargestSquare(board):
    global answer


    a = np.array(board)

    for k in reversed(range(a[0].size + 1)):
        conv_size = k
        for i in range(a[0].size - conv_size + 1):
            num = i
            for j in range(a[0].size - conv_size + 1):
                #print('i:', i, 'j:', j)
                print(a[i:i + conv_size, j:j + conv_size])
                #print('unique: ',np.unique(a[i:i+ conv_size,j:j+conv_size]).size)
                if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
                    #print("returning")
                    return len(a[i:i+ conv_size,j:j+conv_size])**2
                num = num + 1
                print("================")
    return len(a[i:i+ conv_size,j:j+conv_size])**2



# testBoard = [['x','o','g'],['b','a','j'],['u','q','p']]
testBoard = [['X', 'O', 'O', 'O', 'X'], ['X', 'O', 'O', 'O', 'O'], ['X', 'X', 'O', 'O', 'O'], ['X', 'X', 'O', 'O', 'O'],
             ['X', 'X', 'X', 'X', 'X']]


print(findLargestSquare(testBoard))

Upvotes: 3

Views: 2684

Answers (5)

MichaelSB
MichaelSB

Reputation: 3181

Since no one has provided the DP method example, here it is:

def findLargestSquare(M): 
    n = len(M) 
    S = [[0 for _ in range(n)] for _ in range(n)] 
    max_s = 0
    for i in range(1, n): 
        for j in range(1, n): 
            if M[i][j] == 1: 
                S[i][j] = min(S[i][j-1], S[i-1][j], S[i-1][j-1]) + 1
                if S[i][j] > max_s:
                    max_s = S[i][j]
    return max_s

As a comparison, here's a brute force method which I came up with during a job interview (it's about 100 times slower for a 500x500 matrix):

def findLargestSquare(arr):
    if sum([sum(row) for row in arr]) == 0:  
        return 0
    n = len(arr)
    for k in range(n, 1, -1):  #check every kxk square from k=n to k=2
        for i in range(n-k+1):
            #for every row i check every starting position j from j=0 to j=n-k
            j_next = 0
            for j in range(n-k+1):
                if j > j_next:
                    #break as soon as an element of this row segment is not 1:
                    count = 0
                    for p in range(j, j+k, 1):
                        if arr[i][p] != 1:
                            j_next = p
                            break
                    else:  ##check the rows below:
                        keep_going = True
                        for r in range(i+1, i+k, 1):
                            if keep_going:
                                for p in range(j, j+k, 1):
                                    if arr[r][p] != 1:
                                        keep_going = False
                                        j_next = p
                                        break
                                else:
                                    count += 1
                                if count == k-1: #square is found
                                    return k
    else:
        return 1

Upvotes: 0

Luis B
Luis B

Reputation: 1722

I fixed up the first answer:

def get_best_size(a, best_size, i, j, x):
    if i+1 >= a.shape[0] or j+1 >= a.shape[1]:
        return 1
    if not (a[i, j] == a[i+1, j] == a[i, j+1] == x):
        return 1
    min_neighbor_best_size = int(min(best_size[i+1, j], best_size[i, j+1]))
    if a[i, j] == a[i + min_neighbor_best_size , j + min_neighbor_best_size ]:
        return min_neighbor_best_size + 1
    else:
        return min_neighbor_best_size

# looking for largest X block in A
def doGetBest(a, x):
    best_size = np.ones(a.shape)
    best = 1
    best_pair = (0,0)
    for i in range(a.shape[0]-1,-1,-1):
        for j in range(a.shape[1]-1,-1,-1):
            best_size[i, j] =  get_best_size(a, best_size, i, j, x)
            if best < best_size[i, j]:
                best = best_size[i, j]
                best_pair = (i,j)

    return best, best_pair

Upvotes: 0

Maarten Fabr&#233;
Maarten Fabr&#233;

Reputation: 7058

Improvements on you second attempt

import numpy as np
import time
answer = 0


def findLargestSquare(board):
    global answer


    a = np.array(board)

    for k in reversed(range(a[0].size + 1)):
        conv_size = k
        for i in range(a[0].size - conv_size + 1):
            num = i
            for j in range(a[0].size - conv_size + 1):
                #print('i:', i, 'j:', j)
                print(a[i:i + conv_size, j:j + conv_size])
                #print('unique: ',np.unique(a[i:i+ conv_size,j:j+conv_size]).size)
                if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
                    #print("returning")
                    return len(a[i:i+ conv_size,j:j+conv_size])**2
                num = num + 1
                print("================")
    return len(a[i:i+ conv_size,j:j+conv_size])**2

answer

is unnecessary and used nowhere. It is certainly not needed to use a global in the algorithm

Renaming of variables

The conv_size = k and num = i are unnecessary. If you want to name them like that, then do so in the for-loop, num isn't even used in the rest of the algorithm

range(a[0].size)

This creates a new array each time (or a view, I'm not 100% sure), and if the a[1] is shorter, will give problems. max_size = min(a.shape) is cleaner

the double for-loop

Instead of the loop twice, you can use itertools.product to generate the coordinates of the edge of the sub-square

for size in reversed(range(2,  max_size + 1)):
    k = max_size = size + 1
    for i, j in itertools.product(range(k), repeat=2):

returns a generator with this result for 5:

[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]

the check itself

if(np.unique(a[i:i+ conv_size,j:j+conv_size]).size == 1):
                    #print("returning")
    return len(a[i:i+ conv_size,j:j+conv_size])**2

can be done cleaner without the double creation of the array or view

sub_square = a[i:i+size, j:j+size]
if len(np.unique(sub_square)) == 1:
    return len(sub_square) ** 2
        # calculations

Combined

import itertools
import numpy as np

def findLargestSquare(board):
    a = np.array(board)
    max_size = min(a.shape)
    for size in reversed(range(2,  max_size + 1)):
#         print('looking for identical squares of size: ', size)
        k = max_size - size + 1
        for i, j in itertools.product(range(k), repeat=2):
#             print('checking position: ', (i, j))
            sub_square = a[i:i+size, j:j+size]
            count = len(np.unique(sub_square))
#             print(count, ' elements in: \n', sub_square)
            if count == 1:
                return len(sub_square) ** 2
                # if you also want to know the position, you could do
                # return len(sub_square) ** 2 ,  (i, j)
#             print("================")
    return 1 # no squares found

Upvotes: 0

Maarten Fabr&#233;
Maarten Fabr&#233;

Reputation: 7058

there are a few things fundamentally wrong with your code, in the sense that it is not how you should use Python. Try reading some good introductions and examples first. I recommend this book and these scipy lecture notes

let's start from the top

allsame

There are multiple ways to check whether an array has all identical elements

np.unique

numpy has a method to return the unique elements. If all elements are identical, the length of this array should be 1

np.ones

You could take the first element, make an array with np.ones and multiply it with this element. The initial array should equal this new one

findLargestSquare(board)

What exactly is this method supposed to do?

global answer

Why a global?

``findLargestSquare(a[1:])`

This line gets called, but you do nothing with the return value. All it possibly does is change the global answer

list

What happens with this list. PS, don't give variables the name of built-ins

matrix

Why use a = np.matrix(board) Just keep your board as a numpy.array at all times will be the easiest and most efficient. Also a matrix of 0 and 1's will be a lot more efficient than X and 0's

.size

If you want to stop the recursion of the algorithm if the size in one of the 2 directions is 1, 1 in a.shape

Alternative method

If you want to find squares on 1's in a numpy.array of 0's, you could use convolution

array creation

a = np.zeros((10,10), dtype=int)
ones = ((1, 1), (1, 2), (2, 1), (2, 2), (0, 0), (4, 5))
for point in ones:
    a[point] = 1

Kernel creation and convolution

This uses convolution to search whether there is an i by i kernel in the original array

def find_largest_square_helper(a):
    for i in range(2, min(a.shape)): # changed this to 2, because the 1 is trivial
        kernel = np.ones((i, i), dtype=int)
        if scipy.signal.convolve(a, kernel).max() != i**2:
            return(i-1)

If you expect large squares, you can start at the biggest square and then work your way down like this:

def find_largest_square_helper2(a):
    for i in range(min(a.shape), 0, -1):
        kernel = np.ones((i, i), dtype=int)
        if scipy.signal.convolve(a, kernel).max() == i**2:
            return(i)

More than 1 char

When you not only look for the largest square of 1s in an array of 0s, you loop over all the possible characters in the array, and call this function in a set-comprehension. Then look for the max in this set

def find_largest_square(board):
    a = np.array(board)
    max_squares = {find_largest_square_helper(a==char) for char in np.unique(a)}
    return max(max_squares)

Upvotes: 0

gdelab
gdelab

Reputation: 6220

You can do it in O(n^2) instead of your current O(n^4) (allsame() is in O(n^2) and is called O(n^2) times):

use a new matrix best_size, such that best_size[i, j] should contain the size of the biggest square starting at (i, j) in your original board. Fill this matrix starting from the end following this rule:

def get_best_size(a, best_size, i, j):
    # TODO Handle boundaries: best_size = 1 there
    if not a[i, j] == a[i+1, j] == a[i, j+1]:
        return 1
    min_neighbor_best_size = min(best_size[i+1, j], best_size[i, j+1])
    if a[i, j] == a[i + min_neighbor_best_size , j + min_neighbor_best_size ]:
        return min_neighbor_best_size + 1
    else:
        return min_neighbor_best_size 

Just drawing it should show you why this rule works.

And then you just iterate from the end of the array, to the start, and keep memory of the best one while you're at it:

best = 0
for i in range(ran,-1,-1):
    for j in range(ran,-1,-1):
        best_size[i, j] =  get_best_size(a, best_size, i, j)
        best = max(best, best_size[i, j])
return best

Upvotes: 1

Related Questions