Reputation: 620
Say i have a 2 dimensional boolean array. I would like to get a list of slices/or alike, where each slice represents the least (in size) subregions of the array contanining True values while its border contains all False.
I could loop for each row and column and store the indices when such condition is met, but i wonder if you know another way or a library that does this efficiently? You can assume that the boundary of the original boolean array is always False/0.
Example 1
Example 2
Edit ! Added new examples with the correct solutions. Sorry for the confusion.
Upvotes: 6
Views: 674
Reputation: 18895
That's connected component analysis, which has been asked and answered before. Adapting the accepted answer from there to your needs, a possible solution is quite short:
import numpy as np
from scipy.ndimage.measurements import label
def analysis(array):
labeled, _ = label(array, np.ones((3, 3), dtype=np.int))
for i in np.arange(1, np.max(labeled)+1):
pixels = np.array(np.where(labeled == i))
x1 = np.min(pixels[1, :])
x2 = np.max(pixels[1, :])
y1 = np.min(pixels[0, :])
y2 = np.max(pixels[0, :])
print(str(i) + ' | slice: array[' + str(y1) + ':' + str(y2) + ', ' + str(x1) + ':' + str(x2) + ']')
example1 = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
]).astype(bool)
example2 = np.array([
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
]).astype(bool)
for a in [example1, example2]:
print(a, '\n')
analysis(a)
print('\n')
That's the output (without the examples):
[[...]]
1 | slice: array[1:2, 3:5]
2 | slice: array[4:6, 6:8]
3 | slice: array[8:8, 2:2]
[[...]]
1 | slice: array[1:3, 5:8]
2 | slice: array[2:2, 3:3]
3 | slice: array[4:5, 1:1]
4 | slice: array[5:8, 3:6]
5 | slice: array[6:6, 8:8]
6 | slice: array[8:8, 8:8]
Hope that helps!
------------------
System information
------------------
Python: 3.8.1
SciPy: 1.4.1
------------------
Upvotes: 4
Reputation: 59691
You can approach the problem from a graph perspective, with the coordinates of the ones being the graph elements, 8-way connected - then you just have to find the connected components in the graph. If the data is sparse, this should be quite faster than looping through possible square sizes. This an example of how it could work:
from itertools import combinations
def find_squares(a):
# Find ones
ones = [(i, j) for i, row in enumerate(a) for j, val in enumerate(row) if val]
# Make graph of connected ones
graph = {a: [] for a in ones}
for a, b in combinations(ones, 2):
if abs(a[0] - b[0]) <= 1 and abs(a[1] - b[1]) <= 1:
graph[a].append(b)
graph[b].append(a)
# Find connected components in graph
components = []
for a, a_neigh in graph.items():
if any(a in c for c in components):
continue
component = {a, *a_neigh}
pending = [*a_neigh]
visited = {a}
while pending:
b = pending.pop()
if b in visited:
continue
visited.add(b)
component.add(b)
b_neigh = graph[b]
component.update(b_neigh)
pending.extend(c for c in b_neigh if c not in visited)
components.append(component)
# Find bounds for each component
bounds = [((min(a[0] for a in c), min(a[1] for a in c)),
(max(a[0] for a in c), max(a[1] for a in c)))
for c in components]
return bounds
a = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
square_bounds = find_squares(a)
print(*square_bounds, sep='\n')
# ((1, 5), (3, 8))
# ((2, 3), (2, 3))
# ((4, 1), (5, 1))
# ((5, 3), (8, 6))
# ((6, 8), (6, 8))
# ((8, 8), (8, 8))
Upvotes: 1