Reputation: 10156
For a list of (x, y) points, I am trying to find the nearby points for each point.
from collections import defaultdict
from math import sqrt
from random import randint
# Generate a list of 1000 random (x, y) points with integer coordinates.
# randint is inclusive at both ends, so each coordinate lies in [0, 100];
# the same (x, y) pair may occur more than once.
points = [(randint(0, 100), randint(0, 100)) for _ in range(1000)]
def is_nearby(point_a, point_b, max_distance=5):
    """Return True if two points are strictly closer than ``max_distance``.

    Args:
        point_a: First point as an ``(x, y)`` pair of numbers.
        point_b: Second point as an ``(x, y)`` pair of numbers.
        max_distance: Exclusive upper bound on the Euclidean distance.

    Returns:
        True when the Euclidean distance between the two points is strictly
        less than ``max_distance``; points exactly ``max_distance`` apart
        are not considered nearby.
    """
    delta_x = point_b[0] - point_a[0]
    delta_y = point_b[1] - point_a[1]
    return sqrt(delta_x**2 + delta_y**2) < max_distance
# For each point, find nearby points that are within a radius of 5
# (the default max_distance of is_nearby). This compares every pair, so it
# is O(len(points)**2).
nearby_points = defaultdict(list)
for point in points:
    for neighbour in points:
        # Points are compared by value: a point is never its own neighbour,
        # and coincident points are not reported as neighbours of each other.
        if point != neighbour and is_nearby(point, neighbour):
            nearby_points[point].append(neighbour)
Is there any way I can index `points` to make the above search faster? I feel there must be a faster way than O(len(points)**2).
Edit: the points in general could be floats, not just ints
Upvotes: 2
Views: 196
Reputation: 46869
This is a version with a fixed grid, where each grid point holds the number of samples located there.
The search can then be reduced to just the cells around the point in question.
from random import randint
import math
N = 100
N_SAMPLES = 1000

# create the N x N grid; each cell counts the samples at that grid point
grd = [[0 for _ in range(N)] for __ in range(N)]

# set the number of points at a given gridpoint
# (indices are derived from N so the grid size is defined in one place)
for _ in range(N_SAMPLES):
    grd[randint(0, N - 1)][randint(0, N - 1)] += 1
def find_neighbours(grid, point, distance):
    """Return the occupied grid cells within ``distance`` of ``point``.

    Args:
        grid: 2D list indexed as ``grid[x][y]``; each cell holds the number
            of samples at that grid point.
        point: ``(x, y)`` pair of integer grid coordinates to search around.
        distance: Integer search radius; cells exactly ``distance`` away
            are included.

    Returns:
        A dict mapping ``(x, y)`` to the sample count of every non-empty
        cell whose Euclidean distance from ``point`` is at most
        ``distance``. The cell at ``point`` itself is included when it is
        non-empty.
    """
    px, py = point
    found = {}
    # Clamp the scan window to the grid passed in. The "+ 1" on the upper
    # bounds makes the window symmetric, so a cell exactly `distance` above
    # the point is found just like the one exactly `distance` below it.
    x_last = min(len(grid) - 1, px + distance)
    for x in range(max(0, px - distance), x_last + 1):
        column = grid[x]
        y_last = min(len(column) - 1, py + distance)
        for y in range(max(0, py - distance), y_last + 1):
            count = column[y]
            # The square window overshoots the circle at its corners.
            if count > 0 and math.hypot(px - x, py - y) <= distance:
                found[(x, y)] = count
    return found
print(find_neighbours(grid=grd, point=(45, 36), distance=5))
# -> {(44, 37): 1, (45, 33): 1, ...}
# meaning: there is one neighbour at (44, 37) etc...
For further optimization: the tests for x and y could be precalculated for a given grid size; the math.hypot(point[0]-x, point[1]-y) call would then not have to be done for every point.
It may also be a good idea to replace the grid with a numpy array.
UPDATE
If your points are floats, you can still create an int grid to reduce the search space:
from random import uniform
from collections import defaultdict
import math
class Point:
    """A 2D point with helpers that map it onto an integer grid."""

    def __init__(self, x, y):
        """Store the ``x`` and ``y`` coordinates unchanged."""
        self.x = x
        self.y = y

    @property
    def x_int(self):
        """Integer grid index along x (``int()`` truncates toward zero)."""
        return int(self.x)

    @property
    def y_int(self):
        """Integer grid index along y (``int()`` truncates toward zero)."""
        return int(self.y)

    def __repr__(self):
        # Unrounded form, so Points stay readable inside lists and dicts.
        return '{}(x={!r}, y={!r})'.format(
            self.__class__.__name__, self.x, self.y)

    def __str__(self):
        # Human-readable form with both coordinates rounded to 2 decimals.
        fmt = '''{0.__class__.__name__}(x={0.x:5.2f}, y={0.y:5.2f})'''
        return fmt.format(self)
N = 100
MIN = 0
MAX = N-1
N_SAMPLES = 1000

# create the grid; every cell holds the list of Points that fall into it
grd = [[[] for _ in range(N)] for __ in range(N)]

# store each random Point in the cell given by its integer coordinates
for _ in range(N_SAMPLES):
    p = Point(x=uniform(MIN, MAX), y=uniform(MIN, MAX))
    grd[p.x_int][p.y_int].append(p)
def find_neighbours(grid, point, distance):
    """Return the points stored in ``grid`` within ``distance`` of ``point``.

    Args:
        grid: 2D list indexed as ``grid[x][y]``. Cell ``(x, y)`` is expected
            to hold the objects (with ``.x`` and ``.y`` attributes) whose
            coordinates lie in ``[x, x + 1)`` by ``[y, y + 1)``.
        point: ``(x, y)`` pair to search around; ints or floats.
        distance: Non-negative search radius; points exactly ``distance``
            away are included.

    Returns:
        A ``defaultdict(list)`` mapping ``(x_int, y_int)`` to the list of
        points in that cell whose Euclidean distance from ``point`` is at
        most ``distance``. Only cells with at least one match appear.
    """
    px, py = point
    found = defaultdict(list)
    # Cell index c covers coordinates [c, c + 1), so the cells that can
    # contain a match run from floor(p - distance) to floor(p + distance),
    # clamped to the grid that was passed in.
    x_last = min(len(grid) - 1, math.floor(px + distance))
    for x in range(max(0, math.floor(px - distance)), x_last + 1):
        column = grid[x]
        # Smallest possible x-offset between the query and this cell column.
        gap_x = max(x - px, px - (x + 1), 0)
        y_last = min(len(column) - 1, math.floor(py + distance))
        for y in range(max(0, math.floor(py - distance)), y_last + 1):
            gap_y = max(y - py, py - (y + 1), 0)
            # Skip cells that lie entirely outside the circle. Using the
            # closest point of the cell (not its corner) never discards a
            # cell that could still hold a neighbour.
            if math.hypot(gap_x, gap_y) > distance:
                continue
            for pt in column[y]:
                # Measure from the query point, not from the cell corner.
                if math.hypot(pt.x - px, pt.y - py) <= distance:
                    found[(x, y)].append(pt)
    return found
res = find_neighbours(grid=grd, point=(45, 36), distance=5)
# print every matching grid cell, followed by the points found inside it
for int_point, points in res.items():
    print(int_point)
    for point in points:
        print(' ', point)
the output looks something like this:
(44, 36)
Point(x=44.03, y=36.93)
(41, 36)
Point(x=41.91, y=36.55)
Point(x=41.73, y=36.53)
Point(x=41.56, y=36.88)
...
For convenience, Point is now a class; this may not be strictly necessary, though.
Depending on how dense or sparse your points are, you could also represent the grid as a dictionary mapping each cell to a list of Points.
...
Also, in this version the find_neighbours function accepts only a starting point consisting of ints. This might also be refined.
And there is much room for improvement: the range of the y axis can be restricted using trigonometry. For the points well inside the circle there is no need for an individual check; detailed checking only needs to be done close to the outer rim of the circle.
Upvotes: 1