Reputation: 583
Using scipy.KDTree
to do some quick nearest neighbour searches. I'm using KDTree.query_ball_point(pnt, r=some_distance)
to do the search.
As my points are lat,long the radius value to search (some_distance
) has to be in decimal degrees (I believe). If I wanted to make this accessible to a user, I would expect that distance to be given in Kilometres, meters, miles etc.
What is the best way, with python libs, to convert a distance in km to a decimal degrees value? I'm using numpy, scipy and played a bit with PySAL.
Help appreciated, Louis
Upvotes: 2
Views: 8806
Reputation: 3098
This may not be a direct solution but more of a reference. I tried using the haversine formula previously but using it to compute the set of the nearest n neighbours became intolerably long when used for thousands of points.
So I created a hash class (or mapping?) that could be put into a binary tree to allow quick searching. It doesn't work on distances but angles (lat, long).
The find function works by finding the closest point in the tree and then walking back up the tree until n nodes are found.
geocode.py:
units = [(31-q, 180.0/(2 ** q)) for q in range(32)]
def bit_sum(bits):
return sum([2**bit for bit in bits])
class gpshash(object):
def __init__(self, longitude = None, latitude = None, **kwargs):
if(kwargs):
if(kwargs.has_key("longitude ") and kwargs.has_key("latitude ")):
self.longitude = geohash(degrees=kwargs["degrees"])
self.latitude = geohash(degrees=kwargs["hash"])
else:
if(longitude == None or latitude == None):
self.longitude = geohash(degrees=0)
self.latitude = geohash(degrees=0)
else:
self.longitude = geohash(degrees=longitude)
self.latitude = geohash(degrees=latitude)
long_hash = self.longitude.bin_hash
lat_hash = self.latitude.bin_hash
hash_str = ""
if(len(long_hash) == len(lat_hash)):
for i in range(len(long_hash)):
hash_str += (long_hash[i]+lat_hash[i])
self.bin_hash = hash_str
def __str__(self):
return "%s, %s" % (str(self.longitude.hash), str(self.latitude.hash))
def __repr__(self):
return str("<gpshash long: %f lat: %f>" % (self.longitude.degrees, self.latitude.degrees))
def __eq__(self, other):
if(isinstance(self, gpshash) and isinstance(other, gpshash)):
return (((self.longitude._hash ^ other.longitude._hash) == 0) and ((self.latitude._hash ^ other.latitude._hash) == 0))
else:
return False
class geohash(object):
def __init__(self, degrees = 0, **kwargs):
if(kwargs):
if(kwargs.has_key("degrees")):
self.degrees = kwargs["degrees"] % 360
self._hash = self.encode()
elif(kwargs.has_key("hash")):
self._hash = kwargs["hash"] % ((2 << 31) - 1)
self.degrees = self.decode()
else:
self.degrees = degrees % 360
self._hash = self.encode()
def __str__(self):
return str(self.hash)
def __repr__(self):
return str("<geohash degrees: %f hash: %s>" % (self.degrees, self.hash))
def __add__(self, other):
return geohash(hash=(self._hash + other._hash))
def __sub__(self, other):
return geohash(hash=(self._hash - other._hash))
def __xor__(self, other):
return geohash(hash=(self._hash ^ other._hash))
def __eq__(self, other):
if(isinstance(self, geohash) and isinstance(other, geohash)):
return ((self._hash ^ other._hash) == 0)
else:
return False
def encode(self):
lesser = filter(lambda (bit, angle): self.degrees >= angle, units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((angles + angle) <= self.degrees) else (bits, angles), lesser, ([], 0))
return bit_sum(combined[0])
def decode(self):
lesser = filter(lambda (bit, angle): self._hash>= (2 ** bit), units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((bit_sum(bits+[bit])) <= self._hash) else (bits, angles), lesser, ([], 0))
return combined[1]
@property
def hash(self):
self._hash = self.encode()
return "%08x" % self._hash
@property
def inv_hash(self):
self._inv_hash = self.decode()
return self._inv_hash
@property
def bin_hash(self):
self._bin_hash = bin(self._hash)[2:].zfill(32)
return self._bin_hash
class gdict(object):
'''
Base Geo Dictionary
Critical Components taken from Python26\Lib\UserDict.py
'''
__slots__ = ["parent", "left", "right", "hash_type"]
hash_type = None
def __init__(self, ihash=None, iparent=None):
def set_helper(iparent, cur_hex, hex_list):
ret_bin_tree = self.__class__(None, iparent)
if(hex_list):
ret_bin_tree.set_child(cur_hex, set_helper(ret_bin_tree, hex_list[0], hex_list[1:]))
return ret_bin_tree
elif(cur_hex):
ret_bin_tree.set_child(cur_hex, ihash)
return ret_bin_tree
self.parent = self
self.left = None
self.right = None
if(iparent != None):
self.parent = iparent
if(isinstance(ihash, self.hash_type)):
ilist = list(ihash.bin_hash)
if(len(ilist) > 1):
ret_bin_tree = set_helper(self, ilist[1], ilist[2:])
if(ret_bin_tree):
self.set_child(ilist[0], ret_bin_tree)
def set_child(self, istr, ichild):
if(istr == "0"):
self.left = ichild
elif(istr == "1"):
self.right = ichild
def get_child(self, istr):
if(istr == "0"):
return self.left
elif(istr == "1"):
return self.right
else:
return ""
def __repr__(self):
def repr_print_helper(ibin_tree, fmt_str = "", depth = 1):
if(not isinstance(ibin_tree, self.__class__)):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth)) % ""
fmt_str += ibin_tree.__repr__()
else:
if((ibin_tree.left != None and ibin_tree.right == None) or (ibin_tree.left == None and ibin_tree.right != None)):
if(ibin_tree.left != None):
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
elif(ibin_tree.right != None):
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
else:
if(ibin_tree.left != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
if(ibin_tree.right != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
return fmt_str
return repr_print_helper(self)
def find(self, ihash, itarget = 1):
class flat(list):
pass
def find_helper_base(iparent, ibin_tree, ihash):
ret_find = None
if(isinstance(ibin_tree, self.hash_type)):
ret_find = iparent
elif(len(ihash) > 0):
if(ibin_tree.get_child(ihash[0])):
ret_find = find_helper_base(ibin_tree, ibin_tree.get_child(ihash[0]), ihash[1:])
else:
ret_find = ibin_tree
return ret_find
def up_find(iflat, iparent, ibin_tree, ibias = "0"):
if((ibin_tree != iparent) and (len(iflat) < itarget)):
if(iparent.left):
if(len(iflat) >= itarget):
return
if(iparent.left != ibin_tree):
down_find(iflat, iparent.left, ibias)
if(iparent.right):
if(len(iflat) >= itarget):
return
if(iparent.right != ibin_tree):
down_find(iflat, iparent.right, ibias)
up_find(iflat, ibin_tree.parent.parent, ibin_tree.parent, ibias)
def down_find(iflat, ibin_tree, ibias = "0"):
if(len(iflat) >= itarget):
return
elif(isinstance(ibin_tree, self.hash_type)):
iflat += [ibin_tree]
else:
if(ibias == "1"):
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
else:
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(isinstance(ihash, self.hash_type)):
flatter = flat()
hasher = ihash.bin_hash
base = find_helper_base(self, self.get_child(hasher[0]), hasher[1:])
if(base):
down_find(flatter, base)
bias = flatter[0].bin_hash[0]
up_find(flatter, base.parent, base, bias)
return list(flatter)
def merge(self, from_bin_tree):
def merge_helper(to_bin_tree, from_bin_tree):
if(isinstance(from_bin_tree, self.__class__)):
if(from_bin_tree.left != None):
if(to_bin_tree.left != None):
merge_helper(to_bin_tree.left, from_bin_tree.left)
else:
from_bin_tree.left.parent = to_bin_tree
to_bin_tree.left = from_bin_tree.left
elif(from_bin_tree.right != None):
if(to_bin_tree.right != None):
merge_helper(to_bin_tree.right, from_bin_tree.right)
else:
from_bin_tree.right.parent = to_bin_tree
to_bin_tree.right = from_bin_tree.right
merge_helper(self, from_bin_tree)
class geodict(gdict):
'''
Geo Dictionary
'''
hash_type = geohash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
class gpsdict(gdict):
'''
GPS Dictionary
'''
hash_type = gpshash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
if(__name__ == "__main__"):
gpses = \
[
gpshash(90, 90),
gpshash(68, 24),
gpshash(144, 60),
gpshash(48, 91),
gpshash(32, 105),
gpshash(32, 150),
gpshash(167, 20),
gpshash(49, 116),
gpshash(81, 82),
gpshash(63, 79),
gpshash(129, 76)
]
base_dict = gpsdict()
for cur_hash in gpses:
base_dict.merge(gpsdict(cur_hash ))
print "All locations added:"
print base_dict
print ""
print "Trying to find 3 nearest points to:"
to_find = gpshash(60, 20)
print to_find.__repr__()
found = base_dict.find(to_find, 3)
print ""
print "Found the following:"
for x in found:
print x.__repr__()
Upvotes: 0
Reputation: 28370
Classic Calculation from here:
Distance
This uses the ‘haversine’ formula to calculate the great-circle distance between two points – that is, the shortest distance over the earth’s surface – giving an ‘as-the-crow-flies’ distance between the points (ignoring any hills, of course!).
Haversine formula:
a = sin²(Δφ/2) + cos(φ1).cos(φ2).sin²(Δλ/2)
c = 2.atan2(√a, √(1−a))
d = R.c
where φ is latitude, λ is longitude, R is earth’s radius (mean radius = 6,371km) note that angles need to be in radians to pass to trig functions!
You can of course do a very rough and ready approximation from the definitions of Nautical Mile and kilometre:
The nautical mile (symbol M, NM or nmi) is a unit of length that is about one minute of arc of latitude measured along any meridian (at sea level), or about one minute of arc of longitude at the equator. By international agreement it has been set at 1,852 metres exactly (about 6,076 feet).
Upvotes: 3