Reputation: 25
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
Question: How should I find the city from my IP address and use this dictionary spending less time (time complexity) as possible?
Upvotes: 0
Views: 345
Reputation: 172
The "proper answer" if you want the best complexity for arbitrarily large data sets is the one given given by Ji Bin.
To really optimize performances over multiple calls, you indeed need to restructure your data, and use the inbuilt bisect function.
But if you REALLY do not want to touch your data, you can still use a band-aid custom implementation of bisect which would look like that
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ipv4_str_to_tuple(ip_str):
return tuple(map(int, ip_str.split('.')))
def relative_in_range(ipv4_tuple, ip_range):
ipv4t_start = ipv4_str_to_tuple(ip_range['start'])
ipv4t_end = ipv4_str_to_tuple(ip_range['end'])
if ipv4t_start > ipv4_tuple:
return -1
if ipv4t_end < ipv4_tuple:
return 1
return 0
def from_those_ranges(ipv4_tuple, ranges):
#in-built bisect
lo, hi = 0, len(ranges)
while lo < hi:
mid = lo + (hi - lo) // 2
comp = relative_in_range(ipv4_tuple, ranges[mid])
if comp == 0:
return True
if comp > 0:
lo = mid + 1
else:
hi = mid
return False
def find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges):
for entry, entry_ranges in entries_ranges.items():
if from_those_ranges(ipv4_tuple, entry_ranges):
return entry
return None
def find_entry_from_ipv4_str(ipv4_str, entries_ranges):
ipv4_tuple = ipv4_str_to_tuple(ipv4_str)
return find_entry_from_ipv4_tuple(ipv4_tuple, entries_ranges)
print(find_entry_from_ipv4_str('10.2.4.2', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('192.168.1.1', RANGES))
print(find_entry_from_ipv4_str('172.12.10.25', RANGES))
print(find_entry_from_ipv4_str('192.168.2.1', RANGES))
print(find_entry_from_ipv4_str('10.10.5.5', RANGES))
-> None
-> munich
-> london
-> None
-> munich
-> london
etc.
link to playground : https://trinket.io/python/e1f9deb1c7
Upvotes: 1
Reputation: 531
First, you need to rearrange your data, for lookup more efficiently.
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in RANGES.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
Now, you could use bisect
to search the sorted start IP, for your input, AIK it using the RB-tree internally.
Below is the whole PoC code for it:
from functools import reduce
from bisect import bisect_left
RANGES = {
'london': [
{'start': '10.10.0.0', 'end': '10.10.255.255'},
{'start': '192.168.1.0', 'end': '192.168.1.255'},
],
'munich': [
{'start': '10.12.0.0', 'end': '10.12.255.255'},
{'start': '172.16.10.0', 'end': '172.16.11.255'},
{'start': '192.168.2.0', 'end': '192.168.2.255'},
]
}
def ip_to_long(ip):
return reduce(lambda x, y: (x << 8) + y, map(int, ip.split('.')))
def data_transform(input_ranges):
data = {}
for location, items in input_ranges.items():
for item in items:
data[ip_to_long(item['start'])] = dict(location=location, end=ip_to_long(item['end']))
return data
def search_for_ip(search_ip, ip_starts, ip_data):
lookup_index = bisect_left(ip_starts, ip_to_long(search_ip))
if lookup_index > 0 and ip_data[ip_starts[lookup_index-1]]['end'] > ip_to_long(search_ip):
return ip_data[ip_starts[lookup_index-1]]['location']
return
new_data = data_transform(RANGES)
print(new_data)
ip_starts = sorted(list(new_data))
print(search_for_ip('192.168.2.100', ip_starts, new_data)) # -> munich
print(search_for_ip('192.168.1.100', ip_starts, new_data)) # -> lodon
print(search_for_ip('192.168.0.100', ip_starts, new_data)) # -> None
Upvotes: 0
Reputation: 23146
Write a custom function which parses the IP addresses as tuples of numbers for easier comparison:
def get_city(ip):
for city in RANGES:
for d in RANGES[city]:
if tuple(map(int, d["start"].split("."))) <= tuple(map(int, ip.split("."))) <= tuple(map(int, d["end"].split("."))):
return city
>>> get_city("192.168.2.10")
"munich"
Upvotes: 0