Reputation: 404
I have a list of dictionaries. In each dictionary the key is item_cd and the value is location_coordinates. I would like to compute the Euclidean distances between these locations and group the items into batches of 20 based on the proximity of their locations.
[{5036885850: [92.0, 88.73]}, {5036885955: [90.0, 61.73]},
{5036885984: [86.0, 73.03]}, {5036885998: [102.0, 77.54]},
{5036885851: [93.0, 88.0]}, {5036885956: [91.0, 66.73]}, {5036885984: [87.0, 70.0]},
{5036885998: [101.0, 70.54]},{5036885812: [45.0, 88.73]}, {5036885955: [76.0, 60.73]},
{5036885911: [83.0, 74.03]}, {5036885910: [108.0, 77.54]},
{5036885850: [89.0, 76.73]},
{5036885800: [80.0, 69.45]},
{50368854801: [86.0, 69.50]},
{5036885802: [102.0, 77.54]},
{5036885809: [92.5, 85.0]},
{5036885803: [91.5, 65.73]},
{5036885850: [78.0, 76.73]},
{5036885800: [77.0, 69.45]},
{50368854801: [85.0, 69.50]},
{5036885802: [101.50, 89.23]},
{5036885809: [100.5, 84.84]},
{5036885803: [100.67, 64.23]},
]
[{5036885850: [92.0, 88.73]}, {5036885955: [90.0, 61.73]}, {5036885984: [86.0, 73.03]}, {5036885998: [102.0, 77.54]}]
Expected output (in this format):
[{5036885955: [90.0, 61.73]}, {5036885984: [86.0, 73.03]}, {5036885998: [102.0, 77.54]}, {5036885850: [92.0, 88.73]}]
What I have tried with some help on StackOverflow:
import numpy as np
def calcualate_centroid(lst):
    """Return the centroid of a collection of 2-D points.

    Args:
        lst: Sequence of ``[x, y]`` coordinate pairs (may be empty).

    Returns:
        A ``(mean_x, mean_y)`` tuple, or ``(0, 0)`` when ``lst`` is empty.
    """
    arr = np.array(lst)
    length = arr.shape[0]
    if length == 0:
        # No points to average: report the origin instead of dividing by zero.
        return 0, 0
    # Average each coordinate column separately.
    return np.sum(arr[:, 0]) / length, np.sum(arr[:, 1]) / length
data = [{5036885850: [92.0, 88.73]}, {5036885955: [90.0, 61.73]}, {5036885984: [86.0, 73.03]}, {5036885998: [102.0, 77.54]}]

# Items still waiting to be ordered. This is a shallow copy, so `data` itself
# is not consumed by the loop below.
order_centroid_list = list(data)
# Coordinates of each pending item, kept index-aligned with order_centroid_list.
centroid_list = [list(item.values())[0] for item in order_centroid_list]
centroid_removed_list = []
# The search starts from the origin.
current_pos = np.array((0, 0))
result_list = []

while order_centroid_list:
    # Euclidean distance from the current reference point to every pending item.
    dist_list = [np.linalg.norm(current_pos - np.array(centroid)) for centroid in centroid_list]
    min_dist = min(dist_list)
    # index() returns the first match, so ties go to the earliest pending item.
    item_index = dist_list.index(min_dist)
    # Pop by index from both lists so they stay aligned even when two items
    # share the same coordinates.
    next_order = order_centroid_list.pop(item_index)
    result_list.append(next_order)
    centroid_removed_list.append(centroid_list.pop(item_index))
    # The next reference point is the centroid of everything picked so far.
    # NOTE: the helper is defined in this snippet under the spelling
    # `calcualate_centroid`, and it is a plain function, not a method.
    current_pos = np.array(calcualate_centroid(centroid_removed_list))

print('\nfinal result: ' + str(result_list))
I would like to repeat the above steps, producing batches of 20 items, until the list is empty.
Upvotes: 0
Views: 126
Reputation: 2947
To produce the result in batches of 20, keep a second list, say `final_result_list`, that holds the completed batches. Whenever `result_list` reaches length 20, append it to `final_result_list`, then reinitialize `current_pos` to `(0, 0)` and reset `centroid_removed_list` and `result_list` to empty lists.
Now you're good to go! Here's the full code for the solution:
import numpy as np
def calculate_centroid(lst):
    """Compute the centroid (mean x, mean y) of a list of 2-D points.

    Args:
        lst: Sequence of ``[x, y]`` coordinate pairs (may be empty).

    Returns:
        A ``(mean_x, mean_y)`` tuple, or ``(0, 0)`` when ``lst`` is empty.
    """
    arr = np.array(lst)
    length = arr.shape[0]
    if length == 0:
        # Nothing to average yet: use the origin rather than divide by zero.
        return 0, 0
    sum_x = np.sum(arr[:, 0])
    sum_y = np.sum(arr[:, 1])
    return sum_x / length, sum_y / length
data = [
    {5036885850: [92.0, 88.73]}, {5036885955: [90.0, 61.73]},
    {5036885984: [86.0, 73.03]}, {5036885998: [102.0, 77.54]},
    {5036885851: [93.0, 88.0]}, {5036885956: [91.0, 66.73]}, {5036885984: [87.0, 70.0]},
    {5036885998: [101.0, 70.54]}, {5036885812: [45.0, 88.73]}, {5036885955: [76.0, 60.73]},
    {5036885911: [83.0, 74.03]}, {5036885910: [108.0, 77.54]},
    {5036885850: [89.0, 76.73]},
    {5036885800: [80.0, 69.45]},
    {50368854801: [86.0, 69.50]},
    {5036885802: [102.0, 77.54]},
    {5036885809: [92.5, 85.0]},
    {5036885803: [91.5, 65.73]},
    {5036885850: [78.0, 76.73]},
    {5036885800: [77.0, 69.45]},
    {50368854801: [85.0, 69.50]},
    {5036885802: [101.50, 89.23]},
    {5036885809: [100.5, 84.84]},
    {5036885803: [100.67, 64.23]},
]

batch_length = 20
# Coordinates of each pending item, kept index-aligned with `data`.
centroid_list = [list(item.values())[0] for item in data]
centroid_removed_list = []
# Each batch starts its search from the origin.
current_pos = np.array((0, 0))
result_list = []
final_result_list = []

# NOTE: `data` is consumed by this loop and is empty once it finishes.
while data:
    if len(result_list) == batch_length:
        # The current batch is full: store it and start a fresh one from the origin.
        final_result_list.append(result_list)
        result_list = []
        current_pos = np.array((0, 0))
        centroid_removed_list = []

    # Euclidean distance from the current reference point to every pending item.
    dist_list = [np.linalg.norm(current_pos - np.array(centroid)) for centroid in centroid_list]
    min_dist = min(dist_list)
    # index() returns the first match, so ties go to the earliest pending item.
    item_index = dist_list.index(min_dist)

    # Pop by index from both lists so they stay aligned even when several
    # items share the same coordinates (e.g. [102.0, 77.54] appears twice).
    next_order = data.pop(item_index)
    result_list.append(next_order)
    centroid_removed_list.append(centroid_list.pop(item_index))

    # The next reference point is the centroid of the items in this batch so far.
    current_pos = np.array(calculate_centroid(centroid_removed_list))

# Store the last (possibly partial) batch, but never an empty one.
if result_list:
    final_result_list.append(result_list)

print('\nfinal result: ' + str(final_result_list))
And the result you'll get is:
final result: [
[{5036885955: [76.0, 60.73]}, {5036885800: [77.0, 69.45]}, {5036885800: [80.0, 69.45]}, {50368854801: [85.0, 69.5]}, {50368854801: [86.0, 69.5]}, {5036885984: [87.0, 70.0]}, {5036885911: [83.0, 74.03]}, {5036885984: [86.0, 73.03]}, {5036885850: [78.0, 76.73]}, {5036885850: [89.0, 76.73]}, {5036885956: [91.0, 66.73]}, {5036885803: [91.5, 65.73]}, {5036885955: [90.0, 61.73]}, {5036885998: [101.0, 70.54]}, {5036885803: [100.67, 64.23]}, {5036885809: [92.5, 85.0]}, {5036885998: [102.0, 77.54]}, {5036885802: [102.0, 77.54]}, {5036885851: [93.0, 88.0]}, {5036885850: [92.0, 88.73]}],
[{5036885812: [45.0, 88.73]}, {5036885809: [100.5, 84.84]}, {5036885802: [101.5, 89.23]}, {5036885910: [108.0, 77.54]}]
]
Upvotes: 1