Reputation: 21
While working on a personal project I stumbled into a problem that can be formulated as follows:
You have a grid with N rows and M columns. The table contains some red and some green cells. The goal is to find such an assignment of red to green cells, such that the total distance between red and green cells in their matching is minimal. In addition, two adjacent(with a common edge or vertex) red grid cells can be paired together and moved to another pair of adjacent green cells.
Let's say the limits are N <= 1000, M <= 1000. It is not guaranteed that the number of red cells is equal to the number of green cells, i.e. they are different most of the time. Moving a group into another group that doesn’t have the same look(say two on top of each other into two that are diagonal to each other) is allowed.
I tried finding a similar problem and a lot are close to it, however without the pairing part. I've tried representing it as a transportation problem, an assignment problem or a maximum flow problem(edges having capacity 1 and maybe including an additional "layer" of vertexes to represent groups), however I haven't found a way to properly include the pairing part.
I have managed to represent it as an ILP problem: Red and green cells are represented as vertexes in a complete bipartite graph - R1, R2,.. G1, G2,.., where the edge weights between Ri and Gj is the Euclidean distance between those two cells in the grid. All valid groupings of red and green cells form a second completed bipartite graph, where the position of the group is just the average of the two cells. Each edge in the two graphs is assigned a boolean variable that represents whether or not we move said cell/group into the other. The constraints are that the sum of each edge that is related to Ri(i.e. the ones that come from Ri and from groups that include Ri) is equal to one and that the sum of the edges that go into Gi(cell or group) is <= 1. The objective function is the minimum sum of the boolean variable multiplied by the edge weight it is associated with.
Here is a python implementation of the solution
import pulp
import math
# manhattan or euclidean
DISTANCE_CALCULATION = "euclidean"
# default or gurobi
SOLVER = "gurobi"
# change these value pairs
red_cell_cords = [(1, 8),(1, 9),(1, 19),(1, 20),(2, 8),(2, 9),(2, 19),(2, 20),(3, 9),(3, 19),(6, 9),(6, 19),(7, 18),(8, 11),(8, 17),(9, 12),(9, 16),(11, 13),(11, 15),(12, 14)]
green_cell_cords = [(0, 6),(0, 21),(0, 22),(1, 6),(1, 23),(2, 5),(2, 6),(2, 23),(2, 24),(3, 4),(3, 5),(3, 6),(3, 24),(3, 25),(4, 4),(4, 5),(4, 9),(4, 19),(4, 25),(5, 5),(5, 25),(6, 5),(6, 7),(6, 21),(6, 25),(7, 4),(7, 5),(7, 25),(8, 4),(8, 25),(9, 4),(9, 6),(9, 22),(9, 26),(10, 5),(10, 27),(11, 5),(11, 27),(12, 27),(14, 24),(17, 25),(19, 25)]
red_cells = [f"R{i}" for i in range(len(red_cell_cords))]
green_cells = [f"G{i}" for i in range(len(green_cell_cords))]
red_group_cords = []
green_group_cords = []
red_groups = []
green_groups = []
# Generate RED groups and coordinates
for r1_idx, r1_cord in enumerate(red_cell_cords):
for r2_idx, r2_cord in enumerate(red_cell_cords):
if r1_idx < r2_idx:
if abs(r1_cord[0] - r2_cord[0]) <= 1 and abs(r1_cord[1] - r2_cord[1]) <= 1:
red_groups.append(f"R{r1_idx}-R{r2_idx}-")
red_group_cords.append(((r1_cord[0] + r2_cord[0]) / 2, (r1_cord[1] + r2_cord[1]) / 2))
# Generate GREEN groups and coordinates
for g1_idx, g1_cord in enumerate(green_cell_cords):
for g2_idx, g2_cord in enumerate(green_cell_cords):
if g1_idx < g2_idx:
if abs(g1_cord[0] - g2_cord[0]) <= 1 and abs(g1_cord[1] - g2_cord[1]) <= 1:
green_groups.append(f"G{g1_idx}-G{g2_idx}-")
green_group_cords.append(((g1_cord[0] + g2_cord[0]) / 2, (g1_cord[1] + g2_cord[1]) / 2))
costs = {}
# Edge weights for singular cells
for r_idx, r_cord in enumerate(red_cell_cords):
for g_idx, g_cord in enumerate(green_cell_cords):
red_label = red_cells[r_idx]
green_label = green_cells[g_idx]
if DISTANCE_CALCULATION == "manhattan":
distance = abs(r_cord[0] - g_cord[0]) + abs(r_cord[1] - g_cord[1])
elif DISTANCE_CALCULATION == "euclidean":
distance = math.sqrt(math.pow(abs(r_cord[0] - g_cord[0]), 2) + math.pow(abs(r_cord[1] - g_cord[1]), 2))
else:
raise BaseException("Invalid parameter as DISTANCE_CALCULATION. Check for typos")
costs[(red_label, green_label)] = distance
# Edge weights for groups
for r_idx, r_cord in enumerate(red_group_cords):
for g_idx, g_cord in enumerate(green_group_cords):
red_label = red_groups[r_idx]
green_label = green_groups[g_idx]
if DISTANCE_CALCULATION == "manhattan":
distance = abs(r_cord[0] - g_cord[0]) + abs(r_cord[1] - g_cord[1])
elif DISTANCE_CALCULATION == "euclidean":
distance = math.sqrt(math.pow(abs(r_cord[0] - g_cord[0]), 2) + math.pow(abs(r_cord[1] - g_cord[1]), 2))
else:
raise BaseException("Invalid parameter as DISTANCE_CALCULATION. Check for typos")
costs[(red_label, green_label)] = distance
prob = pulp.LpProblem("RedGreenAssignment", pulp.LpMinimize)
# Variables
## Singular movement
vars = pulp.LpVariable.dicts("Assign", (red_cells, green_cells), 0, 1, pulp.LpBinary)
## Group movement
vars_groups = pulp.LpVariable.dicts("Assign group", (red_groups, green_groups), 0, 1, pulp.LpBinary)
# Objective function
## Singular movement & Group movement
prob += pulp.lpSum([vars[r][g] * costs[(r, g)] for r in red_cells for g in green_cells]) + pulp.lpSum([vars_groups[r][g] * costs[(r, g)] for r in red_groups for g in green_groups])
# Constraints
## For each red cell, ensure it's either assigned individually or through a group
for i, r in enumerate(red_cells):
related_groups = [group for group in red_groups if f"R{i}-" in group]
prob += (pulp.lpSum([vars[r][g] for g in green_cells]) + \
pulp.lpSum([vars_groups[group][g_group] for group in related_groups for g_group in green_groups])) == 1
## For each green cell, ensure it assigned individually or through a group at most once
for i, g in enumerate(green_cells):
related_groups = [group for group in green_groups if f"G{i}-" in group]
prob += (pulp.lpSum([vars[r][g] for r in red_cells]) + \
pulp.lpSum([vars_groups[r_group][group] for group in related_groups for r_group in red_groups])) <= 1
# Solve
if SOLVER == "default":
prob.solve()
elif SOLVER == "gurobi":
solver = pulp.GUROBI_CMD(path=r'C:\gurobi1103\win64\bin\gurobi_cl.exe') # include your path to the gurobi solver
prob.solve(solver)
else:
raise BaseException("Invalid parameter as SOLVER. Check for typos")
# Result
for r in red_cells:
for g in green_cells:
if pulp.value(vars[r][g]) == 1:
print(f"{r} is assigned to {g}")
for r in red_groups:
for g in green_groups:
if pulp.value(vars_groups[r][g]) == 1:
print(f"{r} is assigned to {g}")
print(f"Total Cost: {pulp.value(prob.objective)}")
Having about 2500 edges gives a solution in under 0.5 seconds, i.e. my current solution works reasonably fast.
Marked in yellow with arrows are the “matchings” of red to green cells. Groupings in pairs are marked by having a line over the two.
Solution(not necessarily optimal) found by hand for a grid of 22x29 without considering pairs:
Solution(optimal) found by the ILP solver for a grid of 22x29 with pairings in consideration:
My question is: Is there a more efficient algorithm(which still finds the optimal assignment) that can handle the red-green cell pairing constraint and improve performance within the grid size limit compared to the ILP solution?
The people I've discussed the problem with are surprised that the ILP solution works in a reasonable amount of time(since ILPs are generally NP-hard).
This is my first question, feel free to correct my statement or suggest a proper mathematical representation of the ILP formulation.
Upvotes: 2
Views: 131
Reputation: 15308
cords
is spelled coords
LpVariable.matrix
over LpVariable.dicts
For example,
import typing
import numpy as np
import pandas as pd
import pulp
from matplotlib import pyplot as plt
def make_colour_frames(
coord_data: np.ndarray, name: typing.Literal['R', 'G'],
) -> tuple[pd.DataFrame, pd.DataFrame]:
coords = pd.DataFrame(
index=name + pd.RangeIndex(name='name', stop=len(coord_data)).astype(pd.StringDtype()),
columns=('y', 'x'), data=coord_data,
)
i, j = np.triu_indices(n=len(coords), k=1)
combos = pd.concat(
(coords.iloc[i].reset_index(), coords.iloc[j].reset_index()),
axis='columns', keys=('1', '2'),
)
yx = combos.loc[:, (slice(None), ['y', 'x'])]
predicates = (
(yx['2'] - yx['1']).abs() <= 1
).all(axis='columns')
combos = combos[predicates]
groups = pd.DataFrame(
data={
'name': combos[('1', 'name')] + '-' + combos[('2', 'name')],
'name1': combos[('1', 'name')],
'name2': combos[('2', 'name')],
'y1': combos[('1', 'y')],
'y2': combos[('2', 'y')],
'x1': combos[('1', 'x')],
'x2': combos[('2', 'x')],
'y': combos.loc[:, (slice(None), 'y')].mean(axis=1),
'x': combos.loc[:, (slice(None), 'x')].mean(axis=1),
},
).set_index('name')
return coords, groups
def get_norm(
combos: pd.DataFrame, norm: typing.Literal['euclidean', 'manhattan'],
) -> pd.Series:
diff = combos[['y_R', 'x_R']] - combos[['y_G', 'x_G']].values
match norm:
case 'euclidean':
return np.linalg.norm(diff, axis=1)
case 'manhattan':
return diff.abs().sum(axis=1)
case _:
raise ValueError('Invalid norm ' + norm)
def make_combos(
red: pd.DataFrame, green: pd.DataFrame, norm: typing.Literal['euclidean', 'manhattan'],
) -> pd.DataFrame:
combos = pd.merge(
left=red.reset_index(), right=green.reset_index(), how='cross',
suffixes=('_R', '_G'),
)
combos['dist'] = get_norm(combos, norm)
combos['name'] = combos['name_R'] + '-' + combos['name_G']
combos.set_index('name', inplace=True)
# Movement
combos['assign'] = pulp.LpVariable.matrix(
name='assign', indices=combos.index, cat=pulp.LpBinary,
)
return combos
def make_vars(
red_coord_data: np.ndarray,
green_coord_data: np.ndarray,
norm: typing.Literal['euclidean', 'manhattan'] = 'euclidean',
) -> tuple[
pd.DataFrame, # single combos
pd.DataFrame, # group combos
]:
# Generate RED groups and coordinates
red_coords, red_groups = make_colour_frames(red_coord_data, name='R')
# Generate GREEN groups and coordinates
green_coords, green_groups = make_colour_frames(green_coord_data, name='G')
# Edge weights for singular cells
single_combos = make_combos(red_coords, green_coords, norm)
# Edge weights for groups
group_combos = make_combos(red_groups, green_groups, norm)
return single_combos, group_combos
def get_cost(combos: pd.DataFrame) -> pulp.LpAffineExpression:
return pulp.lpDot(combos['dist'], combos['assign'])
def make_problem(
single_combos: pd.DataFrame, group_combos: pd.DataFrame,
) -> pulp.LpProblem:
prob = pulp.LpProblem(name='RedGreenAssignment', sense=pulp.LpMinimize)
# Objective function
## Singular movement & Group movement
prob.setObjective(get_cost(single_combos) + get_cost(group_combos))
return prob
def add_constraints(
prob: pulp.LpProblem,
single_combos: pd.DataFrame,
group_combos: pd.DataFrame,
) -> None:
# Each red cell is assigned individually or through a group exactly once
for name, for_name in single_combos.groupby('name_R'):
total = pulp.lpSum(for_name['assign']) + pulp.lpSum(
group_combos.loc[
(group_combos['name1_R'] == name) |
(group_combos['name2_R'] == name), 'assign',
]
)
prob.addConstraint(name='excl_red_' + name, constraint=total == 1)
# Each green cell is assigned individually or through a group at most once
for name, for_name in single_combos.groupby('name_G'):
total = pulp.lpSum(for_name['assign']) + pulp.lpSum(
group_combos.loc[
(group_combos['name1_G'] == name) |
(group_combos['name2_G'] == name), 'assign',
]
)
prob.addConstraint(name='excl_green_' + name, constraint=total <= 1)
def prune(df: pd.DataFrame) -> pd.DataFrame:
return df[
df['assign'].map(pulp.value) > 0.5
].drop(columns='assign')
def solve(
prob: pulp.LpProblem,
single_combos: pd.DataFrame,
group_combos: pd.DataFrame,
solver: str = pulp.LpSolverDefault,
):
prob.solve(solver=solver)
if prob.status != pulp.LpStatusOptimal:
raise ValueError(prob.status)
return prune(single_combos), prune(group_combos)
def dump(
prob: pulp.LpProblem,
single_combos: pd.DataFrame,
group_combos: pd.DataFrame,
) -> None:
# already shown in solver output
# print('Total Cost: ', prob.objective.value())
pd.options.display.max_columns = 20
pd.options.display.width = 200
cols = ['y_R', 'x_R', 'y_G', 'x_G', 'dist']
print(single_combos[cols])
print(group_combos[cols])
def graph(
red_coords: np.ndarray,
green_coords: np.ndarray,
single_combos: pd.DataFrame,
group_combos: pd.DataFrame,
) -> plt.Figure:
fig, ax = plt.subplots()
height = 1 + max(red_coords[:, 0].max(), green_coords[:, 0].max())
width = 1 + max(red_coords[:, 1].max(), green_coords[:, 1].max())
image = np.full(shape=(height, width, 3), fill_value=255, dtype=np.uint8)
source = red = 209, 77, 77
dest_used = green = 115, 218, 80
dest_unused = dark_green = 71, 142, 47
image[red_coords[:, 0], red_coords[:, 1]] = source
image[green_coords[:, 0], green_coords[:, 1]] = dest_unused
image[single_combos['y_G'], single_combos['x_G']] = dest_used
image[group_combos['y1_G'], group_combos['x1_G']] = dest_used
image[group_combos['y2_G'], group_combos['x2_G']] = dest_used
ax.imshow(image)
for name, row in single_combos.iterrows():
ax.arrow(
row['x_R'], row['y_R'],
row['x_G'] - row['x_R'], row['y_G'] - row['y_R'],
length_includes_head=True, head_width=0.5, head_length=0.8,
facecolor='yellow',
)
for name, row in group_combos.iterrows():
ax.fill(
row[['x1_R', 'x2_R', 'x2_G', 'x1_G']],
row[['y1_R', 'y2_R', 'y2_G', 'y1_G']],
edgecolor='black', facecolor='yellow', alpha=0.5,
)
return fig
def main() -> None:
red_coords = np.array((
(1, 8), (1, 9), (1, 19), (1, 20), (2, 8), (2, 9), (2, 19), (2, 20), (3, 9), (3, 19), (6, 9),
(6, 19), (7, 18), (8, 11), (8, 17), (9, 12), (9, 16), (11, 13), (11, 15), (12, 14),
))
green_coords = np.array((
(0, 6), (0, 21), (0, 22), (1, 6), (1, 23), (2, 5), (2, 6), (2, 23), (2, 24), (3, 4), (3, 5),
(3, 6), (3, 24), (3, 25), (4, 4), (4, 5), (4, 9), (4, 19), (4, 25), (5, 5), (5, 25), (6, 5),
(6, 7), (6, 21), (6, 25), (7, 4), (7, 5), (7, 25), (8, 4), (8, 25), (9, 4), (9, 6), (9, 22),
(9, 26), (10, 5), (10, 27), (11, 5), (11, 27), (12, 27), (14, 24), (17, 25), (19, 25),
))
single_combos, group_combos = make_vars(
red_coord_data=red_coords, green_coord_data=green_coords,
)
prob = make_problem(single_combos=single_combos, group_combos=group_combos)
add_constraints(
prob=prob, single_combos=single_combos, group_combos=group_combos,
)
single_combos, group_combos = solve(prob=prob, single_combos=single_combos, group_combos=group_combos)
dump(prob=prob, single_combos=single_combos, group_combos=group_combos)
graph(
red_coords=red_coords, green_coords=green_coords,
single_combos=single_combos, group_combos=group_combos,
)
plt.show()
if __name__ == '__main__':
main()
Result - Optimal solution found
Objective value: 52.33277240
Enumerated nodes: 0
Total iterations: 0
Time (CPU seconds): 0.05
Time (Wallclock seconds): 0.05
Option for printingOptions changed from normal to all
Total time (CPU seconds): 0.05 (Wallclock seconds): 0.05
y_R x_R y_G x_G dist
name
R8-G16 3 9 4 9 1.00000
R9-G17 3 19 4 19 1.00000
R10-G22 6 9 6 7 2.00000
R18-G32 11 15 9 22 7.28011
y_R x_R y_G x_G dist
name
R0-R1-G0-G3 1.0 8.5 0.5 6.0 2.549510
R2-R3-G1-G2 1.0 19.5 0.0 21.5 2.236068
R4-R5-G6-G11 2.0 8.5 2.5 6.0 2.549510
R6-R7-G4-G7 2.0 19.5 1.5 23.0 3.535534
R11-R12-G20-G24 6.5 18.5 5.5 25.0 6.576473
R13-R15-G21-G26 8.5 11.5 6.5 5.0 6.800735
R14-R16-G27-G29 8.5 16.5 7.5 25.0 8.558621
R17-R19-G31-G34 11.5 13.5 9.5 5.5 8.246211
Upvotes: 0