Reputation: 447
I have
Sample
import pandas as pd
spots = {
0: {'a': 3, 'b': 3},
1: {'a': 3, 'b': 3},
2: {'a': 1},
3: {'a': 3, 'b': 3},
4: {'a': 4, 'b': 3},
}
names = {
0: {'a': ['John', 'Claire', 'Billy'], 'b': ['Paul']},
1: {'a': ['John', 'Billy', 'Claire']},
2: {'a': ['Billy']},
3: {'a': ['Claire', 'Billy'], 'b': ['Paul', 'Peter']},
4: {'a': ['Anna', 'Claire', 'Billy'], 'b': ['Peter']},
}
I would like to sort names in the lists so that the position is the same whenever possible (e.g. Billy is available on all days and must therefore be put in the first position, unlike Anna who is available on the least amount of days and must be put at the end of names)
Expected outcome
output = {
0: {'a': ['Billy', 'Claire', 'John'], 'b': ['Paul', '', '']},
1: {'a': ['Billy', 'Claire', 'John'], 'b': ['', '', '']},
2: {'a': ['Billy']},
3: {'a': ['Billy', 'Claire'], 'b': ['Paul', 'Peter', '']},
4: {'a': ['Billy', 'Claire', 'Anna', ''], 'b': ['Peter', '', '']},
}
Own solution
def name_per_category(names):
unique_names = {'a': set(), 'b': set()}
for key in names:
for category in names[key]:
unique_names[category].update(names[key][category])
return {category: sorted(unique_names[category]) for category in unique_names}
def sort_names(spots, names):
output = {}
sorted_names = name_per_category(names)
for key in spots:
output[key] = {}
for category in spots[key]:
sorted_list = [''] * spots[key][category]
if key in names and category in names[key]:
for name in names[key][category]:
# Find the index for each name
index = sorted_names[category].index(name)
print(index, name)
sorted_list[index-1] = name
output[key][category] = sorted_list
return output
output = sort_names(spots, names)
Wrong output
{0: {'a': ['Billy', 'Claire', 'John'], 'b': ['', '', 'Paul']},
1: {'a': ['Billy', 'Claire', 'John'], 'b': ['', '', '']},
2: {'a': ['Billy']},
3: {'a': ['Billy', 'Claire', ''], 'b': ['Peter', '', 'Paul']},
4: {'a': ['Billy', 'Claire', '', 'Anna'], 'b': ['Peter', '', '']}}
Other than wrong outcome, my logic seems quite heavy. Is there better way to reason about this type of problem?
Upvotes: 1
Views: 49
Reputation: 15525
This problem can be formulated as integer linear programming.
Then define binary decision variables X_(i,j) and Y_(i,C):
Call M a positive integer constant bigger than or equal to the maximum number of elements in a column. For instance you can set M = number of days. M will be used for a big-M constraint.
This gives us the following integer linear program:
OBJECTIVE
MINIMIZE sum_i sum_C Y_(i,C) // for each worker i, minimize the number of columns in which i works at least once
CONSTRAINTS
forall position P:
if i in names[P]: sum_(j in P) X_(i,j) == 1 // worker i must be assigned once to position P
else: sum_(j in P) X_(i,j) == 0 // worker i is not available for position P
forall worker i, forall column C:
Y_(i,C) * M >= sum_(j in C) X_(i,j) // Y_(i,C) is 1 iff worker i works at least once in column C
DECISION VARIABLES
forall i, forall j: X_(i,j) in {0,1}
forall i, forall C: Y_(i,C) in {0,1}
Implementation in python with pulp:
spots_dict = {
0: {'a': 3, 'b': 3},
1: {'a': 3, 'b': 3},
2: {'a': 1},
3: {'a': 3, 'b': 3},
4: {'a': 4, 'b': 3},
}
names_dict = {
0: {'a': ['John', 'Claire', 'Billy'], 'b': ['Paul']},
1: {'a': ['John', 'Billy', 'Claire']},
2: {'a': ['Billy']},
3: {'a': ['Claire', 'Billy'], 'b': ['Paul', 'Peter']},
4: {'a': ['Anna', 'Claire', 'Billy'], 'b': ['Peter']},
}
# LISTS AND DICTS OF NAMES OF WORKERS, SPOTS, POSITIONS AND COLUMNS
worker_names = sorted(set().union(*(position_v for day_v in names_dict.values() for position_v in day_v.values())))
# workers = ['Anna', 'Billy', 'Claire', 'John', 'Paul', 'Peter']
spot_names = sorted(
'.'.join((str(day_k), position_k, str(spot_k)))
for day_k,day_v in spots_dict.items()
for position_k,position_v in day_v.items()
for spot_k in range(position_v)
)
# spot_names = ['0.a.0', '0.a.1', '0.a.2', '0.b.0', '0.b.1', '0.b.2', '1.a.0', '1.a.1', '1.a.2', '1.b.0', '1.b.1', '1.b.2', '2.a.0', '3.a.0', '3.a.1', '3.a.2', '3.b.0', '3.b.1', '3.b.2', '4.a.0', '4.a.1', '4.a.2', '4.a.3', '4.b.0', '4.b.1', '4.b.2']
position_names = sorted(
'.'.join((str(day_k), position_k))
for day_k,day_v in spots_dict.items()
for position_k in day_v.keys()
)
# positions = ['0.a', '0.b', '1.a', '1.b', '2.a', '3.a', '3.b', '4.a', '4.b']
all_position_keys = sorted(set().union(*(day_v.keys() for day_v in spots_dict.values())))
# all_position_keys = ['a', 'b']
max_spots = {
k: max(day_v.get(k,0) for day_v in spots_dict.values())
for k in all_position_keys
}
# max_spots = { 'a': 4, 'b': 3 }
column_names = sorted('.'.join((position_k, str(spot_k))) for position_k in all_position_keys for spot_k in range(max_spots[position_k]))
# column_names = ['a.0', 'a.1', 'a.2', 'a.3', 'b.0', 'b.1', 'b.2']
spots_in_column = {}
for j in spot_names:
column = j.split('.',maxsplit=1)[1]
spots_in_column.setdefault(column, []).append(j)
# spots_in_column = {'a.0': ['0.a.0', '1.a.0', '2.a.0', '3.a.0', '4.a.0'], 'a.1': ['0.a.1', '1.a.1', '3.a.1', '4.a.1'], 'a.2': ['0.a.2', '1.a.2', '3.a.2', '4.a.2'], 'b.0': ['0.b.0', '1.b.0', '3.b.0', '4.b.0'], 'b.1': ['0.b.1', '1.b.1', '3.b.1', '4.b.1'], 'b.2': ['0.b.2', '1.b.2', '3.b.2', '4.b.2'], 'a.3': ['4.a.3']}
from pulp import LpMinimize, LpProblem, LpStatus, lpSum, LpVariable
model = LpProblem(name="repeated-assignment", sense=LpMinimize)
# VARIABLES
X = LpVariable.dicts('X', (worker_names, spot_names), lowBound=0, upBound=1, cat='Integer')
Y = LpVariable.dicts('Y', (worker_names, column_names), lowBound=0, upBound=1, cat='Integer')
# OBJECTIVE
model += lpSum(Y[i][c] for i in worker_names for c in column_names)
# CONSTANT
M = len(spots_dict) # = number of days = upper bound on the height of a column
# CONSTRAINTS
for i in worker_names:
for p in position_names:
day_k, position_k = p.split('.')
day_k = int(day_k)
n_times_assigned_expr = lpSum(X[i][f'{p}.{spot_k}'] for spot_k in range(spots_dict[day_k].get(position_k,0)))
n_times_assigned_target = int(i in names_dict[day_k].get(position_k,[]))
constraint_name = f'Worker {i} assigned {n_times_assigned_target} time to position {p}'
model += (n_times_assigned_expr == n_times_assigned_target, constraint_name)
for i in worker_names:
for c in column_names:
constraint_name = f'Y[{i},{c}] is 1 iff {i} is assigned at least once in column {c}'
model += (Y[i][c] * M >= lpSum(X[i][j] for j in spots_in_column[c]), constraint_name)
print(model)
# SOLVE
status = model.solve()
# DISPLAY RESULTS
print('Assignments:')
for var in model.variables():
if var.value() == 1:
print(var.name, end=', ')
print()
Output:
[...]
Result - Optimal solution found
Objective value: 6.00000000
[...]
Assignments:
X_Anna_4.a.3, X_Billy_0.a.0, X_Billy_1.a.0, X_Billy_2.a.0, X_Billy_3.a.0, X_Billy_4.a.0, X_Claire_0.a.2, X_Claire_1.a.2, X_Claire_3.a.2, X_Claire_4.a.2, X_John_0.a.0, X_John_1.a.0, X_Paul_0.b.0, X_Paul_3.b.0, X_Peter_3.b.0, X_Peter_4.b.0,
Y_Anna_a.3, Y_Billy_a.0, Y_Claire_a.2, Y_John_a.0, Y_Paul_b.0, Y_Peter_b.0,
Upvotes: 1