Reputation: 111
Given a set of 140 options, my goal is to select the set of options that minimize the objective function and acheive a constraint that weight must be > 5584 units.
Criteria 1 is the objective function to minimize.
The thing I didn't expect and do not understand is why when I reduce the option set to only a smaller subset of options, lp() finds me a more optimal solution.
Note that I do not have a strong math background, so don't understand well the math underlying it. But, I had understood that linear programming would find me the best solution available every time.
Here is a reproducible example:
#First define the FULL option set of 140 options, each with a weight and a criteria 1.
DFToOptimizeFull <- structure(list(item_ID = c("Option1", "Option2", "Option3", "Option4",
"Option5", "Option6", "Option7", "Option8", "Option9", "Option10",
"Option11", "Option12", "Option13", "Option14", "Option15", "Option16",
"Option17", "Option18", "Option19", "Option20", "Option21", "Option22",
"Option23", "Option24", "Option25", "Option26", "Option27", "Option28",
"Option29", "Option30", "Option31", "Option32", "Option33", "Option34",
"Option35", "Option36", "Option37", "Option38", "Option39", "Option40",
"Option41", "Option42", "Option43", "Option44", "Option45", "Option46",
"Option47", "Option48", "Option49", "Option50", "Option51", "Option52",
"Option53", "Option54", "Option55", "Option56", "Option57", "Option58",
"Option59", "Option60", "Option61", "Option62", "Option63", "Option64",
"Option65", "Option66", "Option67", "Option68", "Option69", "Option70",
"Option71", "Option72", "Option73", "Option74", "Option75", "Option76",
"Option77", "Option78", "Option79", "Option80", "Option81", "Option82",
"Option83", "Option84", "Option85", "Option86", "Option87", "Option88",
"Option89", "Option90", "Option91", "Option92", "Option93", "Option94",
"Option95", "Option96", "Option97", "Option98", "Option99", "Option100",
"Option101", "Option102", "Option103", "Option104", "Option105",
"Option106", "Option107", "Option108", "Option109", "Option110",
"Option111", "Option112", "Option113", "Option114", "Option115",
"Option116", "Option117", "Option118", "Option119"), criteria_1 = c(0.19382357870646,
0.406093545012686, 0.26444538198799, 0.26601032196915, 0.582727750839455,
0.610218171520381, 0.278839154130233, 0.121920371729594, 0.394236226297326,
0.377841864658525, 0.120477145449607, 0.26254096134174, 0.320858353224026,
0.157895381699022, 0.367337498338429, 0.178897294673147, 0.206447474122835,
0.125249409058597, 0.0964062671575285, 0.155883926971779, 0.745589005311248,
0.271814782493108, 1, 0.709925503817279, 0.283967774188142, 0.465574672453751,
0.747720902276497, 0.261597766848332, 0.223547497818285, 0.312169636303741,
0.361688040733056, 0.33135717134122, 0.720315669627635, 0.597242543157505,
0.793399950297349, 0.422337180472638, 0.319528402753296, 0.430606430135989,
0.397399279889498, 0.343212756243173, 0.330215166243809, 0.266929589837542,
0.341647931849574, 0.337592426703038, 0.42473989909206, 0.308525738460027,
0.424706956637327, 0.357390032884661, 0.280438539204411, 0.409378774656271,
0.346474174849303, 0.791793745557103, 0.450584584087061, 0.262056880638506,
0.311923434799947, 0.448835397534517, 0.330430390281398, 0.141640071895463,
0.1929124019673, 0.176881794381289, 0.379488987395177, 0.741791953949916,
0.0370066289465928, 0.0768869958215097, 0.147257049396344, 0.570100734558947,
0.36424969224812, 0.254741112761445, 0.339564812834843, 0.305964896057886,
0.286239647689116, 0.326383438614336, 0.458683804448965, 0.567810482635859,
0.235202885065509, 0.729063336203758, 0.341599616249299, 0.153703714406256,
0.117457767195094, 0.134082379254345, 0.558969536898439, 0.286059793445029,
0.322705673615406, 0.396612822128083, 0.100616428459969, 0.156259239780615,
0.232564021060054, 0.104164041865814, 0.210723751509863, 0.192397806148102,
0.166759676123656, 0.310175982060811, 0.200003698801935, 0.193767171976952,
0.137411647758468, 0.369825867340157, 0.429786451982038, 0.202788665483821,
0.786777129845286, 0.321634986042802, 0.496148738072809, 0.170282669379121,
0.140613654358518, 0.155074004935589, 0.106219651041155, 0.121895751579215,
0.144999508752868, 0.278420842748903, 0.61995781054043, 0.860520490784782,
0.860520490784782, 0.894125146651717, 0.894125146651717, 0.894125146651717,
0.443148836322235, 0.120796860641858, 0.0224245646683504, 0.223040877540759,
0.195655525952297), itemWeight = c(151, 482, 309, 323, 648, 766,
309, 29, 385, 232, 114, 104, 326, 172, 69, 133, 130, 107, 222,
118, 657, 152, 1150, 723, 339, 516, 1126, 300, 247, 372, 286,
380, 935, 849, 1239, 571, 344, 639, 640, 476, 456, 336, 563,
536, 682, 463, 477, 518, 350, 695, 384, 953, 457, 284, 345, 549,
418, 196, 276, 217, 569, 657, 37, 78, 151, 613, 392, 280, 389,
320, 332, 366, 584, 619, 258, 903, 349, 160, 124, 141, 671, 282,
333, 428, 97, 57, 251, 62, 214, 194, 167, 316, 198, 231, 135,
345, 514, 188, 787, 359, 522, 168, 128, 165, 114, 129, 131, 92,
374, 307, 307, 307, 307, 307, 86, 73, 7, 109, 208)), row.names = c(NA,
-119L), class = c("tbl_df", "tbl", "data.frame"))
#Minimize criteria 1 on the full set
minimizedC_1FullSet <- lp(direction = "min",
objective.in =
DFToOptimizeFull$criteria_1,
const.mat = matrix(
DFToOptimizeFull$itemWeight, nrow = 1),
const.dir = ">=", #firm energy must be greater than or equal to
const.rhs = 5584, #the energy target
all.bin = TRUE)
minimizedC_1FullSet
#I get 3.548285.
#--
#Now lets solve the exact same problem but on only a select number of options. Not the full set.
DFToOptimizeSelect <- DFToOptimizeFull %>% filter(item_ID %in% c( "Option19", "Option27", "Option35",
"Option39", "Option43", "Option45",
"Option50", "Option57"))
minimizedC_1Select <- lp(direction = "min",
objective.in =
DFToOptimizeSelect$criteria_1,
const.mat = matrix(
DFToOptimizeSelect$itemWeight, nrow = 1),
const.dir = ">=", #firm energy must be greater than or equal to
const.rhs = 5584, #the energy target
all.bin = TRUE)
minimizedC_1Select
#I get 3.541123, which is 0.007162 less than I got on the full set.
This may seem like splitting hairs, but it causes me to lose confidence that lp() is finding me the optimal solution.
Please help me understand why this is possible??? This optimization is a key analysis in my post-doc, and I need to be confident the optimization is working properly. Could it be a bug in the lp() function?
Upvotes: 0
Views: 63
Reputation: 15211
Given
import pandas as pd
import pulp
item_ID = pd.RangeIndex(name='item_ID', start=1, stop=120)
df = pd.DataFrame(
index=item_ID,
data={
'criteria_1': (
0.19382357870646,
0.406093545012686, 0.26444538198799, 0.26601032196915, 0.582727750839455,
0.610218171520381, 0.278839154130233, 0.121920371729594, 0.394236226297326,
0.377841864658525, 0.120477145449607, 0.26254096134174, 0.320858353224026,
0.157895381699022, 0.367337498338429, 0.178897294673147, 0.206447474122835,
0.125249409058597, 0.0964062671575285, 0.155883926971779, 0.745589005311248,
0.271814782493108, 1, 0.709925503817279, 0.283967774188142, 0.465574672453751,
0.747720902276497, 0.261597766848332, 0.223547497818285, 0.312169636303741,
0.361688040733056, 0.33135717134122, 0.720315669627635, 0.597242543157505,
0.793399950297349, 0.422337180472638, 0.319528402753296, 0.430606430135989,
0.397399279889498, 0.343212756243173, 0.330215166243809, 0.266929589837542,
0.341647931849574, 0.337592426703038, 0.42473989909206, 0.308525738460027,
0.424706956637327, 0.357390032884661, 0.280438539204411, 0.409378774656271,
0.346474174849303, 0.791793745557103, 0.450584584087061, 0.262056880638506,
0.311923434799947, 0.448835397534517, 0.330430390281398, 0.141640071895463,
0.1929124019673, 0.176881794381289, 0.379488987395177, 0.741791953949916,
0.0370066289465928, 0.0768869958215097, 0.147257049396344, 0.570100734558947,
0.36424969224812, 0.254741112761445, 0.339564812834843, 0.305964896057886,
0.286239647689116, 0.326383438614336, 0.458683804448965, 0.567810482635859,
0.235202885065509, 0.729063336203758, 0.341599616249299, 0.153703714406256,
0.117457767195094, 0.134082379254345, 0.558969536898439, 0.286059793445029,
0.322705673615406, 0.396612822128083, 0.100616428459969, 0.156259239780615,
0.232564021060054, 0.104164041865814, 0.210723751509863, 0.192397806148102,
0.166759676123656, 0.310175982060811, 0.200003698801935, 0.193767171976952,
0.137411647758468, 0.369825867340157, 0.429786451982038, 0.202788665483821,
0.786777129845286, 0.321634986042802, 0.496148738072809, 0.170282669379121,
0.140613654358518, 0.155074004935589, 0.106219651041155, 0.121895751579215,
0.144999508752868, 0.278420842748903, 0.61995781054043, 0.860520490784782,
0.860520490784782, 0.894125146651717, 0.894125146651717, 0.894125146651717,
0.443148836322235, 0.120796860641858, 0.0224245646683504, 0.223040877540759,
0.195655525952297,
),
'criteria_2': (
0.17641981214023, 0.240848545191324,
0.20835265524648, 0.248767529787931, 0.451793498520131, 0.381683037411269,
0.241976585373557, 0.116782623551492, 0.20673566370936, 0.286801382192965,
0.070652688912388, 0.364572800178567, 0.269242816166067, 0.1189751136006,
0.323905107166701, 0.193872198256378, 0.147740819150117, 0.0860132098261335,
0.0580418616854518, 0.109446297259946, 0.500647366537576, 0.180393459509617,
0.653491503844082, 0.472660220201084, 0.163162249237616, 0.283984633127125,
0.415992733266123, 0.164994074648886, 0.167340730610985, 0.268800486663044,
0.32576183372859, 0.187746554917954, 0.513868883897277, 0.407993976147579,
0.520823834574429, 0.346647590611749, 0.249144414555891, 0.36143709685635,
0.275195240878429, 0.210744193768878, 0.276940989909035, 0.213850998663282,
0.189043772333223, 0.256031575641972, 0.267888778202805, 0.299825443704003,
0.232862594751094, 0.293319667536792, 0.165555116687722, 0.249950801232123,
0.185427153206608, 0.450646068867545, 0.283324056730209, 0.215590084378473,
0.168950797268736, 0.305193207638845, 0.190241720123152, 0.168636009700241,
0.140235480483511, 0.116295393379208, 0.254990237458096, 0.534546957175079,
0.0342844548026959, 0.147617877226165, 0.0856553598188811, 0.360464565082389,
0.26523128883826, 0.157202592030299, 0.189919356942208, 0.16741285463652,
0.184588573334462, 0.194498543981115, 0.290262033461934, 0.376277472517838,
0.167260290450748, 0.494939091832352, 0.312304468360028, 0.125687206435128,
0.124559809278627, 0.10118904396013, 0.436260473197599, 0.268812179666543,
0.28390152035242, 0.319129746519373, 0.0887131242082749, 0.0936087834963992,
0.146028350082448, 0.0852325359256434, 0.141593369478712, 0.142175267274303,
0.119637448691408, 0.213864087532372, 0.143729711242536, 0.240836706142566,
0.0857609795196696, 0.284704814576988, 0.321668930739399, 0.178845649197193,
0.893388564922643, 0.22274883820528, 0.43912198342029, 0.174430954851812,
0.118927528091718, 0.100615369508259, 0.0691456056815229, 0.0878417595367306,
0.133033845100036, 0.262412843717465, 0.320020039396849, 0.440301379519025,
0.440301379519025, 0.705078014946413, 0.705078014946413, 0.705078014946413,
0.478037536193209, 0.0604065918539912, 0.0112204438672376, 0.117238408833876,
0.131639362146937,
),
'itemWeight': (
151, 482, 309, 323, 648, 766,
309, 29, 385, 232, 114, 104, 326, 172, 69, 133, 130, 107, 222,
118, 657, 152, 1150, 723, 339, 516, 1126, 300, 247, 372, 286,
380, 935, 849, 1239, 571, 344, 639, 640, 476, 456, 336, 563,
536, 682, 463, 477, 518, 350, 695, 384, 953, 457, 284, 345, 549,
418, 196, 276, 217, 569, 657, 37, 78, 151, 613, 392, 280, 389,
320, 332, 366, 584, 619, 258, 903, 349, 160, 124, 141, 671, 282,
333, 428, 97, 57, 251, 62, 214, 194, 167, 316, 198, 231, 135,
345, 514, 188, 787, 359, 522, 168, 128, 165, 114, 129, 131, 92,
374, 307, 307, 307, 307, 307, 86, 73, 7, 109, 208,
),
'select': pulp.LpVariable.matrix(
name='select', indices=item_ID, cat=pulp.LpBinary,
),
},
)
prob = pulp.LpProblem(name='backpack', sense=pulp.LpMinimize)
firm_energy = pulp.lpDot(df['itemWeight'], df['select'])
prob.addConstraint(
name='min_firm_energy', constraint=firm_energy >= 5584,
)
the first solution is
# Minimize criteria 1 on the full set
prob.setObjective(pulp.lpDot(df['criteria_1'], df['select']))
prob.solve()
assert prob.status == pulp.LpStatusOptimal
firm_energy = firm_energy.value()
df['select'] = df['select'].apply(pulp.value)
df = df.loc[df['select'] > 0.5, ['criteria_1', 'criteria_2', 'itemWeight']]
print(df)
Objective value: 3.48857926
Enumerated nodes: 0
Total iterations: 42
Time (CPU seconds): 0.03
Time (Wallclock seconds): 0.03
Option for printingOptions changed from normal to all
Total time (CPU seconds): 0.03 (Wallclock seconds): 0.03
criteria_1 criteria_2 itemWeight
item_ID
19 0.096406 0.058042 222
35 0.793400 0.520824 1239
39 0.397399 0.275195 640
43 0.341648 0.189044 563
44 0.337592 0.256032 536
45 0.424740 0.267889 682
46 0.308526 0.299825 463
50 0.409379 0.249951 695
61 0.379489 0.254990 569
and the second solution is
Objective value: 2.16717874
Enumerated nodes: 0
Total iterations: 0
Time (CPU seconds): 0.01
Time (Wallclock seconds): 0.01
Option for printingOptions changed from normal to all
Total time (CPU seconds): 0.02 (Wallclock seconds): 0.02
criteria_1 criteria_2 itemWeight
item_ID
19 0.096406 0.058042 222
27 0.747721 0.415993 1126
35 0.793400 0.520824 1239
39 0.397399 0.275195 640
43 0.341648 0.189044 563
45 0.424740 0.267889 682
50 0.409379 0.249951 695
57 0.330430 0.190242 418
It's fully expected that the two solutions be different given different objectives.
Upvotes: 0