alexng
alexng

Reputation: 45

How to do python itertools.combinations

I am trying to find out the list of possible products from all supplier combinations. Finding out either one in the combinations can produce the products

import itertools
import pandas as pd
import numpy as np

Column = {'ID':['1','2','3','4','5'],'Supplier 1':['B','B','A','B','B'],'Supplier 2':['A','NaN','B','NaN','A']}
df=pd.DataFrame(Column)
df

# Define all Supplier Columns
cols = [c for c in df.columns if "Supplier" in c]
# get unique suppliers
suppl = np.unique(np.concatenate([df[c].dropna() for c in cols]))
result = []
for sn in range(len(suppl)):
# generate combinations of suppliers
for combi in itertools.combinations(suppl, sn+1):
result.append({combi:......

From

ID  Supplier 1  Supplier 2
1    B          A
2    B          NaN
3    A          B
4    B          NaN
5    B          A

Desire(Either one of the supplier can produce):

Supplier    ID
A           1,3,5
B           1,2,3,4,5
A,B         1,2,3,4,5

NEW CODE:

from itertools import combinations, chain
import pandas as pd
import numpy as np

df = {'ID':['1','2','3','4','5'],'Supplier 1':['B','B','A','B','B'],'Supplier 2':['A',np.nan,'B',np.nan,'A']}
df=pd.DataFrame(Column)
from itertools import combinations, chain

g1 = df.groupby(['Supplier 1'])['ID'].apply(list)
g2 = df.groupby(['Supplier 2'])['ID'].apply(list)

res = (g1 + g2).to_dict()
res = [[','.join(comb), ','.join(sorted(set(chain.from_iterable([res[k] for k in comb]))))]
       for x in range(1, len(res) + 1) for comb in combinations(res.keys(), x)]

df2 = pd.DataFrame(res, columns=['Supplier', 'ID'])
print(df2)

Upvotes: 3

Views: 305

Answers (1)

deadshot
deadshot

Reputation: 9071

Not an efficient solution but this will work

from itertools import combinations, chain

g1 = df.groupby(['Supplier 1'])['ID'].apply(list)
g2 = df.groupby(['Supplier 2'])['ID'].apply(list)

res = (g1 + g2).to_dict()
res = [[','.join(comb), ','.join(sorted(set(chain.from_iterable([res[k] for k in comb]))))]
       for x in range(1, len(res) + 1) for comb in combinations(res.keys(), x)]

df2 = pd.DataFrame(res, columns=['Supplier', 'ID'])
print(df2)

Output:

  Supplier         ID
0        A      1,3,5
1        B  1,2,3,4,5
2      A,B  1,2,3,4,5

Upvotes: 1

Related Questions