Reputation: 509
In the image above, I want to find the count of each number that is repeated in the 'Source' column, and I also want its corresponding values.
Expected output:-
Source Count Destination
99 2 [81,230]
2997 4 [153,238,246,338]
2183 3 [204,266,332]
Upvotes: 0
Views: 91
Reputation: 5601
def gen_df():
    """Build the sample long-format frame used in this answer.

    Parses the expected-output table from the question, converts each
    ``Destination`` cell from its text form into a list, and explodes that
    list so every (Source, Destination) pair becomes its own row.

    Returns:
        A DataFrame with columns ``Source`` and ``Destination``, sorted by
        ``Destination`` and carrying a fresh 0..n-1 index.
    """
    import ast  # local import: only needed to parse the list literals

    df_str = '''
Source Count Destination
99 2 [81,230]
2997 4 [153,238,246,338]
2183 3 [204,266,332]
'''
    # Raw string so the regex class \s is not read as a string escape.
    df = pd.read_csv(io.StringIO(df_str.strip()), sep=r'\s+', index_col=False)
    # literal_eval turns "[81,230]" into a list without executing arbitrary
    # code, which plain eval would do.
    df['Destination'] = df['Destination'].map(ast.literal_eval)
    df_raw = (
        df.explode('Destination')[['Source', 'Destination']]
        .sort_values('Destination')
        .reset_index(drop=True)
    )
    return df_raw
# Start from the long-format sample: one row per (Source, Destination) pair.
df_raw = gen_df()
print(df_raw)
# Source Destination
# 0 99 81
# 1 2997 153
# 2 2183 204
# 3 99 230
# 4 2997 238
# 5 2997 246
# 6 2183 266
# 7 2183 332
# 8 2997 338

# Tag every row with 1 so that summing per group yields the group size.
df_raw['count'] = 1
# Per Source: add up the 1s and gather the Destination values into a list.
dfz = df_raw.groupby('Source').agg({'count': 'sum', 'Destination': list})
print(dfz.reset_index())
# Source count Destination
# 0 99 2 [81, 230]
# 1 2183 3 [204, 266, 332]
# 2 2997 4 [153, 238, 246, 338]
Upvotes: 1
Reputation: 2108
# For each Source group, build a row holding the group's size and an array
# of its Destination values, then move Source back into a regular column.
df.groupby("Source").apply(
    lambda grp: pd.Series({
        "Count": len(grp),
        "Destination": grp["Destination"].values,
    })
).reset_index()
Upvotes: 3