Reputation: 747
I'm trying to use the groupby
function from the itertools
library. To group 2 lists, the following code works perfectly:
from itertools import groupby
from operator import itemgetter
# Sample input: `date` holds one date string per row; `count1` and `count2`
# are parallel lists holding one count per row (all three have 8 entries).
date = ['2019/07/25', '2019/07/25', '2019/07/27', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28']
count1 = [1, 3, 4, 0, 2, 0, 1, 1]
count2 = [2, 1, 3, 1, 1, 1, 0, 0]
def group_data(date, count):
    """Sum ``count`` for every distinct value in ``date``.

    Args:
        date: Iterable of group labels (date strings in the example), one
            per row.
        count: Iterable of numbers, parallel to ``date``.

    Returns:
        A list of ``(date, total)`` tuples, one per distinct date, ordered
        by date.
    """
    # groupby() only merges *adjacent* rows with equal keys, so sort the rows
    # by date first; this also makes the result come out in sorted order.
    rows = sorted(zip(date, count), key=itemgetter(0))
    return [
        (day, sum(value for _, value in members))
        for day, members in groupby(rows, key=itemgetter(0))
    ]
# Print the per-date totals of count1.
print(group_data(date, count1))
[('2019/07/25', 4), ('2019/07/27', 4), ('2019/07/28', 4)]
But how to rewrite it for 3 lists?
group_data(date, count1, count2)
should return:
[('2019/07/25', 4, 3), ('2019/07/27', 4, 3), ('2019/07/28', 4, 3)]
In other words, I want to get the same result as the pandas
groupby
function, but using itertools,
and get the result as a list of tuples:
# Build a frame with one row per observation, then total both count columns
# per date.
df = pd.DataFrame({'date': date, 'count1': count1, 'count2': count2})
# Select several columns with a *list* of names; indexing a GroupBy with a
# bare tuple of names ('count1', 'count2') is rejected by current pandas.
df.groupby('date')[['count1', 'count2']].sum()
date count1 count2
2019/07/25 4 3
2019/07/27 4 3
2019/07/28 4 3
Upvotes: 0
Views: 137
Reputation: 92854
For any number of lists:
from itertools import groupby
# Sample input: `dates` holds one date string per row; `count1`, `count2` and
# `count3` are parallel lists holding one count per row (8 entries each).
dates = ['2019/07/25', '2019/07/25', '2019/07/27', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28']
count1 = [1, 3, 4, 0, 2, 0, 1, 1]
count2 = [2, 1, 3, 1, 1, 1, 0, 0]
count3 = [3, 2, 5, 1, 10, 3, 0, 1]
def sum_group_data(dates, *counts):
    """Sum every count column for each distinct date.

    Args:
        dates: Iterable of group labels (date strings in the example), one
            per row.
        *counts: Any number of iterables of numbers, each parallel to
            ``dates``.

    Returns:
        A list of ``(date, total_1, ..., total_n)`` tuples, one per distinct
        date, ordered by date. With no count columns each tuple is just
        ``(date,)``.
    """
    def date_of(row):
        return row[0]

    # groupby() only merges *adjacent* rows with equal keys, so sort by date
    # first; otherwise a date that reappears later would produce a second
    # group.
    rows = sorted(zip(dates, *counts), key=date_of)

    totals = []
    for day, members in groupby(rows, key=date_of):
        # Transpose the group's rows into columns and drop the date column.
        # This handles one-row groups too, so no special case is needed.
        columns = list(zip(*members))[1:]
        totals.append((day, *map(sum, columns)))
    return totals
# Print the per-date totals of all three count lists.
print(sum_group_data(dates, count1, count2, count3))
The output:
[('2019/07/25', 4, 3, 5), ('2019/07/27', 4, 3, 5), ('2019/07/28', 4, 3, 15)]
Upvotes: 1
Reputation: 848
If you just need it for 3 lists then this works:
def group_data(date, count1, count2):
    """Sum ``count1`` and ``count2`` for every distinct value in ``date``.

    Args:
        date: Iterable of group labels (date strings in the example), one
            per row.
        count1: Iterable of numbers, parallel to ``date``.
        count2: Iterable of numbers, parallel to ``date``.

    Returns:
        A list of ``(date, total1, total2)`` tuples, one per distinct date,
        ordered by date.
    """
    # groupby() only merges *adjacent* rows with equal keys, so sort the rows
    # by date first; this also makes the result come out in sorted order.
    rows = sorted(zip(date, count1, count2), key=itemgetter(0))

    grouped = []
    for day, members in groupby(rows, key=itemgetter(0)):
        # Transpose the group's rows into one tuple per column.
        _, first_counts, second_counts = zip(*members)
        grouped.append((day, sum(first_counts), sum(second_counts)))
    return grouped
But I think it could be much simpler.
In case you need it for n lists:
def group_data(date, *counts):
    """Sum each of the ``counts`` columns for every distinct value in ``date``.

    Args:
        date: Iterable of group labels (date strings in the example), one
            per row.
        *counts: Any number of iterables of numbers, each parallel to
            ``date``.

    Returns:
        A list of ``(date, total_1, ..., total_n)`` tuples, one per distinct
        date, ordered by date.
    """
    # groupby() only merges *adjacent* rows with equal keys, so sort the rows
    # by date first; this also makes the result come out in sorted order.
    rows = sorted(zip(date, *counts), key=itemgetter(0))
    return [
        # zip(*members) transposes rows to columns; [1:] skips the date column.
        (day, *(sum(column) for column in list(zip(*members))[1:]))
        for day, members in groupby(rows, key=itemgetter(0))
    ]
Upvotes: 2
Reputation: 550
You don't need itertools for this task. It can be done simply by using the zip function:
# Sample data: one date string per row plus two parallel count columns.
date = ['2019/07/25', '2019/07/25', '2019/07/27', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28', '2019/07/28']
count1 = [1, 3, 4, 0, 2, 0, 1, 1]
count2 = [2, 1, 3, 1, 1, 1, 0, 0]
# zip() is lazy in Python 3, so materialise it to print the row tuples.
# Note: this only pairs the rows up; it does not sum rows sharing a date.
print(list(zip(date, count1, count2)))
Upvotes: -1