Reputation: 5854
How do I sum duplicate elements in a list of lists of dictionaries?
Sample list:
# Sample input: a list of groups, each group a list of {'user', 'rating'}
# records in which the same user may occur more than once.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
Expected output:
# Desired result: within each group, records of the same user are merged
# into one record whose rating is the sum of that user's ratings.
op = [
    [
        {'user': 1, 'rating': 20},
        {'user': 2, 'rating': 10},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 30},
    ],
]
Upvotes: 10
Views: 864
Reputation: 71689
You can try:
from itertools import groupby
# For every group in `data`: sort the records by user so that groupby sees
# each user's records as one consecutive run, sum the ratings of each run,
# then order the merged records of the group by their summed rating.
result = []
for records in data:
    by_user = sorted(records, key=lambda rec: rec['user'])
    merged = []
    for user_id, run in groupby(by_user, key=lambda rec: rec['user']):
        total = sum([rec['rating'] for rec in run])
        merged.append({'user': user_id, 'rating': total})
    # Sort the merged records `rating` wise:
    merged.sort(key=lambda rec: rec['rating'])
    result.append(merged)
# %%timeit
# 7.54 µs ± 220 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
UPDATE (A more efficient solution):
# Single pass per group: accumulate each user's rating in a dict keyed by
# user id, then emit the merged records ordered by their summed rating.
result = []
for lst in data:
    # user id -> merged record; dict lookup avoids sorting by user first.
    visited = {}
    for d in lst:
        user = d['user']
        if user in visited:
            visited[user]['rating'] += d['rating']
        else:
            # Store a copy: keeping `d` itself would let the `+=` above
            # rewrite the dicts inside `data`, so a second run over the
            # same input would produce different (inflated) totals.
            visited[user] = dict(d)
    result.append(sorted(visited.values(), key=lambda d: d['rating']))
# %%timeit
# 2.5 µs ± 54 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Result:
# print(result)
[
[
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10},
{'user': 1, 'rating': 20}
],
[
{'user': 4, 'rating': 4},
{'user': 1, 'rating': 30},
{'user': 2, 'rating': 80}
]
]
Upvotes: 4
Reputation: 18106
Python list comprehension:
from collections import Counter
# For each group, add up the ratings per user in a Counter and list the
# users from highest to lowest total (the order most_common() returns).
# A plain dict comprehension {user: rating} would keep only the LAST rating
# seen for a duplicated user instead of the sum, so accumulate explicitly.
x = []
for group in data:
    totals = Counter()
    for d in group:
        totals[d['user']] += d['rating']
    x.append([
        {'user': user, 'rating': rating}
        for user, rating in totals.most_common()
    ])
Output (as posted; the summed rating for user 1 in the second list should be 30):
[
[
{
"rating": 20,
"user": 1
},
{
"rating": 10,
"user": 2
},
{
"rating": 10,
"user": 3
}
],
[
{
"rating": 80,
"user": 2
},
{
"rating": 10,
"user": 1
},
{
"rating": 4,
"user": 4
}
]
]
Upvotes: 0
Reputation: 12624
# Per group: build a {user: summed rating} mapping, then turn every entry
# of that mapping back into a {'user', 'rating'} record.
op = []
for group in data:
    totals = {}
    for record in group:
        uid = record['user']
        totals[uid] = totals.get(uid, 0) + record['rating']
    summed = []
    for uid, total in totals.items():
        summed.append({'user': uid, 'rating': total})
    op.append(summed)
N.B.: since Python 3.7, dictionaries officially preserve insertion order, so the users appear in the output in the order in which they are first seen.
Upvotes: 2
Reputation: 44434
Sorting should be avoided because each item can be processed in a single pass. Any hash-based technique should be better.
Here's an alternative solution that uses a defaultdict instead of an expensive sort/groupby or pandas.
from collections import defaultdict
from functools import reduce
def reduce_func(state, item):
    """Fold one rating record into the running per-user totals.

    Args:
        state: Mapping from user id to that user's merged record. Looking
            up a user that has not been seen yet must yield a record with
            a "rating" entry (e.g. a defaultdict whose factory returns
            {"rating": 0}); a plain dict raises KeyError for a new user.
        item: Record with "user" and "rating" entries.

    Returns:
        The same ``state`` object, updated in place, so the function can be
        used directly as the callable of ``functools.reduce``.
    """
    user = item["user"]
    # A fresh record is built each time, so `item` itself is never mutated.
    state[user] = {
        "user": user,
        "rating": state[user]["rating"] + item["rating"],
    }
    return state
# Fold every group of `data` with reduce_func, starting from a defaultdict
# that hands out a zero-rating record for unseen users, and keep the merged
# records of each group as a list.
output = []
for elem in data:
    totals = reduce(reduce_func, elem, defaultdict(lambda: {"rating": 0}))
    output.append(list(totals.values()))
Upvotes: 0
Reputation: 13049
# Sample input: a list of rows, each row a list of {'user', 'rating'}
# records in which the same user may occur more than once.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]

# Field that identifies which records belong together.
keyname = "user"

# One output row per input row. Within a row, records sharing the same
# `keyname` value are merged and every other field is summed.
# (Named `summed_rows` rather than `all` so the builtin is not shadowed.)
summed_rows = []
for row in data:
    # key value -> merged record. A dict lookup replaces a linear scan of
    # the records merged so far, and dicts keep first-seen order, so the
    # output order is unchanged. Assumes the key values are hashable.
    merged = {}
    for record in row:
        key = record[keyname]
        target = merged.setdefault(key, {keyname: key})
        for field, value in record.items():
            if field == keyname:
                continue
            target[field] = target.get(field, 0) + value
    summed_rows.append(list(merged.values()))

print(summed_rows)
gives:
[[{'user': 1, 'rating': 20}, {'user': 2, 'rating': 10}, {'user': 3, 'rating': 10}], [{'user': 4, 'rating': 4}, {'user': 2, 'rating': 80}, {'user': 1, 'rating': 30}]]
Upvotes: 0
Reputation: 168
This should work:
from collections import defaultdict
# Per group: accumulate each user's rating in a defaultdict(int), then
# rebuild one {"user", "rating"} record per accumulated entry.
data_without_duplicates = []
for group in data:
    totals = defaultdict(int)
    for record in group:
        totals[record["user"]] += record["rating"]
    merged = []
    for user, rating in totals.items():
        merged.append({"user": user, "rating": rating})
    data_without_duplicates.append(merged)
Upvotes: 1
Reputation: 389
import pprint
# Sample input: a list of groups, each group a list of {'user', 'rating'}
# records in which the same user may occur more than once.
data = [
    [
        {'user': 1, 'rating': 0},
        {'user': 2, 'rating': 10},
        {'user': 1, 'rating': 20},
        {'user': 3, 'rating': 10},
    ],
    [
        {'user': 4, 'rating': 4},
        {'user': 2, 'rating': 80},
        {'user': 1, 'rating': 20},
        {'user': 1, 'rating': 10},
    ],
]
def find(user, l):
    """Return the index of the first record in ``l`` whose 'user' equals ``user``.

    Returns -1 when no record matches (including when ``l`` is empty).
    """
    matches = (pos for pos, entry in enumerate(l) if user == entry['user'])
    return next(matches, -1)
# For each group, merge records of the same user by summing their ratings;
# merged records keep the order in which each user first appears.
data_sum = []
for group in data:
    list_sum = []
    for record in group:
        idx = find(record['user'], list_sum)
        if idx == -1:
            # Append a copy: appending `record` itself would let the `+=`
            # below rewrite the dicts inside `data`, corrupting the input
            # and changing the totals if the snippet is run again.
            list_sum.append(dict(record))
        else:
            list_sum[idx]['rating'] += record['rating']
    data_sum.append(list_sum)
pprint.pprint(data_sum)
Upvotes: 0
Reputation: 78650
With pandas:
>>> import pandas as pd
>>> [pd.DataFrame(dicts).groupby('user', as_index=False, sort=False).sum().to_dict(orient='records') for dicts in data]
[[{'user': 1, 'rating': 20},
{'user': 2, 'rating': 10},
{'user': 3, 'rating': 10}],
[{'user': 4, 'rating': 4},
{'user': 2, 'rating': 80},
{'user': 1, 'rating': 30}]]
Upvotes: 5