Reputation: 702
I have a list of dictionaries and I want to find the frequency of each item in the list. I simply used collections.Counter(),
but it throws an error saying:
unhashable type: 'dict'
My code:
# Build one record (a dict) per dependency string and collect the records in
# final_list, then try to count how often each record occurs.
# NOTE(review): dep_list, dep_dict, item and final_list are defined outside
# this snippet; the original indentation was lost when the code was pasted.
for dep in dep_list:
# Each dep is a comma-separated string; fields 0-5 are read by position.
dep_dict['parent'] = dep.split(',')[0]
dep_dict['parent_pos'] = dep.split(',')[1]
dep_dict['parent_dep'] = dep.split(',')[2]
dep_dict['child'] = dep.split(',')[3]
dep_dict['child_pos'] = dep.split(',')[4]
dep_dict['child_dep'] = dep.split(',')[5]
# The remaining three fields are copied from item, not from the dep string.
dep_dict['avl_sent'] = item['avl_sent']
dep_dict['avl_author_type'] = item['avl_author_type']
dep_dict['avl_brand_1'] = item['avl_brand_1']
# Store a copy: dep_dict itself is reused and overwritten on every iteration.
final_list.append(dep_dict.copy())
# Counter uses its elements as dictionary keys, so they must be hashable;
# presumably this is the call that raises "unhashable type: 'dict'".
counts = collections.Counter(final_list)
print counts
The contents of final_list are exactly what I want; I just need the frequency of each item. In the end, I want to output the whole thing as JSON.
Can someone help me with this?
EDIT
Sample output after print final_list
[{'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'dobj', 'child': u'event', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NNS', 'child_dep': u'dobj', 'child': u'emergingleaders', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'company', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'event', 'avl_sent': u'positive', 'parent_dep': u'dobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'networking', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'company', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'brewing', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'DT', 'child_dep': u'dobj', 'child': u'this', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'showdown', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'pobj', 'child': u'us', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'we', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': 
u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'it', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'showdown', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'JJ', 'child_dep': u'amod', 'child': u'final', 'avl_brand_1': u'Virtua'}]
ERROR in prepare final output
unhashable type: 'dict'
Upvotes: 0
Views: 83
Reputation: 123413
You can get the frequencies of the individual values across all the dictionaries like this:
from collections import Counter
import json
final_list = [{'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'dobj', 'child': u'event', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NNS', 'child_dep': u'dobj', 'child': u'emergingleaders', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'company', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'event', 'avl_sent': u'positive', 'parent_dep': u'dobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'networking', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'company', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'brewing', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'DT', 'child_dep': u'dobj', 'child': u'this', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'showdown', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'pobj', 'child': u'us', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'we', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 
'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'it', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'showdown', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'JJ', 'child_dep': u'amod', 'child': u'final', 'avl_brand_1': u'Virtua'}]
# Tally every value of every dictionary in a single pass. Counter is a dict
# subclass, so json.dumps can serialise the result directly.
counter = Counter(value for entry in final_list for value in entry.values())
print(json.dumps(counter, indent=4))
Output:
{
"VBD": 8,
"showdown": 2,
"it": 1,
"individual": 11,
"JJ": 1,
"DT": 1,
"amod": 1,
"event": 2,
"networking": 1,
"nn": 2,
"positive": 11,
"nsubj": 2,
"emergingleaders": 1,
"Virtua": 6,
"PRP": 3,
"Kennedy Health": 5,
"final": 1,
"do": 5,
"we": 1,
"get": 3,
"company": 2,
"brewing": 1,
"NN": 8,
"this": 1,
"NNS": 1,
"us": 1,
"dobj": 4,
"pobj": 5,
"root": 8
}
Upvotes: 0
Reputation: 73450
To elaborate on the namedtuple
mentioned in the comments:
from collections import Counter, namedtuple

# A namedtuple keeps the named fields of the original dict but, unlike a
# dict, is hashable (provided its field values are), so Counter can count
# identical records.
# NOTE(review): dep_list, item and final_list come from the asker's
# surrounding code and are not defined in this snippet.
mytuple = namedtuple('mytuple', (
    'parent', 'parent_pos', 'parent_dep',
    'child', 'child_pos', 'child_dep',
    'avl_sent', 'avl_author_type', 'avl_brand_1',
))

for dep in dep_list:
    # Split once per dependency string instead of once per field.
    dep_tokens = dep.split(',')
    dep_tuple = mytuple(
        parent=dep_tokens[0],
        parent_pos=dep_tokens[1],
        parent_dep=dep_tokens[2],
        child=dep_tokens[3],
        child_pos=dep_tokens[4],
        child_dep=dep_tokens[5],
        avl_sent=item['avl_sent'],
        avl_author_type=item['avl_author_type'],
        avl_brand_1=item['avl_brand_1'],
    )
    final_list.append(dep_tuple)

# Count how many times each distinct record occurs.
counts = Counter(final_list)
# Parenthesised form prints the same on Python 2 and Python 3.
print(counts)
Upvotes: 1