kskp
kskp

Reputation: 702

Find item frequency in a list of dictionaries

I have a list of dictionaries, and I want to find the frequency of each item in the list. I tried passing the list to collections.Counter(), but it raises an error saying:

unhashable type: 'dict'

My code:

# Build one dict per comma-separated dependency string and collect the
# results in final_list, then try to count how often each dict occurs.
for dep in dep_list:
    # Fields 0-5 of the comma-separated string fill the parent/child keys.
    dep_dict['parent'] = dep.split(',')[0]
    dep_dict['parent_pos'] = dep.split(',')[1]
    dep_dict['parent_dep'] = dep.split(',')[2]
    dep_dict['child'] = dep.split(',')[3]
    dep_dict['child_pos'] = dep.split(',')[4]
    dep_dict['child_dep'] = dep.split(',')[5]
    # The avl_* values are read from `item`, not from the dependency string.
    dep_dict['avl_sent'] = item['avl_sent']
    dep_dict['avl_author_type'] = item['avl_author_type']
    dep_dict['avl_brand_1'] = item['avl_brand_1']
    # Store a copy: dep_dict itself is overwritten on the next iteration.
    final_list.append(dep_dict.copy())
# Counter must hash every element it counts; the elements here are dicts,
# which is what produces the reported "unhashable type: 'dict'" error.
counts = collections.Counter(final_list)
print counts

The contents of final_list are exactly what I want; I just need the frequency of each item. In the end, I want to output the whole thing as JSON.

Can someone help me with this?

EDIT

Sample output after print final_list

[{'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'dobj', 'child': u'event', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NNS', 'child_dep': u'dobj', 'child': u'emergingleaders', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'company', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'event', 'avl_sent': u'positive', 'parent_dep': u'dobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'networking', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'company', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'brewing', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'DT', 'child_dep': u'dobj', 'child': u'this', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'showdown', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'pobj', 'child': u'us', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'we', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': 
u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'it', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'showdown', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'JJ', 'child_dep': u'amod', 'child': u'final', 'avl_brand_1': u'Virtua'}]

ERROR in prepare final output
unhashable type: 'dict'

Upvotes: 0

Views: 83

Answers (2)

martineau
martineau

Reputation: 123413

You can get the frequencies of the individual values across all of the dictionaries like this:

from collections import Counter
import json

final_list = [{'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'dobj', 'child': u'event', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NNS', 'child_dep': u'dobj', 'child': u'emergingleaders', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'get', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'company', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'event', 'avl_sent': u'positive', 'parent_dep': u'dobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'networking', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'company', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'NN', 'child_dep': u'nn', 'child': u'brewing', 'avl_brand_1': u'Kennedy Health'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'DT', 'child_dep': u'dobj', 'child': u'this', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'NN', 'child_dep': u'pobj', 'child': u'showdown', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'pobj', 'child': u'us', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'we', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'VBD', 
'avl_author_type': u'individual', 'parent': u'do', 'avl_sent': u'positive', 'parent_dep': u'root', 'child_pos': u'PRP', 'child_dep': u'nsubj', 'child': u'it', 'avl_brand_1': u'Virtua'}, {'parent_pos': u'NN', 'avl_author_type': u'individual', 'parent': u'showdown', 'avl_sent': u'positive', 'parent_dep': u'pobj', 'child_pos': u'JJ', 'child_dep': u'amod', 'child': u'final', 'avl_brand_1': u'Virtua'}]

# Tally every value of every dictionary in a single pass. Note that this
# counts individual field values, not whole dictionaries.
counter = Counter(
    value
    for record in final_list
    for value in record.values()
)

# Counter is a dict subclass, so json.dumps can serialise it directly.
print(json.dumps(counter, indent=4))

Output:

{
    "VBD": 8,
    "showdown": 2,
    "it": 1,
    "individual": 11,
    "JJ": 1,
    "DT": 1,
    "amod": 1,
    "event": 2,
    "networking": 1,
    "nn": 2,
    "positive": 11,
    "nsubj": 2,
    "emergingleaders": 1,
    "Virtua": 6,
    "PRP": 3,
    "Kennedy Health": 5,
    "final": 1,
    "do": 5,
    "we": 1,
    "get": 3,
    "company": 2,
    "brewing": 1,
    "NN": 8,
    "this": 1,
    "NNS": 1,
    "us": 1,
    "dobj": 4,
    "pobj": 5,
    "root": 8
}

Upvotes: 0

user2390182
user2390182

Reputation: 73450

To elaborate on the namedtuple mentioned in the comments:

import collections
from collections import namedtuple

# Hashable record type for one dependency entry. Unlike a dict, a namedtuple
# can be used as a Counter key. The field list must name every keyword that
# is passed when building a record below, including 'parent'.
mytuple = namedtuple('mytuple', (
    'parent', 'parent_pos', 'parent_dep', 'child', 'child_pos',
    'child_dep', 'avl_sent', 'avl_author_type', 'avl_brand_1',
))
for dep in dep_list:
    # Split once per entry instead of once per field.
    dep_tokens = dep.split(',')
    dep_tuple = mytuple(
        parent=dep_tokens[0],
        parent_pos=dep_tokens[1],
        parent_dep=dep_tokens[2],
        child=dep_tokens[3],
        child_pos=dep_tokens[4],
        child_dep=dep_tokens[5],
        avl_sent=item['avl_sent'],
        avl_author_type=item['avl_author_type'],
        avl_brand_1=item['avl_brand_1'],
    )
    final_list.append(dep_tuple)
# Equal tuples hash equally, so identical entries are counted together.
counts = collections.Counter(final_list)
# Parenthesised form prints the same in Python 2 and also parses in Python 3.
print(counts)

Upvotes: 1

Related Questions