Reputation: 53
I have been racking my brain on this for hours now. I'm trying to replace each offense number (1-30) with its corresponding offense type (e.g. Stealing, Embezzlement, Burglary, etc.) and then sort the results into a list.
Here is a sample of the output I currently have:
offense # : Victim Total
1 189
10 712
11 1844
12 184
13 147
14 4364
15 595
16 175
17 387
18 2893
2 597
20 661
Here is the code I have so far. I would like to use the offense_map dictionary to replace the offense numbers (1-30) in the output with the offense type, and then sort the list in descending order of victim count (right column), from the largest to the smallest. I am working with ~100,000 rows of data, so efficiency is important for this program.
from collections import Counter
# Tally victims per offense number from two CSV files.
#
# crime_dict maps report number -> [zip_code, offense, ...]; one 'VIC' marker
# is appended to that list for every victim row found in details.csv.
crime_dict = dict()

# `with` guarantees the file is closed; a bare `incidents_f.close` (without
# parentheses) only references the method and never calls it.
with open('incidents.csv', mode="r") as incidents_f:
    for line in incidents_f:
        line_1st = line.strip().split(",")
        # Skip the header row.
        if line_1st[0].upper() != "REPORT_NO":
            report_no = line_1st[0]
            offense = line_1st[3]
            zip_code = line_1st[4]
            # Zip codes shorter than five characters are replaced by a placeholder.
            if len(zip_code) < 5:
                zip_code = "99999"
            if report_no in crime_dict:
                # list.append returns None, so chaining .append(...).append(...)
                # raises AttributeError; extend adds both values in one call.
                crime_dict[report_no].extend([zip_code, offense])
            else:
                crime_dict[report_no] = [zip_code, offense]

with open('details.csv', mode='r') as details_f:
    for line in details_f:
        line_1st = line.strip().split(",")
        # Skip the header row.
        if line_1st[0].upper() != "REPORT_NO":
            report_no = line_1st[0]
            involvement = line_1st[1]
            # Only victim rows are recorded; every other involvement is ignored.
            if involvement.upper() == 'VIC':
                crime_dict[report_no].append(involvement.upper())

# Offense number (as a string) -> offense name.
offense_map = {'1':'Homicide','2':'Rape','3':'Robbery','4':'Assault','5':'Burglary','6':'Stealing','7':'Auto Theft','8':'Non Agg Assault','9':'Arson','10':'Forgery','11':'Fraud','12':'Embezzlement','13':'Stolen Property','14':'Property Damage','15':'Weapons Law Violation','16':'Prostitution','17':'Sex Offense Other','18':'Possession/Sale/Dist','20':'Family Offense','21':'DUI','22':'Liquor Law Violation','24':'Disorderly','25':'Loitering','26':'Misc Violation','29':'Missing/Runaway','30':'Casualty/Suicide'}

# Offense number -> total number of 'VIC' markers across all of its reports.
victims_by_offense = {}
for v in crime_dict.values():
    # Index 1 holds the offense recorded when the report was first seen.
    # Named offense_no (not zip) so the builtin zip() is not shadowed.
    offense_no = v[1]
    if offense_no not in victims_by_offense:
        victims_by_offense[offense_no] = 0
    victims_by_offense[offense_no] += v.count('VIC')

# Keys are strings, so this ordering is lexicographic ('10' sorts before '2').
for offense_no in sorted(victims_by_offense):
    print(offense_no, victims_by_offense[offense_no])
Upvotes: 1
Views: 806
Reputation: 136
To get a list of keys in victims_by_offense
in descending order of Victim Total:
# Victim totals keyed by offense number (stored as strings).
victims_by_offense = {
    '1': 189,
    '10': 712,
    '11': 1844,
    '12': 184,
    '13': 147,
    '14': 4364,
    '15': 595,
    '16': 175,
    '17': 387,
    '18': 2893,
    '2': 597,
    '20': 661,
}
# Rank the (offense, total) pairs by total, largest first, then keep only the
# offense keys. sorted() is stable, so equal totals keep their dict order.
ranked_pairs = sorted(
    victims_by_offense.items(), key=lambda pair: pair[1], reverse=True
)
sorted_keys = [offense_no for offense_no, _ in ranked_pairs]
Then
# Print each offense name next to its victim total, following the order of
# sorted_keys. The loop variable is named offense_no rather than zip so that
# the builtin zip() is not shadowed.
for offense_no in sorted_keys:
    print(offense_map[offense_no], victims_by_offense[offense_no])
I get
('Property Damage', 4364)
('Possession/Sale/Dist', 2893)
('Fraud', 1844)
('Forgery', 712)
('Family Offense', 661)
('Rape', 597)
('Weapons Law Violation', 595)
('Sex Offense Other', 387)
('Homicide', 189)
('Embezzlement', 184)
('Prostitution', 175)
('Stolen Property', 147)
Upvotes: 1
Reputation: 54223
I tweaked your code a bit to use csv.reader
objects instead of stripping and splitting yourself, as well as changed your data structure to be
crimes = {report_no: {'offense': offense_number,
'zip': zip_code,
'victims': victim_count},
...}
but I think it works much better this way.
import csv
import operator  # provides operator.itemgetter, used for the final sort

# crimes maps report number -> {'offense': ..., 'zip': ..., 'victims': int}.
crimes = dict()

# build `crimes` dict with zero-count victims
# newline="" is the csv module's documented way to open files for csv.reader.
with open("incidents.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerate an empty file
    # Columns 1, 4 and 5 are the report number, offense and zip code;
    # everything else on the row is ignored.
    for report_no, _, _, offense, zip_code, *_ in reader:
        # Zip codes shorter than five characters are replaced by a placeholder.
        if len(zip_code) < 5:
            zip_code = "99999"
        crimes[report_no] = {'offense': offense,
                             'zip': zip_code,
                             'victims': 0}

# parse victims information
with open("details.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerate an empty file
    for report_no, involvement, *_ in reader:
        # Only rows whose involvement is "VIC" (any case) count as victims.
        if involvement.upper() == "VIC":
            crimes[report_no]['victims'] += 1

# Offense number (as a string) -> offense name.
offense_map = {'1':'Homicide',
               '2':'Rape',
               '3':'Robbery',
               '4':'Assault',
               '5':'Burglary',
               '6':'Stealing',
               '7':'Auto Theft',
               '8':'Non Agg Assault',
               '9':'Arson',
               '10':'Forgery',
               '11':'Fraud',
               '12':'Embezzlement',
               '13':'Stolen Property',
               '14':'Property Damage',
               '15':'Weapons Law Violation',
               '16':'Prostitution',
               '17':'Sex Offense Other',
               '18':'Possession/Sale/Dist',
               '20':'Family Offense',
               '21':'DUI',
               '22':'Liquor Law Violation',
               '24':'Disorderly',
               '25':'Loitering',
               '26':'Misc Violation',
               '29':'Missing/Runaway',
               '30':'Casualty/Suicide'}

# Every known offense name starts at zero so it appears in the output even
# when no victims were recorded for it.
counts = {k: 0 for k in offense_map.values()}

# start counting crimes by victim count (by name, not number)
for crime_info in crimes.values():
    offense_no = crime_info['offense']
    try:
        # Only this lookup can fail: the offense number may have no mapping.
        offense_name = offense_map[offense_no]
    except KeyError:
        # we couldn't map that
        print("No such offense: {}".format(offense_no))
    else:
        counts[offense_name] += crime_info['victims']

# sort by value, largest victim count first
for k, v in sorted(counts.items(), key=operator.itemgetter(1), reverse=True):
    print(k, v)
Upvotes: 0