Reputation: 73
I was able to extract some data with below code but I wanted include year along with month in the below extracted data. Sample data looks like below. Looking for a bifurcation based on date in the data.
people = [
{"name": "Tom", "age": 10, "city": "NewYork", "Date": '01/01/2021'},
{"name": "Mark", "age": 5, "country": "Japan", "Date": '05/01/2021'},
{"name": "Pam", "age": 7, "city": "London", "Date": '03/06/2021'},
{"name": "Tom", "hight": 163, "city": "California", "Date": '04/06/2021'},
{"name": "Lena", "weight": 45, "country": "Italy", "Date": '12/12/2020'},
{"name": "Ben", "age": 17, "city": "Colombo", "Date": '11/12/2020'},
{"name": "Lena", "gender": "Female", "country": "Italy", "Date": '8/01/2020'},
{"name": "Ben", "gender": "Male", "city": "Colombo", "Date": '7/01/2020'},
{"name": "Tom", "age": 10, "country": "Italy", "Date": '01/01/2021'},
{"name": "Mark", "age": 5, "country": "Japan", "Date": '05/01/2021'},
{"name": "Tom", "age": 7, "city": "London", "Date": '03/06/2021'},
{"name": "Tom", "hight": 163, "country": "Japan", "Date": '04/06/2021'}
]
def groupby( fld ):
vals = { fld: 0 }
for row in people:
if fld in row:
vals[fld] += 1
if row[fld] not in vals:
vals[row[fld]] = 1
else:
vals[row[fld]] += 1
return vals
months =
('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')
def groupbydate( fld ):
vals = {}
for row in people:
if fld in row and 'Date' in row:
month = months[int(row['Date'].lstrip('0').split('/')[0])-1]
if row[fld] not in vals:
vals[row[fld]] = {}
if month not in vals[row[fld]]:
vals[row[fld]][month] = 1
else:
vals[row[fld]][month] += 1
return vals
print( groupby( 'name' ) )
print( groupby( 'city' ) )
print( groupby( 'country' ) )
print( )
print( groupbydate( 'city' ) )
Present Output
{'name': 12, 'Tom': 5, 'Mark': 2, 'Pam': 1, 'Lena': 2, 'Ben': 2}
{'city': 6, 'NewYork': 1, 'London': 2, 'California': 1, 'Colombo': 2}
{'country': 6, 'Japan': 3, 'Italy': 3}
{'NewYork': {'Jan': 1}, 'London': {'Mar': 2}, 'California': {'Apr':
1}, 'Colombo': {'Nov': 1, 'Jul': 1}}
Wanted output as
{'NewYork': {'Jan 21': 1}, 'London': {'Mar 20': 2}, 'California':
{'Apr 20': 1}, 'Colombo': {'Nov 20': 1, 'Jul 20': 1}}
Upvotes: 0
Views: 232
Reputation: 33
Convert the date string into the date time object and then format them as per your needs.
Look at this codes which is quite similar to the code you submitted.
def groupbydate( fld ):
vals = {}
for row in people:
if fld in row and 'Date' in row:
datestring=row['Date']
dt = datetime.strptime(datestring, '%m/%d/%Y')
month = dt.strftime("%b %y")
print(month)
if row[fld] not in vals:
vals[row[fld]] = {}
if month not in vals[row[fld]]:
vals[row[fld]][month] = 1
else:
vals[row[fld]][month] += 1
return vals
Upvotes: 1
Reputation: 24314
Try:
df=pd.DataFrame(people)
df['Date']=pd.to_datetime(df['Date']).dt.strftime('%b %y')
out=pd.crosstab(df['Date'],df['city']).rename_axis(columns=None)
Finally:
d1=df['name'].value_counts().to_dict()
d1['name']=len(df)
d2=df['city'].value_counts().to_dict()
d2['city']=df['city'].value_counts().sum()
d3=df['country'].value_counts().to_dict()
d3['country']=df['country'].value_counts().sum()
d4=out.apply(lambda x:dict(x[x.ne(0)])).to_dict()
output:
print(d1)
#output
{'Tom': 5, 'Lena': 2, 'Ben': 2, 'Mark': 2, 'Pam': 1, 'name': 12}
print(d2)
#output
{'London': 2, 'Colombo': 2, 'NewYork': 1, 'California': 1, 'city': 6}
print(d3)
#output
{'Japan': 3, 'Italy': 3, 'country': 6}
print(d4)
#output
{'California': {'Apr 21': 1},
'Colombo': {'Jul 20': 1, 'Nov 20': 1},
'London': {'Mar 21': 2},
'NewYork': {'Jan 21': 1}}
Upvotes: 1
Reputation: 106
You can convert these dates into DateTime objects and then format them according to your needs.
from datetime import datetime
for record in people:
record["Date"] = datetime.strptime(record["Date"], "%m/%d/%Y")
and then you can make it into month year as record["Date"].strftime("%b %y")
which will give you the MMM YY
format.
Optional: One of the observations is that you want to perform group by, so what you can do is use pandas.
import pandas as pd
df = pd.DataFrame(people)
def groupby_date(fld):
df.groupby(by=fld, sort=False).apply(lambda group: group.groupby(by="Date").apply(len))
result = dict()
for (fld, date), count in x.items():
date = datetime.datetime.strptime(date, "%m/%d/%Y")
result[fld] = {date.strftime("%b %y"): count}
return result
The output would be:
{'NewYork': {'Jan 21': 1},
'London': {'Mar 21': 2},
'California': {'Apr 21': 1},
'Colombo': {'Jul 20': 1}}
Upvotes: 1