Reputation: 4842
I am trying to write a CSV file from JSON returned from an API call.
I am using pandas, json and requests libraries.
response = requests.get("valid_api_call")
My response.json() looks like this:
"request": {
"Target": "Affiliate_Report",
"Format": "json",
"Service": "service",
"Version": "2",
"api_key": "key",
"Method": "getStats",
"fields": [
"Stat.offer_id",
"Stat.offer_file_id",
"Stat.hour",
"Stat.date",
"Stat.affiliate_info5",
"Stat.affiliate_info4",
"Stat.affiliate_info3",
"Stat.affiliate_info1",
"Offer.name"
],
"limit": "10",
"data_start": "2019-11-11",
"data_end": "2019-11-18"
},
"response": {
"status": 1,
"httpStatus": 200,
"data": {
"page": 1,
"current": 10,
"count": 23500,
"pageCount": 2350,
"data": [
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "Other (unique values)",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "1",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
},
{
"Stat": {
"offer_id": "18",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-11",
"affiliate_info5": "popup_below_2nd",
"affiliate_info4": "www.google.com",
"affiliate_info3": "6",
"affiliate_info1": "Other (unique values)"
},
"Offer": {
"name": "Special"
}
}
],
"dbSource": "branddb"
},
"errors": [],
"errorMessage": null
}
}
I am trying to write the output to a CSV file where the columns would be the field :
Stat.offer_id,
Stat.offer_file_id,
Stat.hour,
Stat.date,
Stat.affiliate_info5,
Stat.affiliate_info4,
Stat.affiliate_info3,
Stat.affiliate_info1,
Offer.name
I haven't worked with JSON that much, not sure how would I iterate the file and write ite to CSV with python. The way I am going to write the file will probably look like this:
f = csv.writer(open(response,'w'))
f.writerow(keylist)
I do not think that csv.writer accepts response as a file which is the json returned by an API call.
I know there is a method with pandas to convert JSON to CSV
df.to_csv('output.csv', encoding='utf-8', index=False)
So my question would be what would be the smartest way of achieving CSV file as an output when the input is JSON returned by API call which may have pagination?
Thank you for the suggestions.
EDIT:
Second API response follows the same structure but throws an error - string indices must be integers.
{
"request": {
"Target": "Affiliate_Report",
"Format": "json",
"Service": "service",
"Version": "2",
"api_key": "key",
"Method": "getStats",
"fields": [
"Stat.offer_id",
"Stat.offer_file_id",
"Stat.hour",
"Stat.date",
"Stat.affiliate_info5",
"Stat.affiliate_info4",
"Stat.affiliate_info3",
"Stat.affiliate_info1",
"Offer.name"
],
"limit": "10",
"data_start": "2019-11-20",
"data_end": "2019-11-20"
},
"response": {
"status": 1,
"httpStatus": 200,
"data": {
"page": 1,
"current": 10,
"count": 4037,
"pageCount": 404,
"data": [
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
},
{
"Stat": {
"offer_id": "20",
"offer_file_id": "0",
"hour": "0",
"date": "2019-11-20",
"affiliate_info5": "editor_pick",
"affiliate_info4": "www.google.com",
"affiliate_info3": "info",
"affiliate_info1": "1"
},
"Offer": {
"name": "60"
}
}
],
"dbSource": "branddb"
},
"errors": [],
"errorMessage": null
}
}
Code used
response_2 = requests.get(JSON)
response_json_2 = response_2.json()
df_2 = pd.io.json.json_normalize(response_json_2['response']['data']['data'])
df_2.to_csv(f'{from_date} - 2.csv', encoding='utf-8', index=False)
Upvotes: 0
Views: 402
Reputation: 18647
In this case, try using pandas.io.json.json_normalize
like:
response_json = reponse.json()
# Or try this if it still doesn't work
# import json
# response_json = json.loads(response.json())
df = pd.io.json.json_normalize(response_json['response']['data']['data'])
Should return the DataFrame
Stat.offer_id Stat.offer_file_id Stat.hour Stat.date Stat.affiliate_info5 \
0 18 0 0 2019-11-11 editor_pick
1 18 0 0 2019-11-11 popup
2 18 0 0 2019-11-11 popup
3 18 0 0 2019-11-11 popup
4 18 0 0 2019-11-11 popup
5 18 0 0 2019-11-11 popup
6 18 0 0 2019-11-11 popup_below_2nd
7 18 0 0 2019-11-11 popup_below_2nd
8 18 0 0 2019-11-11 popup_below_2nd
9 18 0 0 2019-11-11 popup_below_2nd
Stat.affiliate_info4 Stat.affiliate_info3 Stat.affiliate_info1 \
0 www.google.com 1 Other (unique values)
1 www.google.com 6 Other (unique values)
2 www.google.com 1 Other (unique values)
3 www.google.com 1 Other (unique values)
4 www.google.com 6 Other (unique values)
5 www.google.com 6 Other (unique values)
6 Other (unique values) 1 Other (unique values)
7 www.google.com 1 Other (unique values)
8 www.google.com 6 Other (unique values)
9 www.google.com 6 Other (unique values)
Offer.name
0 Special
1 Special
2 Special
3 Special
4 Special
5 Special
6 Special
7 Special
8 Special
9 Special
Then DataFrame.to_csv
df.to_csv('output.csv', encoding='utf-8', index=False)
Upvotes: 1