Reputation: 343
I have the following nested structure of dictionaries and lists:
{'linked': {'instructors.v1':
[{'id': '3219339', 'fullName': 'Lisa Mazzola'},
{'id': '6407572', 'fullName': 'Alan S. Miller '},
{'id': '226710', 'fullName': 'Kevin Werbach'},
{'id': '8054217', 'fullName': '许 肖潇'},
{'id': '20696355', 'fullName': 'Варшалович Дмитрий Александрович'},
{'id': '15622422', 'fullName': 'Prof. James Evans'}],
'elements':
[{ 'id': '69Bku0KoEeWZtA4u62x6lQ', 'name': 'Gamification','instructorIds': '226710'}]
}
I am trying to obtain the 'fullName' from 'instructors.v1' that corresponds to each 'instructorIds' value in 'elements', by matching the two on the instructor id. My approach was to build another dictionary mapping id to name, as follows:
{'3219339': 'Lisa Mazzola'}
{'6407572': 'Alan S. Miller'}
{'226710': 'Kevin Werbach'}
This gives me a KeyError: "'226710'" even though 226710 does exist in the list. Please suggest another approach, or point out where I am going wrong.
Here is the Python code for reference:
import imp
import importlib
import requests
import json
import re
from bs4 import BeautifulSoup
import csv
import sys
import urllib.request
from importlib import reload
# Script entry point: fetches one page of Coursera courses and appends one
# comma-separated row per course to courserarough1.csv.
# NOTE(review): indentation was lost when this code was pasted, so the nesting
# described in the comments below is inferred from the statements — confirm.
if __name__ == "__main__":
# Custom request headers.
# NOTE(review): `headers` is built here but never passed to requests.get() below.
headers = ({
"x-user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/53.0.2785.92 Safari/537.36
FKUA/website/41/website/Desktop"})
# Courses endpoint; the `includes=` parameter asks for linked instructor and
# partner records, which the loops below read from data['linked'].
url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds,partnerIds,specializations,s12nlds,v1Details,v2Details&fields=instructorIds,partnerIds,specializations,s12nlds,description"
data = requests.get(url).json()
# print(len(data['elements']))
print(data)
# Opened in append mode, so the header row written below is added again on
# every run of the script.
with open("courserarough1.csv", 'a') as f:
##### Header #####
header = f.write(
'instructors' + ',' + 'courseURL' + ',' + 'courseType' + ',' +
'CourseName' + ',' + 'partnerName' + ',' +
'slug' + ',' + 'specializations' + ',' + 'course_id' + ',' +
'description' + ',' + "\n")
# NOTE(review): the header ends with a trailing comma, so it has one more
# (empty) column than the data rows written at the bottom.
# Walk the linked instructors, cleaning each full name and id (commas removed,
# surrounding whitespace stripped).
for n in range( len( data['linked']['instructors.v1'] ) ):
instructors = data['linked']['instructors.v1'][n]['fullName']
instructors = str( instructors )
instructors = instructors.strip().replace( ',', '' )
instructorsid = data['linked']['instructors.v1'][n]['id']
instructorsid = str( instructorsid )
instructorsid = instructorsid.strip().replace( ',', '' )
# NOTE(review): newdict is rebuilt from a single (id, name) pair on every pass,
# so once this loop ends it holds only the last instructor. Any lookup for an
# earlier id will raise KeyError.
newdict = dict( [(instructorsid,instructors)] )
print(newdict)
#print(data['linked']['instructors.v1'])
# Collect the ids of all linked partners.
partnerlist = []
for m in range( len( data['linked']['partners.v1'] ) ):
partnerName = data['linked']['partners.v1'][m]['name']
partnerName = str( partnerName )
partnerid = data['linked']['partners.v1'][m]['id']
partnerid = str( partnerid )
partnerlist.append(partnerid)
#print(partnerlist)
# One pass per course: normalise each field to a comma-free string, then write
# the row.
for i in range(len(data['elements'])):
partnerIds = data['elements'][i]['partnerIds']
#filtered = data[(np.where( partnerlist.__contains__(partnerIds) ))]
#print(filtered)
courseType = data['elements'][i]['courseType']
courseType = str(courseType)
# After str(), a None value becomes the text 'None', which is truthy; the else
# branch therefore only runs for an empty string. The same applies to the
# other `if <field>:` checks below.
if courseType:
courseType = courseType.rstrip().replace('v2.', '')
else:
courseType = ' '
# print(courseType)
CourseName = data['elements'][i]['name']
CourseName = str(CourseName)
CourseName = CourseName.strip().replace(',', '')
partnerIds = data['elements'][i]['partnerIds']
partnerIds = str( partnerIds )
if partnerIds:
partnerIds = partnerIds.rstrip().replace( ',', '' )
partnerIds = partnerIds.rstrip().replace( '\n', '' )
# NOTE(review): replace('u', '') deletes every letter "u" in the text, not just
# a leading u'' prefix. The same call is applied to specializations and
# instructorIds below.
partnerIds = partnerIds.rstrip().replace( 'u', '' )
partnerIds = partnerIds.rstrip().replace( '[', '' )
partnerIds = partnerIds.rstrip().replace( ']', '' )
else:
partnerIds = ' '
slug = data['elements'][i]['slug']
slug = str(slug)
# print(slug)
specializations = data['elements'][i]['specializations']
specializations = str(specializations)
if specializations:
specializations = specializations.rstrip().replace(',', '')
specializations = specializations.rstrip().replace('\n', '')
specializations = specializations.rstrip().replace('u', '')
specializations = specializations.rstrip().replace('[', '')
specializations = specializations.rstrip().replace(']', '')
else:
specializations = ' '
course_id = data['elements'][i]['id']
course_id = str(course_id)
description = data['elements'][i]['description']
description = str(description)
if description:
description = description.strip().replace(',', '')
description = description.strip().replace('\n', '')
else:
description = ' '
courseURL = "https://www.coursera.org/learn/" + slug
courseURL = str(courseURL)
instructorIds = data['elements'][i]['instructorIds']
# NOTE(review): if instructorIds is a list, str() yields text such as
# "['226710']". The replaces below strip the brackets but leave the single
# quotes, and the regex only removes double quotes — which matches the quoted
# key shown in the reported KeyError. Confirm the actual type returned by the API.
instructorIds = str( instructorIds )
if instructorIds:
instructorIds = instructorIds.rstrip().replace( ',', '' )
instructorIds = instructorIds.rstrip().replace( '\n', '' )
instructorIds = instructorIds.rstrip().replace( 'u', '' )
instructorIds = instructorIds.rstrip().replace( '[', '' )
instructorIds = instructorIds.rstrip().replace( ']', '' )
instructorIds = re.sub( r'^"|"$', '', instructorIds )
else:
instructorIds = ' '
#print(instructorIds)
# Look up the instructor name for this course by its cleaned id string.
instructors = newdict[instructorIds]
print(instructors)
# NOTE(review): partnerName in the row below is whatever the partner loop left
# behind (the last linked partner), not this course's partner; the cleaned
# partnerIds value is computed but never written.
##writing the
attributes in a csv file##
f.write(instructors + ',' + courseURL + ',' + courseType + ',' +
CourseName + ',' + partnerName + ',' + slug +
',' + specializations + ',' + course_id + ',' + description +
"\n")
Upvotes: 0
Views: 105
Reputation: 4213
import requests
import json
import re
from bs4 import BeautifulSoup
# url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds,partnerIds,specializations,s12nlds,v1Details,v2Details&fields=instructorIds,partnerIds,specializations,s12nlds,description"
url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds&fields=instructorIds"  # API call shortened for brevity


def attach_instructor_names(elements, instructors):
    """Replace each course's instructor ids with ``{id: fullName}`` mappings.

    Args:
        elements: list of course dicts; each may carry an ``'instructorIds'``
            entry holding a list of id strings (a single bare id string is
            also accepted and treated as a one-item list).
        instructors: list of dicts with ``'id'`` and ``'fullName'`` keys.

    Returns:
        The same ``elements`` list, modified in place: every element's
        ``'instructorIds'`` becomes a list of one-entry dicts, in the original
        id order. Ids with no matching instructor are dropped.
    """
    # Build the id -> name table once. An exact-key lookup avoids the false
    # positives of a substring test (e.g. "2267" matching "226710") and the
    # repeated scan of every instructor for every id.
    names_by_id = {}
    for inst in instructors:
        # Keep the first record if an id happens to appear twice.
        names_by_id.setdefault(inst['id'], inst['fullName'])

    for element in elements:
        ids = element.get('instructorIds') or []
        if isinstance(ids, str):
            # Iterating a bare string would yield single characters.
            ids = [ids]
        element['instructorIds'] = [
            {instructor_id: names_by_id[instructor_id]}
            for instructor_id in ids
            if instructor_id in names_by_id
        ]
    return elements


def main():
    """Fetch the course list, resolve instructor names and save the result.

    Prints the updated elements and writes the whole payload to ``data.json``
    in the current working directory; pass a full path to ``open`` to store
    the file elsewhere.
    """
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    json_data = json.loads(response.text)

    cmp1 = attach_instructor_names(
        json_data['elements'],
        json_data['linked']['instructors.v1'],
    )
    print(cmp1)
    json_data['elements'] = cmp1

    # ensure_ascii=False keeps non-Latin instructor names readable in the file.
    with open('data.json', 'w', encoding='utf-8') as fp:
        json.dump(json_data, fp, sort_keys=False, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()
A sample of the resulting JSON output:
"elements": [
{
"name": "Gamification",
"slug": "gamification",
"instructorIds": [
{
"226710": "Kevin Werbach"
}
],
"courseType": "v2.ondemand",
"id": "69Bku0KoEeWZtA4u62x6lQ"
},
{
"name": "Dealing With Missing Data",
"slug": "missing-data",
"instructorIds": [
{
"8394050": "Richard Valliant, Ph.D."
}
],
"courseType": "v2.ondemand",
"id": "0HiU7Oe4EeWTAQ4yevf_oQ"
},
...
...
...
...
...
{
"name": "Accounting Analytics",
"slug": "accounting-analytics",
"instructorIds": [
{
"1937011": "Brian J Bushee"
},
{
"14757138": "Christopher D. Ittner"
}
],
"courseType": "v2.ondemand",
"id": "rc5KG0aUEeWG1w6arGoEIQ"
},
{
"name": "Municipal Solid Waste Management in Developing Countries",
"slug": "solid-waste-management",
"instructorIds": [
{
"2387594": "Dr. Christian Zurbrügg"
},
{
"7293234": "Imanol Zabaleta"
},
{
"16974677": "Félix Schmidt"
}
],
"courseType": "v2.ondemand",
"id": "gpAI9GK4EeWFkQ7sUCFGVQ"
},
...
...
...
Upvotes: 1