Reputation: 113
Someone suggested that I use a Python dictionary so that I can easily extract data from it.
My program extracts data from MyAnimeList and stores it in a variable (a list of strings). I would like to convert this variable into a dictionary. I think it is possible, but I don't know how to do it.
The variable looks something like this:
['Synonyms: Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest', 'Japanese: ポケットモンスター', 'Type: TV', 'Episodes: 276', 'Status: Finished Airing', 'Aired: Apr 1, 1997 to Nov 14, 2002', 'Premiered: Spring 1997', 'Broadcast: Thursdays at 19:00 (JST)', 'Producers: TV Tokyo, TV Tokyo Music, Studio Jack', 'Licensors: VIZ Media, 4Kids Entertainment', 'Studios: OLM', 'Source: Game', 'Genres: Action, Adventure, Comedy, Kids, Fantasy', 'Duration: 24 min. per ep.', 'Rating: PG - Children', 'Score: 7.341 (scored by 291,570 users)', 'Ranked: #21572', 'Popularity: #287', 'Members: 504,076', 'Favorites: 4,076', '']
I would like it to be converted automatically so that it looks like this:
# Desired result: each "Label: value" string becomes one entry, keyed by the
# label (text before the first colon) with the remaining text as the value.
information_dict = {
    "Synonyms": "Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest",
    "Japanese": "ポケットモンスター",
    "Type": "TV",
    "Episodes": "276",
    "Status": "Finished Airing",
    "Aired": "Apr 1, 1997 to Nov 14, 2002",
    "Premiered": "Spring 1997",
    "Broadcast": "Thursdays at 19:00 (JST)",
    "Producers": "TV Tokyo, TV Tokyo Music, Studio Jack",
    "Licensors": "VIZ Media, 4Kids Entertainment",
    "Studios": "OLM",
    "Source": "Game",
    "Genres": "Action, Adventure, Comedy, Kids, Fantasy",
    "Duration": "24 min. per ep.",
    "Rating": "PG - Children",
    "Score": "7.341 (scored by 291,570 users)",
    "Ranked": "#21572",
    "Popularity": "#287",
    "Members": "504,076",
    "Favorites": "4,076",
}
This is what my code looks like:
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Ask the user which anime to look up on MyAnimeList.
Anime = input("Enter Anime:")

driver = webdriver.Chrome(executable_path=r"C:\Users\amete\Documents\chromedriver.exe")
driver.get("https://myanimelist.net/search/all?q=one%20piece&cat=all")
wait = WebDriverWait(driver, 20)

# Wait for the search box BEFORE touching it; the wait returns the element
# once it is clickable, so no separate (and possibly premature) lookup is
# needed.
search = wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@name="q"]')))

# Clear the field (select all, then delete) and submit the user's query.
search.send_keys(Keys.CONTROL, 'a')
search.send_keys(Keys.DELETE)
search.send_keys(Anime)
search.send_keys(Keys.RETURN)

# Accept the cookies
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="qc-cmp2-ui"]/div[2]/div/button[3]'))).click()

# Open the first hit in the "Anime" section of the results. The wait hands
# back the clickable link, so it is clicked directly; click() returns None,
# so its result is not stored.
wait.until(EC.element_to_be_clickable((
    By.XPATH,
    '//h2[@id="anime"]//ancestor::div[@class="content-left"]//article[1]/div[contains(@class, "list")][1]/div[contains(@class, "information")]/a[1]',
))).click()

# -----Extracting information-------#
# Extracting the Jap Title and the English Title
Titles = driver.find_element_by_xpath('//*[@id="contentWrapper"]/div[1]').text
Titles = Titles.split("\n")
print(Titles)
# The header text normally contains an "Edit" entry; only remove it when it
# is actually there so a missing entry does not raise ValueError.
if "Edit" in Titles:
    Titles.remove("Edit")
print(Titles)

Score = driver.find_element_by_xpath(
    '//*[@id="content"]/table/tbody/tr/td[2]/div[1]/table/tbody/tr[1]/td/div[1]/div[1]/div[1]/div[1]/div[1]/div').text
Episodes = driver.find_element_by_xpath('//*[@id="content"]/table/tbody/tr/td[1]/div/div[10]').text
print("The Score of the Anime is:" + str(Score))
print(Episodes)

# -------Other Information about the show--------#
# Collect the text of the sidebar rows div[7] .. div[27]. Collection stops at
# the first missing row; only the "element not found" case is tolerated so
# that unrelated errors are no longer silently swallowed.
Information_List = []
try:
    for i in range(7, 28):
        Info = driver.find_element_by_xpath('//*[@id="content"]/table/tbody/tr/td[1]/div/div[' + str(i) + ']').text
        Information_List.append(Info)
except NoSuchElementException:
    pass
print(Information_List)

# --------Extracting the data and putting it into variables--------#
# ------Genres-------#
# Remember the index of the (last) row mentioning "Genres"; None means the
# page had no such row, which previously caused a NameError below.
Genre_Index = None
for x, s in enumerate(Information_List):
    if "Genres" in s:
        Genre_Index = x

if Genre_Index is None:
    Genre = []
else:
    print(Information_List[Genre_Index])
    Genre = Information_List[Genre_Index]
    Genre = Genre.replace("Genres: ", "")
    Genre = Genre.replace(" ", "")
    Genre = Genre.split(",")
print(Genre)

# -------Rating-------#
# Look the rating row up by its label instead of the fixed position 14: the
# number and order of rows depends on what was collected above, so a fixed
# index can raise IndexError or print a different row.
Rating = next((s for s in Information_List if s.startswith("Rating")), None)
if Rating is None:
    print("Rating not found")
else:
    print(Rating)
Upvotes: 1
Views: 1207
Reputation: 339
# Raw "Label: value" strings as scraped from the page; the trailing empty
# string is part of the sample input and must be tolerated.
source = [
    'Synonyms: Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest',
    'Japanese: ポケットモンスター',
    'Type: TV',
    'Episodes: 276',
    'Status: Finished Airing',
    'Aired: Apr 1, 1997 to Nov 14, 2002',
    'Premiered: Spring 1997',
    'Broadcast: Thursdays at 19:00 (JST)',
    'Producers: TV Tokyo, TV Tokyo Music, Studio Jack',
    'Licensors: VIZ Media, 4Kids Entertainment',
    'Studios: OLM',
    'Source: Game',
    'Genres: Action, Adventure, Comedy, Kids, Fantasy',
    'Duration: 24 min. per ep.',
    'Rating: PG - Children',
    'Score: 7.341 (scored by 291,570 users)',
    'Ranked: #21572',
    'Popularity: #287',
    'Members: 504,076',
    'Favorites: 4,076',
    '',
]
information_dict = {}


def add(_dict, key, value):
    """Store ``value`` under ``key`` in ``_dict``, modifying it in place."""
    _dict[key] = value


for item in source:
    # Split at the FIRST colon only: values may themselves contain colons
    # (e.g. "Thursdays at 19:00 (JST)") and must not be cut short.
    label, separator, text = item.partition(':')
    if not separator:
        # No colon at all (such as the empty trailing entry): nothing to add.
        continue
    # Strip the space that follows the colon so values carry no padding.
    add(information_dict, label.strip(), text.strip())

print(information_dict)
Output:
End
{'Aired': ' Apr 1, 1997 to Nov 14, 2002',
'Broadcast': ' Thursdays at 19',
'Duration': ' 24 min. per ep.',
'Episodes': ' 276',
'Favorites': ' 4,076',
'Genres': ' Action, Adventure, Comedy, Kids, Fantasy',
'Japanese': ' ポケットモンスター',
'Licensors': ' VIZ Media, 4Kids Entertainment',
'Members': ' 504,076',
'Popularity': ' #287',
'Premiered': ' Spring 1997',
'Producers': ' TV Tokyo, TV Tokyo Music, Studio Jack',
'Ranked': ' #21572',
'Rating': ' PG - Children',
'Score': ' 7.341 (scored by 291,570 users)',
'Source': ' Game',
'Status': ' Finished Airing',
'Studios': ' OLM',
'Synonyms': ' Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest',
'Type': ' TV'}
Upvotes: 1
Reputation: 499
Something like this could help you:
informations = ['Synonyms: Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest', 'Japanese: ポケットモンスター', 'Type: TV', 'Episodes: 276', 'Status: Finished Airing', 'Aired: Apr 1, 1997 to Nov 14, 2002', 'Premiered: Spring 1997', 'Broadcast: Thursdays at 19:00 (JST)', 'Producers: TV Tokyo, TV Tokyo Music, Studio Jack', 'Licensors: VIZ Media, 4Kids Entertainment', 'Studios: OLM', 'Source: Game', 'Genres: Action, Adventure, Comedy, Kids, Fantasy', 'Duration: 24 min. per ep.', 'Rating: PG - Children', 'Score: 7.341 (scored by 291,570 users)', 'Ranked: #21572', 'Popularity: #287', 'Members: 504,076', 'Favorites: 4,076', '']

# Build {label: value} from the "Label: value" strings.
information_dict = {}
for item in informations:
    # Entries without a colon (e.g. the empty trailing string) carry no
    # label/value pair and are skipped.
    if ':' in item:
        # maxsplit=1 keeps everything after the first colon together, so a
        # value such as "Thursdays at 19:00 (JST)" is not truncated at its
        # own colon.
        key, value = item.split(':', 1)
        information_dict[key.strip()] = value.strip()
print(information_dict)
output:
{
"Synonyms": "Pocket Monsters, Indigo League, Adventures on the Orange Islands, The Johto Journeys, Johto League Champions, Master Quest",
"Japanese": "ポケットモンスター",
"Type": "TV",
"Episodes": "276",
"Status": "Finished Airing",
"Aired": "Apr 1, 1997 to Nov 14, 2002",
"Premiered": "Spring 1997",
"Broadcast": "Thursdays at 19",
"Producers": "TV Tokyo, TV Tokyo Music, Studio Jack",
"Licensors": "VIZ Media, 4Kids Entertainment",
"Studios": "OLM",
"Source": "Game",
"Genres": "Action, Adventure, Comedy, Kids, Fantasy",
"Duration": "24 min. per ep.",
"Rating": "PG - Children",
"Score": "7.341 (scored by 291,570 users)",
"Ranked": "#21572",
"Popularity": "#287",
"Members": "504,076",
"Favorites": "4,076"
}
Upvotes: 1