Reputation: 2227
I have been trying many things for the last few hours on this. Strangely, when Selenium opens a new window it switches to it, but instead of scraping data from the new page, it keeps closing the previously opened windows and scraping data from them. It also keeps opening all the links again instead of just the next one. I have sort of hit a wall with this one. Any help is appreciated. Thanks.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from urllib.parse import quote_plus

# Start Chrome through the local chromedriver binary.
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
# Single shared ActionChains instance; get_data() queues its ctrl+click on it.
actions = ActionChains(driver)

search_term = input("Enter your search term :")
# URL-encode the term so spaces, '&', '#', '+' or '%' typed by the user
# cannot truncate or corrupt the query string.
url = (
    'https://www.sciencedirect.com/search'
    f'?qs={quote_plus(search_term)}'
    '&years=2021%2C2020%2C2019&lastSelectedFacet=years'
)
driver.get(url)
driver.maximize_window()

# Click the button at this absolute XPath as soon as it becomes clickable.
# NOTE(review): presumably dismisses an overlay/consent banner - confirm; an
# absolute XPath like this breaks whenever the page layout changes.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '/html/body/div[3]/div/div/div/button/span')
    )
).click()

# One WebElement per search result: the first <a> inside each result card.
divs = driver.find_elements_by_class_name('result-item-content')
links = [div.find_element_by_tag_name('a') for div in divs]
def get_data():
    """Ctrl+click the module-level ``link`` and print the authors found in another tab.

    Reads the globals ``link``, ``actions`` and ``driver``. After the click it
    switches to the first window handle that differs from the current one,
    prints each author's name (and e-mail address when an envelope icon is
    present), closes that window and switches back.
    """
    # NOTE(review): ``actions`` is created once and reused on every call.
    # Selenium's ActionChains may keep previously queued actions unless
    # reset_actions() is called, which would replay earlier clicks - confirm.
    actions.key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()

    origin_handle = driver.current_window_handle
    # Takes the FIRST handle that is not the origin window; when several tabs
    # are open this is not necessarily the tab that was just opened.
    other_handle = next(
        (handle for handle in driver.window_handles if handle != origin_handle),
        None,
    )
    if other_handle is not None:
        driver.switch_to.window(other_handle)

    authors = driver.find_element_by_id('author-group').find_elements_by_css_selector("a.author")
    for author in authors:
        # Skip entries that lack either name part.
        try:
            given_name = author.find_element_by_css_selector(".given-name").text
            surname = author.find_element_by_css_selector(".surname").text
        except NoSuchElementException:
            print("Could not extract first or last name")
            continue

        # Clicking the envelope icon reveals the address element, if there is one.
        try:
            author.find_element_by_css_selector(".icon-envelope").click()
            mail_icon_present = True
            print(driver.find_element_by_class_name('e-address').text)
        except NoSuchElementException:
            mail_icon_present = False
        print(f"Author {given_name} {surname}. Mail icon present: {mail_icon_present}")

    driver.close()
    driver.switch_to.window(origin_handle)


# get_data() takes no arguments: it picks up the loop variable ``link`` as a global.
for link in links:
    get_data()
Upvotes: 0
Views: 518
Reputation: 2227
The problematic part was this line: actions.key_down(Keys.CONTROL).click(link).key_up(Keys.CONTROL).perform()
I think this is not the right way to open a new tab if you are dealing with multiple links. I changed it to
driver.execute_script('window.open(arguments[0]);', link)
I also stopped saving the links to a list. This works because only two windows are open at a time: the home window and the link we clicked on. So, in its entirety, the code becomes:
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from urllib.parse import quote_plus

# Start Chrome through the local chromedriver binary.
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe')
# Not referenced again in this version of the script; kept so the name stays defined.
actions = ActionChains(driver)

search_term = input("Enter your search term :")
# URL-encode the term so spaces, '&', '#', '+' or '%' typed by the user
# cannot truncate or corrupt the query string.
url = (
    'https://www.sciencedirect.com/search'
    f'?qs={quote_plus(search_term)}'
    '&years=2021%2C2020%2C2019&lastSelectedFacet=years'
)
driver.get(url)
driver.maximize_window()

# Click the button at this absolute XPath as soon as it becomes clickable.
# NOTE(review): presumably dismisses an overlay/consent banner - confirm; an
# absolute XPath like this breaks whenever the page layout changes.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.XPATH, '/html/body/div[3]/div/div/div/button/span')
    )
).click()

# Every search-result card on the results page.
divs = driver.find_elements_by_class_name('result-item-content')
# The results page stays in the same window for the whole run, so its handle
# is read once instead of on every iteration.
parent_window = driver.current_window_handle
wait = WebDriverWait(driver, 10)

for div in divs:
    # Hand window.open() the article URL itself rather than the WebElement,
    # instead of relying on the browser stringifying the anchor element.
    link = div.find_element_by_tag_name('a').get_attribute('href')

    handles_before = driver.window_handles
    driver.execute_script('window.open(arguments[0]);', link)
    # window.open() returns immediately; wait until the new tab has a handle.
    wait.until(EC.new_window_is_opened(handles_before))
    child_window = next(
        window for window in driver.window_handles
        if window not in handles_before
    )
    driver.switch_to.window(child_window)

    try:
        # The new tab may still be loading, so wait for the elements instead
        # of looking them up once and failing.
        title = wait.until(EC.presence_of_element_located((By.TAG_NAME, 'h1')))
        print("Article Title:- ", title.text)

        author_group = wait.until(
            EC.presence_of_element_located((By.ID, 'author-group'))
        )
        for author in author_group.find_elements_by_css_selector("a.author"):
            # Skip entries that lack either name part.
            try:
                given_name = author.find_element_by_css_selector(".given-name").text
                surname = author.find_element_by_css_selector(".surname").text
            except NoSuchElementException:
                print("Could not extract first or last name")
                continue

            # Authors with an envelope icon: click it and print the address.
            # Authors without one: print their name instead.
            try:
                mail_icon = author.find_element_by_css_selector(".icon-envelope")
                mail_icon.click()
                mail = driver.find_element_by_class_name('e-address')
                print(mail.text)
            except NoSuchElementException:
                print(f"Author {given_name} {surname}")
    finally:
        # Always close the article tab and return to the results page, even
        # when scraping fails, so that at most two windows are ever open.
        driver.close()
        driver.switch_to.window(parent_window)
Upvotes: 1