Aquitter
Aquitter

Reputation: 45

Python selenium get link/click on link

Hey, I would like to be able to access a link, for example from the following HTML code (to open each profile at the URL contained in the code):

<div class="fancyCompLabel" onclick="window.open('https://www.techpilot.de/servlets/supplier/perfect_profile.jsp?lngCode=de&amp;ckey=A4gxuEGikU16YXWt6RMd','_blank')" style="cursor:pointer;">Rathberger GmbH</div>

Basically, I want to open each profile, do some work on the profile page, and then go on to the next profile page. The following code, which I wrote with help from Stack Overflow, is able to locate the relevant HTML elements, but I'm not able to get the link.

from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Script: load the techpilot supplier search page in Chrome, click through the
# consent buttons and collect the ".fancyCompLabel" result elements.

# Pause for three seconds before doing anything else.
# NOTE(review): nothing has been started yet at this point, so the purpose of
# this delay is unclear.
time.sleep(3)

# Set some Selenium options (headless mode is left disabled below)
options = webdriver.ChromeOptions()
# options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Webdriver: start Chrome through the chromedriver binary at /usr/bin/chromedriver
wd = webdriver.Chrome(executable_path='/usr/bin/chromedriver', options=options)
# URL of the search page to open
url = 'https://www.techpilot.de/zulieferer-suchen?laserschneiden%202d%20(laserstrahlschneiden)'

# Load URL
wd.get(url)

# Parse a snapshot of the page source taken right after loading.
# NOTE: `soup` is not used anywhere else in this script.
soup = BeautifulSoup(wd.page_source, 'html.parser')

# Explicit-wait helper with a 15-second timeout
wait = WebDriverWait(wd, 15)
# Wait until the button matching this selector is clickable, then click it
# (judging by its id, presumably the cookie-consent "allow all" button - TODO confirm)
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bodyJSP #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
# Wait for the #efficientSearchIframe frame and switch the driver into it
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#efficientSearchIframe")))
# Same wait-and-click for the button with the same id, now looked up after the frame switch
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".hideFunctionalScrollbar #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
#wd.switch_to.default_content()  # you do not need to switch to default content because iframe is closed already
# Wait until ".fancyCompLabel" elements are present in the DOM
wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fancyCompLabel")))

# Collect every element carrying the "fancyCompLabel" class
results = wd.find_elements_by_css_selector(".fancyCompLabel")

# The triple-quoted block below is a bare string literal, so the loop inside
# it is never executed.
''' #prints text (e.g. Rathberger) here i would like to acess the link instead
for profil in results:
   print(profil)
'''   
# Close the browser window
wd.close()
'''

Upvotes: 0

Views: 731

Answers (1)

Jortega
Jortega

Reputation: 3790

To get the link in the onclick attribute, you can use .get_attribute("onclick"). To extract the URL from the attribute's text, you can split the string into a list on the ' character and take the element that contains the URL: .split("'")[1].

See below:

from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Script: load the techpilot supplier search page in Chrome, click through the
# consent buttons and print the profile URL stored in the onclick attribute of
# every ".fancyCompLabel" result element.

# NOTE(review): delay kept from the original script; nothing has been started
# yet at this point, so its purpose is unclear.
time.sleep(3)

# Set some Selenium options (headless mode is left disabled below)
options = webdriver.ChromeOptions()
# options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Webdriver
# NOTE(review): Selenium 4 deprecates `executable_path` in favour of a Service
# object; left unchanged so the script keeps working on Selenium 3.
wd = webdriver.Chrome(executable_path='/usr/bin/chromedriver', options=options)

try:
    # URL of the search page to open
    url = 'https://www.techpilot.de/zulieferer-suchen?laserschneiden%202d%20(laserstrahlschneiden)'

    # Load URL
    wd.get(url)

    # Explicit-wait helper with a 15-second timeout
    wait = WebDriverWait(wd, 15)
    # Click the button on the outer page (judging by its id, presumably the
    # cookie-consent "allow all" button - TODO confirm)
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#bodyJSP #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
    # Switch into the search iframe and click the button with the same id there
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#efficientSearchIframe")))
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".hideFunctionalScrollbar #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
    # wd.switch_to.default_content() is not called here; per the original
    # author it is not needed because the iframe is closed already.
    wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".fancyCompLabel")))

    # find_elements(By.CSS_SELECTOR, ...) replaces find_elements_by_css_selector,
    # which no longer exists in current Selenium 4 releases.
    results = wd.find_elements(By.CSS_SELECTOR, ".fancyCompLabel")

    for profil in results:
        onclick = profil.get_attribute("onclick")
        # get_attribute returns None when the attribute is missing; skip such
        # elements (and any handler without a quoted argument) instead of crashing.
        if not onclick or "'" not in onclick:
            continue
        # The handler looks like window.open('<url>','_blank'), so the URL is
        # the text between the first pair of single quotes.
        print(onclick.split("'")[1])
finally:
    # quit() ends the whole driver session, so the browser is not left running
    # when a wait above times out.
    wd.quit()

Upvotes: 1

Related Questions