Khaled Koubaa
Khaled Koubaa

Reputation: 527

Scrape table in nested page appears after click using selenium

I want to scrape data inside nested tables, in this page: https://www.wagertalk.com/freeOddsPage/page.html?sport=L5&date=2021-05-29&cb=0.01844398326591401

When you click on any cell, a nested table appears; I want to scrape the data from those nested tables (see the screenshot below).

I created a Python script that uses Selenium to click on each cell so that the nested table appears and I can scrape it; but the Selenium browser didn't click, or the nested tables didn't show:

# Asker's non-working attempt: load the odds page, parse it with BeautifulSoup,
# and try to click each odds cell so its nested popup table can be scraped.
u = 'https://www.wagertalk.com/freeOddsPage/page.html?sport=S8&date=2021-05-27&cb=0.6242232189793953'

import requests
import csv
import json
import datetime
from bs4 import BeautifulSoup
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = Options()
# options.add_argument("--headless")          #headless
#options.add_argument('--no-sandbox')
#options.add_argument('--ignore-certificate-errors')
#options.add_argument('--incognito')   

driver = webdriver.Chrome(executable_path=r"C:/chromedriver.exe", options=options)

driver.get(u)

driver.maximize_window()
driver.implicitly_wait(60) ##Wait up to 60 s for element lookups before raising

# Fixed sleep to let the page's JavaScript render the odds table.
time.sleep(20)
soup = BeautifulSoup(driver.page_source, 'html.parser')


# NOTE(review): this line duplicates the parse two lines above; one of the two
# is redundant.
soup = BeautifulSoup(driver.page_source, 'html.parser')


# NOTE(review): `soup` is a static snapshot of the page source taken above --
# clicking via `driver` afterwards does not update it, so the nested tables
# can never appear in `soup`. The popup must be read from the live driver DOM.
for i in soup.select('#schedule tbody tr[id^="g"]:has(.tennis_score_main)'):
    match_date = i.select_one('th:nth-of-type(1) div:nth-of-type(1)').text 
    match_time = i.select_one('th:nth-of-type(1) div:nth-of-type(2)').text
    A_team = i.select_one('th:nth-of-type(3) div:nth-of-type(1) div:nth-of-type(1)').text if i.select_one('th:nth-of-type(3) div:nth-of-type(1) div:nth-of-type(1)') else i.select_one('th:nth-of-type(3) div:nth-of-type(1)').text
    H_team = i.select_one('th:nth-of-type(3) div:nth-of-type(2) div:nth-of-type(1)').text if i.select_one('th:nth-of-type(3) div:nth-of-type(2) div:nth-of-type(1)') else i.select_one('th:nth-of-type(3) div:nth-of-type(2)').text
    
    #I tried this:
    # WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#schedule tbody tr[id^="g"]:has(.scTD1):nth-of-type(1) .book.b10 div:nth-of-type(1)')))

    #and Tried this:
    # NOTE(review): the selector is hard-coded to :nth-of-type(1), so every
    # iteration clicks the same first row instead of the row bound to `i`.
    driver.execute_script("document.querySelector('#schedule tbody tr:has(.scTD1):nth-of-type(1) .book.b10 div:nth-of-type(1)').click()")
    #code to scrape nested table here, but the table don't show

driver.quit()

Upvotes: 1

Views: 281

Answers (1)

furas
furas

Reputation: 142641

I don't know what your problem was, because I can click a cell to open the popup window, and later click the Close button to close that popup window. But I didn't use BeautifulSoup — only Selenium.

I didn't check if it works with all cells and rows.

"""Scrape the nested popup tables on the WagerTalk free-odds page.

For every game row the script prints the start date/time and the two team
names, then clicks each odds cell (``.book``) to open its popup, prints the
popup table's cells, and clicks the popup's Close button before moving on.

Note: the ``find_elements_by_css_selector`` helpers used originally were
deprecated in Selenium 4.0 and removed in 4.3; this version uses the
``find_elements(By.CSS_SELECTOR, ...)`` API, which works on Selenium 3 and 4.
"""
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
#from selenium.webdriver.firefox.options import Options
import time

url = 'https://www.wagertalk.com/freeOddsPage/page.html?sport=S8&date=2021-05-27&cb=0.6242232189793953'

options = Options()
#options.add_argument("--headless")
#options.add_argument('--no-sandbox')
#options.add_argument('--ignore-certificate-errors')
#options.add_argument('--incognito')

#driver = webdriver.Chrome(executable_path=r"C:/chromedriver.exe", options=options)
driver = webdriver.Chrome(options=options)
#driver = webdriver.Firefox(options=options)

driver.get(url)

driver.maximize_window()
driver.implicitly_wait(60)  # wait up to 60 s for elements before raising

# Game rows have ids starting with "g".
for row in driver.find_elements(By.CSS_SELECTOR, 'tr[id^="g"]'):

    # First .time-started element holds the date, the second the time.
    date_time = row.find_elements(By.CSS_SELECTOR, '.time-started')
    match_date = date_time[0].text
    match_time = date_time[1].text
    print('date:', match_date, '| time:', match_time)

    teams = row.find_elements(By.CSS_SELECTOR, '.team div')
    A_team = teams[0].text
    H_team = teams[1].text
    print('A_team:', A_team)
    print('H_team:', H_team)

    books = row.find_elements(By.CSS_SELECTOR, '.book')
    for b in books:
        print('--- popup ---')

        # Clicking the odds cell opens the .popupDiv overlay.
        b.click()

        time.sleep(1)  # give the popup time to render
        # ... scrape table from .popupDiv ...
        tds = driver.find_elements(By.CSS_SELECTOR, '.popupDiv table td')
        for t in tds:
            print(t.text)

        # Close the popup so the next cell can be clicked.
        driver.find_element(By.CSS_SELECTOR, '.popupDiv button').click()

    print('--- end row ---')

driver.quit()

Upvotes: 2

Related Questions