Antonis Papadakis
Antonis Papadakis

Reputation: 39

selenium scrape table and converting to csv file

 import time
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 import pandas as pd


options = Options()
options.add_argument("start-maximized")
s = Service(r'C:\Users\jojob\.wdm\drivers\chromedriver\win32\98.0.4758.102\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
url = "https://www.nba.com/"

driver.get(url)  # open a new tab in the new window
time.sleep(2)
# when the element is visible, cklick it
wait = WebDriverWait(driver, 10)
players = wait.until(EC.visibility_of_element_located((By.XPATH, "//*[@id='nav- 
ul']/li[10]/a/span[1]")))
players.click()

search_player = wait.until(
EC.visibility_of_element_located((By.XPATH, '//* 
 [@id="__next"]/div[2]/div[3]/section/div/div[1]/div/input')))
 search_player.send_keys("antetokounmpo")
 search_player.send_keys(Keys.ENTER)
 time.sleep(1)
 giannis = wait.until((EC.visibility_of_element_located((By.XPATH,
                                                    "//* 
 [@id='__next']/div[2]/div[3]/section/div/div[2]/div["
                                                    
 "2]/div/div/div/table/tbody/tr[1]/td[1]/a/div[2]/p[1]")))) 
 giannis.click()
 time.sleep(2)

# get data from the table
table = wait.until(
EC.visibility_of_element_located((By.XPATH, "//* 
[@id='__next']/div[2]/div[5]/section[2]/div/div/div/table"))).get_attribute('outerHTML')                                  

df_table = pd.read_html(table)

print(df_table)
print(type(df_table))

I have written this code to scrape one table (the "Last 5 Games" stats) from this site, and I want to write the results to a CSV file.

this is my output:

[      Game Date      Matchup W/L  MIN  PTS  FGM  ...  AST  STL  BLK  TOV  PF  +/-
0  FEB 15, 2022  MIL vs. IND   W   36   50   17  ...    4    0    0    3   3    7
1  FEB 10, 2022    MIL @ PHX   L   30   18    5  ...    8    0    1    3   1  -18
2  FEB 08, 2022    MIL @ LAL   W   35   44   17  ...    8    1    2    0   5   23
3  FEB 06, 2022    MIL @ LAC   W   31   28    8  ...    5    1    2    2   2   14
4  FEB 05, 2022    MIL @ POR   W   25   29    7  ...    6    0    1    1   1   28

[5 rows x 23 columns]]
<class 'list'>

How can I convert that list to a CSV file with columns and rows, so that it looks like the table on the site?

enter image description here

Upvotes: 0

Views: 979

Answers (1)

Shawn Ramirez
Shawn Ramirez

Reputation: 833

I was able to get there by using BeautifulSoup as an intermediate step before Pandas; please see below.

import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Scrape the "Last 5 Games" stats table for a player from nba.com and
# save it as a CSV file.
options = Options()
options.add_argument("start-maximized")
s = Service(r'C:\Users\jojob\.wdm\drivers\chromedriver\win32\98.0.4758.102\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
url = "https://www.nba.com/"

driver.get(url)  # open the site in the new window
time.sleep(2)

# When the "Players" navigation entry is visible, click it.
wait = WebDriverWait(driver, 10)
players = wait.until(EC.visibility_of_element_located(
    (By.XPATH, "//*[@id='nav-ul']/li[10]/a/span[1]")))
players.click()

# Type the player's name into the search box and submit.
search_player = wait.until(EC.visibility_of_element_located(
    (By.XPATH, '//*[@id="__next"]/div[2]/div[3]/section/div/div[1]/div/input')))
search_player.send_keys("antetokounmpo")
search_player.send_keys(Keys.ENTER)
time.sleep(1)

# Click the first search result to open the player's profile page.
giannis = wait.until(EC.visibility_of_element_located(
    (By.XPATH, "//*[@id='__next']/div[2]/div[3]/section/div/div[2]/div[2]"
               "/div/div/div/table/tbody/tr[1]/td[1]/a/div[2]/p[1]")))
giannis.click()
time.sleep(2)

# Grab the stats table's outer HTML once it is visible.
table = wait.until(EC.visibility_of_element_located(
    (By.XPATH, "//*[@id='__next']/div[2]/div[5]/section[2]/div/div/div/table"))
).get_attribute('outerHTML')

# pd.read_html parses the HTML string directly, so the BeautifulSoup step
# (which was never imported and would raise NameError) is unnecessary.
# read_html returns a list of DataFrames; take the single table with [0].
df_table = pd.read_html(table)[0]

# DataFrame.to_csv with a path writes an actual file on disk; calling it
# without a path only returns the CSV text as a string, which is why no
# file appeared before. index=False drops the 0..4 row index.
df_table.to_csv('last_5_games.csv', index=False)

print(df_table)
print(type(df_table))

driver.quit()  # release the browser/chromedriver resources

Upvotes: 1

Related Questions