Unable to scrape all rows of a table using beautifulsoup

Question

I tried scraping the table rows from the website https://google.com/covid19-map/?hl=en to get data on the spread of the coronavirus, but it only returns a few rows (15 in my case) and I am unable to scrape all of them. The table isn't fully visible on the website; one needs to scroll to see the rest of its contents. Please help.

import requests 
from bs4 import BeautifulSoup 


URL = "https://google.com/covid19-map/?hl=en"

# requests downloads the raw HTML only and does not execute JavaScript, so any
# rows the page renders later (presumably while scrolling) are not in r.content.
r = requests.get(URL)

soup = BeautifulSoup(r.content, 'html5lib')

# Select every <tr> element carrying the class "A5V3jc".
all_rows = soup.find_all('tr', attrs={'class': 'A5V3jc'})

for row in all_rows:

    # Getting image link (printed on its own line when the row has an <img>)
    img_link = row.find('img')
    if img_link is not None:
        print(img_link['src'])

    # Getting name field (text of the first <span> in the row)
    name = row.find('span')
    if name is not None:
        print(name.text, end="\t")

    # Getting remaining data: each matching <td>, separated by three tabs
    for cell in row.find_all('td', attrs={'class': 'uMsnNd HAChlc'}):
        print(cell.text, end="\t\t\t")

    # Terminate the row and leave blank lines before the next one.
    print("\n\n")

αԋɱҽԃ αмєяιcαη · Accepted Answer

import pandas as pd
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
options = Options()
options.add_argument('--headless')

# Start exactly one headless Firefox instance. Creating the driver twice would
# leave the first browser process running, because only the last one is quit.
driver = webdriver.Firefox(options=options)
try:
    driver.get("https://google.com/covid19-map/?hl=en")
    # Parse every <table> in the browser's view of the page and keep the
    # second one (index 1).
    # NOTE: pandas >= 2.1 emits a FutureWarning when given a literal HTML
    # string; wrapping page_source in io.StringIO avoids it.
    df = pd.read_html(driver.page_source)[1]
finally:
    # Always shut the browser down, even if loading or parsing fails.
    driver.quit()

df.to_csv("Data.csv", index=False)

Output: view online

Unable to scrape all rows of a table using beautifulsoup

Answers (1)

Related Questions