I need to scrape all the table data from the Rajya Sabha debates website. However, instead of scraping from the paginated URL it builds on each iteration, the code keeps scraping the original table, page by page.
from selenium import webdriver
import chromedriver_binary
import os
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import csv
import time
import lxml

url = 'https://rsdebate.nic.in/simple-search?query=climate+change&sort_by=dc.identifier.sessionnumber_sort&order=asc&rpp=100&etal=0&start=0'
#url_call = f"https://rsdebate.nic.in/simple-search?query=climate+change&sort_by=dc.identifier.sessionnumber_sort&order=asc&rpp=100&etal=0&start={i}"
page = requests.get(url)
soup = BeautifulSoup(page.text, 'lxml')
table1 = soup.find('table', id='sam_table')

headers = []
for a in table1.find_all('th'):
    title = a.text
    headers.append(title)

rsdata = pd.DataFrame(columns=headers)
rsdata.to_csv('rs_debate_data.csv', mode='a', index=False)

# Create a for loop to fill rajya sabha data
for k in range(0, 96):
    url_call = f"https://rsdebate.nic.in/simple-search?query=climate+change&sort_by=dc.identifier.sessionnumber_sort&order=asc&rpp=100&etal=0&start={k}"
    page = requests.get(url_call)
    for j in table1.find_all('tr')[1:]:
        row_data = j.find_all('td')
        row = [i.text for i in row_data]
        length = len(rsdata)
        rsdata.loc[length] = row
    rsdata.to_csv('rs_debate_data.csv', mode='a', index=False, header=False)
    print(k)

# Export to csv
# Try to read csv
#rs_data = pd.read_csv('rs_debate_data.csv')
I was trying to scrape only the rows related to the keyword "climate change" in the Debate Title column of the table.
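For illustration only, once the scraped rows are in the CSV, keeping just the rows whose debate title mentions climate change could look like the sketch below. The column name 'Debate Title' is an assumption about the table's header and may need to match whatever the site actually uses.

import pandas as pd

rs_data = pd.read_csv('rs_debate_data.csv')
# Assumption: the header cell for the debate-title column is literally "Debate Title".
mask = rs_data['Debate Title'].str.contains('climate change', case=False, na=False)
climate_rows = rs_data[mask]
climate_rows.to_csv('rs_debate_climate_only.csv', index=False)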
This loop does a find_all() on the original table1, not on the page it just fetched:

for k in range(0, 96):
    url_call = "..."
    page = requests.get(url_call)
    for j in table1.find_all('tr')[1:]:

table1 was parsed once from the first page before the loop, so every iteration re-reads the same rows. Each response needs to be parsed into a new BeautifulSoup object, and the table located in that freshly parsed page, before iterating over its rows.
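A minimal sketch of the corrected loop, assuming the results table keeps the id sam_table on every page. Note also that with rpp=100 the start parameter most likely has to advance in steps of 100; that stepping is an assumption about the site's paging, not something confirmed by the original code.

import requests
import pandas as pd
from bs4 import BeautifulSoup

BASE = ("https://rsdebate.nic.in/simple-search?query=climate+change"
        "&sort_by=dc.identifier.sessionnumber_sort&order=asc&rpp=100&etal=0&start={}")

headers = None
rows = []
# Assumption: with rpp=100 results per page, 'start' steps by 100 to move page by page.
for start in range(0, 9600, 100):
    page = requests.get(BASE.format(start))
    soup = BeautifulSoup(page.text, 'lxml')      # parse the response that was just fetched
    table = soup.find('table', id='sam_table')   # locate the table in *this* page
    if table is None:                            # no table means we are past the last page
        break
    if headers is None:
        headers = [th.text.strip() for th in table.find_all('th')]
    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

rsdata = pd.DataFrame(rows, columns=headers)
rsdata.to_csv('rs_debate_data.csv', index=False)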