Reputation: 137
Currently I have web-scraped the data and printed it, but now I want to export it to Excel/CSV. I am new to Python and need help: there are multiple pages that I have scraped, and now I need to export them to CSV/Excel. My code is below.
import requests
from urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
from bs4 import BeautifulSoup as bs
def scrap_bid_data():
    """Scrape bid listings from the GeM portal page by page and print
    selected fields of each bid, stopping when a page has no bid data.
    """
    page_no = 1  # initial page number
    while True:
        print('Hold on creating URL to fetch data...')
        # Build the dynamic per-page URL for the paginated bid listing.
        URL = 'https://bidplus.gem.gov.in/bidlists?bidlists&page_no=' + str(page_no)
        print('URL cerated: ' + URL)
        # verify=False because the site has certificate issues (warning
        # suppressed at the top of the file).
        scraped_data = requests.get(URL, verify=False)
        soup_data = bs(scraped_data.text, 'lxml')  # parse with lxml
        # Container div that holds all bid entries on this page.
        extracted_data = soup_data.find('div', {'id': 'pagi_content'})
        # Stop when the container is missing (find() returned None) or
        # present but empty. The original bare len(extracted_data) check
        # raised TypeError on a missing div instead of breaking cleanly.
        if extracted_data is None or len(extracted_data) == 0:
            break
        # Bid entries sit at odd indexes of the container's children;
        # even indexes are whitespace/text nodes.
        for idx in range(len(extracted_data)):
            if idx % 2 == 1:
                bid_data = extracted_data.contents[idx].text.strip().split('\n')
                print('-' * 100)
                print(bid_data[0])   # BID number
                print(bid_data[5])   # Items
                print(bid_data[6])   # Quantity required
                print(bid_data[10] + bid_data[12].strip())  # Department name and address
                print(bid_data[16])  # Start date
                print(bid_data[17])  # End date
                print('-' * 100)
        page_no += 1  # advance to the next page

scrap_bid_data()
Upvotes: 0
Views: 511
Reputation: 11342
Since you already have the data elements, you can write them to a CSV file in a couple of steps.
Here are the code updates:
def scrap_bid_data():
    """Scrape bid listings from the GeM portal page by page and write the
    collected rows to out.csv (one row per bid, with a header row).

    Complete version of the snippet: the elided sections (`...`) are filled
    in with the scraping logic from the question so the code is runnable.
    """
    # Accumulate all rows here, starting with the header row.
    csvlst = [['BID number', 'Items', 'Quantity Required',
               'Department name and address', 'Start date', 'End date']]
    page_no = 1  # initial page number
    while True:
        # Build the dynamic per-page URL for the paginated bid listing.
        URL = 'https://bidplus.gem.gov.in/bidlists?bidlists&page_no=' + str(page_no)
        # verify=False because the site has certificate issues.
        scraped_data = requests.get(URL, verify=False)
        soup_data = bs(scraped_data.text, 'lxml')
        # Container div that holds all bid entries on this page.
        extracted_data = soup_data.find('div', {'id': 'pagi_content'})
        # Stop when the container is missing (find() returned None) or
        # present but empty -- end of the paginated results.
        if extracted_data is None or len(extracted_data) == 0:
            break
        for idx in range(len(extracted_data)):
            if idx % 2 == 1:  # bid entries live at odd child indexes
                bid_data = extracted_data.contents[idx].text.strip().split('\n')
                # One CSV row per bid, same fields the question printed.
                csvlst.append([bid_data[0], bid_data[5], bid_data[6],
                               bid_data[10], bid_data[16], bid_data[17]])
        page_no += 1  # advance to the next page

    # Write all accumulated rows in one pass.
    # newline='' prevents blank lines between rows on Windows.
    import csv
    with open("out.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(csvlst)

scrap_bid_data()
Upvotes: 1