Reputation: 412
I want to put the data I'm scraping from a website into a CSV file. My first attempt used Scrapy, but I couldn't get the syntax right. I then managed to scrape the data using BeautifulSoup, but I don't know how to write it to a CSV file.
import requests
from bs4 import BeautifulSoup
URL = "https://www.practo.com/tests/glycosylated-haemoglobin-blood/p?city=delhi"

# Download the page and parse the response body with the built-in HTML parser.
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

# Restrict every later lookup to the element whose id is "root-app".
results = soup.find(id='root-app')
job_elems = results.find_all('div', class_='u-padrl--std')

# For each matched container, look up the four fields and print their text,
# followed by an empty line. Example of the title markup being matched:
#   <h1 class="o-font-size--24 u-font-bold u-marginb--std">HbA1c Test</h1>
for job_elem in job_elems:
    title_elem = job_elem.find('h1', class_='o-font-size--24 u-font-bold u-marginb--std')
    also_known = job_elem.find('span', class_="u-font-bold")
    cost = job_elem.find('div', class_="o-font-size--22 u-font-bold o-f-color--primary")
    what_test = job_elem.find('div', class_="c-markdown--unstyled")
    # A disabled earlier attempt also looked up the div with class
    # "c-tabsection__content c-pp__accordion-item__content active" and the
    # first <p> of the container.
    for field in (title_elem, also_known, cost, what_test):
        print(field.text)
    print()

# Print the first <p> found inside every "c-markdown--unstyled" div.
# Related class names noted while inspecting the page:
#   c-tabsection__content c-pp__accordion-item__content active
#   c-tabsection c-pp__accordion-item u-br-rule u-padtb--std--half active
text_content = results.find_all('div', class_='c-markdown--unstyled')
for item in text_content:
    prep = item.find('p')
    print(prep.text)
    # NOTE(review): the pasted source lost its indentation; 'xxo' is assumed
    # to be a per-item separator printed inside this loop.
    print('xxo')
Upvotes: 0
Views: 347
Reputation: 2379
import requests
from bs4 import BeautifulSoup
# import the csv module
import csv
URL = "https://www.practo.com/tests/glycosylated-haemoglobin-blood/p?city=delhi"

# Download the page and parse the response body with the built-in HTML parser.
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')

# Restrict every later lookup to the element whose id is "root-app".
results = soup.find(id='root-app')
job_elems = results.find_all('div', class_='u-padrl--std')

# Header row and output path of the CSV file.
fields = ['title_elem', 'also_known', 'cost', 'what_test']
filename = "myfile.csv"

# Collect one row per matched container. Example of the title markup:
#   <h1 class="o-font-size--24 u-font-bold u-marginb--std">HbA1c Test</h1>
# The values stay as str: on Python 3, csv.writer would write bytes objects
# as their b'...' repr, so the encoding is applied once, when the file is
# opened, instead of per value.
rows = []
for job_elem in job_elems:
    title_elem = job_elem.find('h1', class_='o-font-size--24 u-font-bold u-marginb--std').text
    also_known = job_elem.find('span', class_="u-font-bold").text
    cost = job_elem.find('div', class_="o-font-size--22 u-font-bold o-f-color--primary").text
    what_test = job_elem.find('div', class_="c-markdown--unstyled").text
    rows.append([title_elem, also_known, cost, what_test])

# newline='' lets the csv module control line endings (otherwise an extra
# blank line appears after every row on Windows); encoding='utf-8' makes the
# output independent of the platform's default encoding.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header first, then all collected data rows.
    csvwriter.writerow(fields)
    csvwriter.writerows(rows)

# Print the first <p> found inside every "c-markdown--unstyled" div.
# Related class names noted while inspecting the page:
#   c-tabsection__content c-pp__accordion-item__content active
#   c-tabsection c-pp__accordion-item u-br-rule u-padtb--std--half active
text_content = results.find_all('div', class_='c-markdown--unstyled')
for item in text_content:
    prep = item.find('p')
    print(prep.text)
    # NOTE(review): the pasted source lost its indentation; 'xxo' is assumed
    # to be a per-item separator printed inside this loop.
    print('xxo')
Upvotes: 1
Reputation: 11
You can use the xlsxwriter library to write the data to an Excel (.xlsx) file instead.
import xlsxwriter
# Create a workbook targeting "file.xlsx" and add a single worksheet to it.
workbook = xlsxwriter.Workbook("file.xlsx")
worksheet = workbook.add_worksheet()
# row, column and element are placeholders that this snippet does not define:
# supply the target cell position and the value to store in that cell.
# NOTE(review): XlsxWriter documents row/column as zero-based - confirm.
worksheet.write(row, column, element)
# Close the workbook once all cells are written.
# NOTE(review): per the XlsxWriter docs this is the step that saves the file - confirm.
workbook.close()
Upvotes: 1