IvelinI

Reputation: 21

Trying to parse a website with Python: I want to save to CSV and run on multiple pages

I am new to Python and web scraping. I tried to do it myself but I got stuck.

I would like to scrape efinancialcareers.com for job offers. I wrote the code to get to the elements of the HTML, and I can print them to the console, but I need help saving the data to CSV and running the script on all result pages. Here is the code:

import requests
from bs4 import BeautifulSoup
import csv
import datetime
print datetime.datetime.now()
url = "http://www.efinancialcareers.com/search?page=1&sortBy=POSTED_DESC&searchMode=DEFAULT_SEARCH&jobSearchId=RUJFMEZDNjA2RTJEREJEMDcyMzlBQ0YyMEFDQjc1MjUuMTQ4NTE5MDY3NTI0Ni4tMTQ1Mjc4ODU3NQ%3D%3D&updateEmitter=SORT_BY&filterGroupForm.includeRefreshed=true&filterGroupForm.datePosted=OTHER"
response = requests.get(url)
html = response.content
soup = BeautifulSoup(html,'lxml')

f = open ('EFINCAR.txt', 'w')
f.write('Job name;')
f.write('Salary;')
f.write('Location;')
f.write('Position;')
f.write('Company')
f.write('Date')
f.write('\n')


# Job name
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for details in container.find_all('li',{'class':'jobPreview well'}):
        for h3 in details.find_all('h3'):
            job=h3.find('a')
        print(job.text)

# Salary
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            salary=details.find('li',{'class':'salary'})
            print(salary.text)

# Location
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            location=details.find('li',{'class':'location'})
            print(location.text)

# Position
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            position=details.find('li',{'class':'position'})
            print(position.text)

# Company
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            company=details.find('li',{'class':'company'})
            print(company.text)

# Date
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            datetext=details.find('li',{'class':'updated'})
            print(datetext.text)

#       Attributes assignment section

#       Job Name
job_name = job.get_text()
f.write(job_name.encode('utf-8'))
f.write(';')

#       Salary

salary_name = salary.get_text()
f.write(salary_name.encode('utf-8'))
f.write(';')

#       location
location_name = location.get_text()
location_name = location_name.strip()
f.write(location_name.encode('utf-8'))
f.write(';')

#       position
position_name = position.get_text()
position_name = position_name.strip()
f.write(position_name.encode('utf-8'))
f.write(';')

#       company
company_name = company.get_text()
company_name = company_name.strip()
f.write(company_name.encode('utf-8'))
f.write(';')

#       Datetext
datetext_name = datetext.get_text()
datetext_name = datetext_name.strip()
f.write(datetext_name.encode('utf-8'))
f.write(';')
f.write('\n')

f.close()
print('Finished!')

Upvotes: 0

Views: 152

Answers (1)

bpavlov

Reputation: 1090

Welcome to StackOverflow!

Let's have a look at your code.

You have six three-level-nested for loops (18 for loops in total). As you can see, they are almost identical and all start with:

for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):

So instead of writing the same code six times, you could write it only once and do everything inside it. For example, this:

# Salary
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            salary=details.find('li',{'class':'salary'})
            print(salary.text)

# Location
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            location=details.find('li',{'class':'location'})
            print(location.text)

Could be written as:

# Salary & Location
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            location=details.find('li',{'class':'location'})
            salary=details.find('li',{'class':'salary'})
            print(salary.text)
            print(location.text)

It is considered good practice to write DRY (don't repeat yourself) code.

The reason you are seeing the parsed HTML data in the console is that you have print(XXXXX) calls inside your for loops: each element is printed to the console as it is parsed.

You are NOT seeing data in your text file (EFINCAR.txt) because your f.write(xxxx) calls are OUTSIDE your for loops. You should move them next to the print(xxxx) calls.

For example:

# Salary
for container in soup.find_all('div',{'class':'jobListContainer'}):
    for JobsPreview in container.find_all('li',{'class':'jobPreview well'}):
        for details in JobsPreview.find_all('ul',{'class':'details'}):
            salary=details.find('li',{'class':'salary'})
            print(salary.text)
            salary_name = salary.get_text()
            f.write(salary_name.encode('utf-8'))
            f.write(';')

When you do that you will notice that there is something wrong with parsing the html.

HINT: Be careful with tabs, new lines and whitespaces.

To save the data to CSV properly, you should strip them out while parsing. You could skip that, but the result may look ugly.
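Putting those pieces together, here is one way (a sketch in Python 3 syntax, not tested against the live site) to collect all six fields in a single pass and write them with Python's built-in csv module, stripping stray whitespace first. The jobs list below is made-up stand-in data for what the details.find(...).get_text() calls would return:

```python
import csv
import io

# Stand-in rows; in the real script each value would come from
# details.find('li', {'class': ...}).get_text() inside ONE loop pass.
jobs = [
    {'job': '  Quant Analyst\n', 'salary': 'Competitive ', 'location': ' London',
     'position': 'Permanent', 'company': ' BigBank ', 'updated': ' 25 Jan 17\n'},
]

fields = ['job', 'salary', 'location', 'position', 'company', 'updated']

buf = io.StringIO()                      # use open('EFINCAR.csv', 'w', newline='') for a real file
writer = csv.writer(buf, delimiter=';')
writer.writerow(fields)                  # header row
for row in jobs:
    # strip() removes the tabs/newlines mentioned in the hint above
    writer.writerow(row[f].strip() for f in fields)

print(buf.getvalue())
```

Letting csv.writer handle the delimiter also saves you from writing the ';' separators by hand (and from bugs like a missing separator between columns).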

Finally, if you want to run your script for a couple of pages, or for all of them, you should check how the page number appears in your request URL. For example, in your case for page 1 you have:

http://www.efinancialcareers.com/search?page=1XXXXXXXXXXXXXXX

for page 2 you have:

http://www.efinancialcareers.com/search?page=2XXXXXXXXXXXXXXX

That means that you should run your code with URL = http://www.efinancialcareers.com/search?page={NUMBER_OF_PAGE}XXXXXXXXXXXXXXX

where NUMBER_OF_PAGE runs from 1 to LAST_PAGE. So instead of hard-coding the URL, you can simply loop over the page numbers and generate each URL as described above.
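A minimal sketch of that loop (the query string is shortened here, just like in the URLs above; LAST_PAGE is a placeholder whose real value you would read from the site's pagination):

```python
# Build one URL per results page; '{page}' is substituted for each number.
# Only 'sortBy' from the original query string is kept in this shortened example.
BASE = 'http://www.efinancialcareers.com/search?page={page}&sortBy=POSTED_DESC'

LAST_PAGE = 3  # placeholder; determine the real last page from the site

urls = [BASE.format(page=n) for n in range(1, LAST_PAGE + 1)]
for url in urls:
    print(url)
    # response = requests.get(url)   # then parse response.content as above
```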

Upvotes: 1
