Reputation: 137
I'm trying to scrape job title
and required skills
of different jobs from a webpage. As I'm not an expert on selenium, I can't figure out how I can scrape content from inner pages and then click on the next pages cyclically using selenium. Currently, the logic of clicking on the next page is commented out within the "get_links" function.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Indeed search-results URL used as the scraping entry point
# (its query string carries q=developer and start=640).
link = 'https://www.indeed.com/jobs?q=developer&sc=0kf%3Aattr%28DSQF7%29%3B&start=640&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA&vjk=8008aba345c406ba'
def get_links(driver,link):
    """Open the results page at ``link`` and return the href of every job title on it.

    Waits up to 20 seconds for the job cards (``.job_seen_beacon``) to be
    present before reading the title anchors.
    """
    driver.get(link)
    job_cards = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))
    )
    title_selector = "h2 > a[class^='jcs-JobTitle']"
    return [
        card.find_element(By.CSS_SELECTOR, title_selector).get_attribute("href")
        for card in job_cards
    ]
    # Disabled attempt at clicking through to the next page:
    # try:
    # next_page = WebDriverWait(driver,20).until(EC.presence_of_element_located((By.CSS_SELECTOR,"a[aria-label='Next Page']")))
    # driver.execute_script("arguments[0].click();",next_page)
    # except Exception as err:
    # break
def get_content(link):
    """Load the job page at ``link`` and return its ``(title, skill)`` pair.

    Uses the module-level ``driver`` created in the ``__main__`` block rather
    than a parameter. ``skill`` is the empty string whenever the
    "Required Skills" lookup raises.
    """
    driver.get(link)
    header_present = EC.presence_of_element_located(
        (By.CSS_SELECTOR, "h1.jobsearch-JobInfoHeader-title")
    )
    title = WebDriverWait(driver, 20).until(header_present).text
    skills_xpath = "//*[@id='jobDescriptionText']//div[./div/b[contains(.,'Required Skills')]]"
    try:
        skills_node = driver.find_element(By.XPATH, skills_xpath)
        skill = skills_node.get_attribute("textContent")
    except Exception:
        skill = ""
    return title, skill
if __name__ == '__main__':
    # The name ``driver`` must stay as-is: get_content() reads it as a global.
    with webdriver.Chrome() as driver:
        job_links = get_links(driver, link)
        for job_link in job_links:
            details = get_content(job_link)
            print(details)
Upvotes: 1
Views: 513
Reputation: 33353
The most problematic part of this exercise is getting the job requirements, since this part appears differently in each job description. Below I defined 6 locators for it; you will probably need to add more locators like these to cover all the possible cases.
Here I'm iterating over each job listed on each page, open it and get the title and required skills description.
Each job listing on the page is first scrolled into view and only then clicked, so that the code works reliably.
In order to click the pagination button you need to scroll the page to make that element visible and clickable.
This should be done until "Next page" button appears.
Also, we need to close the cookies banner.
The following code works. It collects the job titles and job descriptions until we reached the last page.
My code is flat: I did not use functions to separate the code as you did, but the logic is the same, only improved.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

options = Options()
options.add_argument("start-maximized")
# Raw string so the backslashes in the Windows path are never treated as escape sequences.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 4)
actions = ActionChains(driver)
url = "https://www.indeed.com/jobs?q=developer&sc=0kf%3Aattr%28DSQF7%29%3B&start=640&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA&vjk=8008aba345c406ba"

next_page_selector = '[data-testid="pagination-page-next"]'

# Candidate XPaths for the requirements list, tried in this order.
# Job descriptions are laid out differently per posting, so extend this list as needed.
skills_locators = [
    "//ul[preceding-sibling::p[contains(.,'Skills')]]",
    "//ul[preceding-sibling::p[contains(.,'Job Qualifications')] and following-sibling::p[contains(.,'Additional Qualifications')]]",
    "//ul[preceding-sibling::p[contains(.,'Required')] and following-sibling::p[contains(.,'Preferred')]]",
    "//ul[preceding-sibling::p[contains(.,'ust have')] and following-sibling::p[contains(.,'ice to')]]",
    "//ul[preceding-sibling::p[contains(.,'qualifications')]]",
    "//ul[preceding-sibling::*[contains(.,'Job Requirements')] and following-sibling::p[contains(.,'Class')]]",
]


def find_qualification(drv):
    """Return the first element matched by any skills locator, or False to keep waiting.

    ``find_elements`` returns an empty list instead of raising, so every
    locator really gets tried. Chaining ``find_element(...) or ...`` never
    gets past the first locator because ``find_element`` raises on a miss.
    """
    for xpath in skills_locators:
        matches = drv.find_elements(By.XPATH, xpath)
        if matches:
            return matches[0]
    return False


jobs = []
try:
    driver.get(url)
    try:
        wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "gnav-CookiePrivacyNoticeButton"))).click()
    except TimeoutException:
        # No cookie banner was shown within the wait; nothing to dismiss.
        pass

    while True:
        # Scrape the current page first, so the last page (which has no "next" button) is included.
        for item in WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))):
            # Accessing this property scrolls the card into view before it is clicked.
            item.location_once_scrolled_into_view
            item.find_element(By.CSS_SELECTOR, "h2 > a[class^='jcs-JobTitle']").click()
            title = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".jobsearch-JobInfoHeader-title-container h1"))).text
            try:
                qualification = wait.until(find_qualification).text
            except TimeoutException:
                # None of the known locators matched this job description.
                qualification = " "
            # Same record shape as before: a list holding one dict per field.
            jobs.append([{'title': title}, {'qualification': qualification}])

        next_buttons = driver.find_elements(By.CSS_SELECTOR, next_page_selector)
        if not next_buttons:
            break
        pagination_btn = next_buttons[0]
        # Scroll the pagination button into view so it is clickable.
        pagination_btn.location_once_scrolled_into_view
        pagination_btn.click()
finally:
    # Always shut the browser down, even if scraping fails part-way.
    driver.quit()

for job in jobs:
    print(job)
To represent the collected data I finally printed all the collected job details. The final output is:
[{'title': 'PHP Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'JR .NET Developer (100% Remote)\n- job post'}, {'qualification': 'Understanding of computer science concepts, object-oriented design principles\nSome experience developing software in different programming languages\nSome experience in backend development design and implementation\nSome experience in responsive web front end / single page application development using modular JavaScript including apps targeted for mobile devices'}]
[{'title': 'Software Engineer - 100% Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Golang Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Developer - Northwest Florida State College\n- job post'}, {'qualification': ' '}]
[{'title': 'BACKEND DEVELOPER PHP & AWS\n- job post'}, {'qualification': ' '}]
[{'title': 'Front End Engineer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'C++ developer\n- job post'}, {'qualification': '6+ years of experience actively working with the C++ programming language.\nFamiliar with the C++ 14 and 17 specs.\nUsed to the Visual Studio IDE (2019 and 2022).\nComfortable contributing in large projects (400k+ lines of code) involving several repositories.\n4+ years of experience developing Applications (or WinNT Services) for Windows using the Win32 API set.\nExpertise developing WinNT Services using C++.\nExpertise developing class libraries and creating shared libraries (DLLs).\nComfortable with all the following Windows specific behavior, features, and tools:\nPower Awareness.\nLocal System Vs Current User privileges.\nCOM interfaces.\nWinDbg.\nPerformance Monitor.\nFamiliarity with the C# programming language\nUnderstanding of wrapping/interoperability techniques from Native C++ to Managed C#.\nAwareness of UWP application sandboxing'}]
[{'title': 'Senior Front End Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Full-Stack Software Developer (New 911 Telehealth Technology!)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Intermediate React Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Oracle PL/SQL Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Front End Developer (freelance)\n- job post'}, {'qualification': ' '}]
[{'title': 'Node.JS Developer - Backend - 100%Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer I (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Junior Software Engineer (Entry Level)\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Backend Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'React Native App Developer\n- job post'}, {'qualification': 'Proficient in full stack web development\n5+ years of software development\n3+ years React Native App Development\nGit/GitHub or similar\nJavascript experience'}]
[{'title': 'REMOTE - Full Stack Developer\n- job post'}, {'qualification': 'Experience writing full stack applications with React.js and NodeJS\nExperience with supporting applications in a serverless environment, AWS experience preferred\nExperience with git\nExperience developing in a Docker environment preferred\nExperience with SQL, familiarity with NoSQL a plus.'}]
[{'title': 'Python Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'PHP/MySQL Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'FRONT END DEVELOPER\n- job post'}, {'qualification': ' '}]
[{'title': 'application developer I, ServiceNow - ST\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer (Elixir/Phoenix)\n- job post'}, {'qualification': ' '}]
[{'title': 'IVA/IVR Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Front End Senior Developer I\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Software Engineer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Application Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'SENIOR WEB DEVELOPER\n- job post'}, {'qualification': ' '}]
[{'title': 'Remote SharePoint Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Back End Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'UI Developer\n- job post'}, {'qualification': 'Minimum of three years working as a professional UI developer for Web Applications.'}]
[{'title': 'Application Developer II\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior JavaScript Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Oracle PL/SQL Developer\n- job post'}, {'qualification': '9+ years’ experience with Oracle databases with the capability to write complex queries and develop complex PL/SQL database objects, including both DML and DDL.\n9+ years’ experience developing Oracle Forms\n5+ years’ experience with database and/or SQL performance tuning\nExperience developing application reporting, preferably with PHP and/or Classic ASP 3.0\nExperience using code repositories, preferably Git\nExperience using continuous integration tools, preferably Jenkins\nKnowledge of relational database designs\nKnowledge of Information Systems Development Methodology (ISDM)\nExperience with database query tools (i.e., TOAD, SQL Developer, SQL Navigator)\nKnowledge of database security, including role-based security\nKnowledge and experience with Unified Modeling Language (UML)\nAbility to be creative, to use sound judgment, and to display foresight to identify potential problems and design/specifications and assigned application software systems\nAbility to establish and maintain effective working relationships with others\nAbility to work independently\nAbility to determine work priorities and ensure proper completion of work assignments\nAbility to work well under pressure and meet deadlines without sacrificing quality\nExcellent interpersonal, collaborative, oral and written communication skills'}]
[{'title': 'Angular Developer, Front End (remote for local candidates)\n- job post'}, {'qualification': ' '}]
[{'title': 'Full Stack Developer\n- job post'}, {'qualification': "5+ years writing high performance, multi-tier, secure web apps\nStrong software development discipline while still being flexible enough to do what’s needed in a pinch\nStrong SQL skills in both Query and Schema design\nGood understanding of REST without being dogmatic\nUnderstand the value of documentation, and consistently put it into practice\nSome experience with Microservice architecture (for legacy code)\nPassing familiarity with GraphQL will be useful for porting\nMust be self-driven and able to work independently. We'll give you everything you need to get a solid start and we're helpful team players, but we won't micromanage you"}]
[{'title': 'PHP Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer I (Remote Option*)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Application and Integration Developer - Atlanta, GA or Remote (non-GA)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Lead Front End Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Full-Stack Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer III\n- job post'}, {'qualification': ' '}]
[{'title': 'LARAVEL DEVELOPER\n- job post'}, {'qualification': ' '}]
[{'title': 'Business Intelligence Developer - Remote, Nationwide\n- job post'}, {'qualification': ' '}]
[{'title': 'Java Developer, Back End (remote for local candidates)\n- job post'}, {'qualification': ' '}]
[{'title': 'DevOps Engineer - 100% Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Associate Software Engineer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Developer - React\n- job post'}, {'qualification': 'Proficient in using HTML5, JavaScript and CSS to produce progressive and responsive user interfaces for the web.\nExperience with ReactJS is required\nUnderstands principles of User Experience (UX) design and can apply these principles to front end development.\nExperience with GIT for source code management'}]
[{'title': 'Software Developer in Test\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Application Developer, Senior\n- job post'}, {'qualification': ' '}]
[{'title': 'Application Developer III- Remote Role\n- job post'}, {'qualification': ' '}]
[{'title': 'Database Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Software Engineer - Site Reliability\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer - .Net\n- job post'}, {'qualification': ' '}]
[{'title': 'Application Developer III\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Java Developer- Remote/Hybrid\n- job post'}, {'qualification': ' '}]
[{'title': 'IT Application Developer-Olympia - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software/Web Development Teaching Assistant (USA - Virtual/Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer (Remote Work Options)\n- job post'}, {'qualification': ' '}]
[{'title': 'Python Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Developer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Application Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'ADA Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Microsoft Power Platform Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'UI Developer (React)\n- job post'}, {'qualification': ' '}]
[{'title': 'Frontend Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer, Early Career\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer, Tools (Open to Remote) - Overwatch\n- job post'}, {'qualification': ' '}]
[{'title': 'REMOTE - Web Consultant/Lead Web Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer III\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Software Engineer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Junior RPA Developer\n- job post'}, {'qualification': ' '}]
[{'title': '.NET Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'WEB DEVELOPER III (REMOTE)\n- job post'}, {'qualification': ' '}]
[{'title': 'Customer Success, Public Sector Lead\n- job post'}, {'qualification': ' '}]
[{'title': 'WORDPRESS DEVELOPER\n- job post'}, {'qualification': ' '}]
[{'title': 'DevOps Mentor (Part-time)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer II (Remote Option*)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior SQL ETL Developer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer II (ServiceNow)\n- job post'}, {'qualification': ' '}]
[{'title': 'Full Stack Web Developer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Sr. Java Application Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Mobile Application Developer - FTE\n- job post'}, {'qualification': 'Angular, any version\nAndroid/iOS Native configuration & deployments\nBluetooth hardware integrations\nIonic\nRedux/NgRx\nAzure Devops\nSource Control in GIT\nVisual Studio Code\nNPM\nTypescript\nObservables\nLogging over Application Insights\nCI/CD workflows'}]
[{'title': 'Senior Application Developer - Crop\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer Engineer in Test (SDET)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Frontend Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Manager - Software Engineering (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Producer, Games - External Development\n- job post'}, {'qualification': ' '}]
[{'title': 'Engineering Software Developer (OFS-HQ) Tomball, TX\n- job post'}, {'qualification': ' '}]
[{'title': 'Full Stack Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Microsoft 365 Developer (Fully Remote)\n- job post'}, {'qualification': 'Senior Developer with at least 6 years of experience professional experience\nTypeScript, JavaScript,SPFx, SASS/CSS3 & HTML 5\nReact\nHandlebars'}]
[{'title': 'Senior Frontend Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer\n- job post'}, {'qualification': 'Java\nSpring Boot\nSpring Framework\nSQL\nPostman\nC#\nOnBase API\nJavaScript\nHTML/CSS\nVisual Basic\nIntelliJ\nGit\nGitHub\nOnBase Studio\nAgile Development\nSCRUM'}]
[{'title': 'Software Engineer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Sr. Software Developer - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Android Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Sr. Automation Engineer - Evernorth (Work at Home)\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Application Developer ( REMOTE)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Back End Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Principal Django Engineer (Clean Tech Job) (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Test Automation Engineer (JR14366)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer, Entry\n- job post'}, {'qualification': ' '}]
[{'title': 'Application Developer\n- job post'}, {'qualification': 'Object Oriented Development: Java, C++, C#, Python, R, PHP, Visual Basic.NET, JavaScript, Ruby, Perl, SIMSCRIPT, Object Pascal, Objective-C, Dart, Swift, Scala, Kotlin, Common Lisp, MATLAB, and Smalltalk.\nAPI development\nDevOps experience, not just the tools but the mindset like continuous improvement. Tools for us would be Git, GitHub, JFrog, AzureDevOps\nAgile experience, specifically scrum\nGood Engineer Practices like automated unit testing, test-driven development\nCloud experience, specifically AWS if possible\nFundamental knowledge of data to be able to create scripts and data patchers\nBonus is automated testing development in Postman, Rest Assured, Robot Framework'}]
[{'title': 'Principal Platform Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Aid Information Management System (AIMS) Software Developer – Consultant\n- job post'}, {'qualification': ' '}]
[{'title': 'Cloud Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Digital Products- Fullstack Developer\n- job post'}, {'qualification': 'Applying experience with technologies such as Node.js, React, NoSQL (Document) or SQL (Relational);\nApplying Node.js and React experience with functional components, state management and associated technologies;\nWorking with technology stack ES6, Node.js with Express, Couchbase, React with Redux, Webpack, Git, npm and related technologies;\nMaintaining updated knowledge of popular Node.js and React libraries/components in open source community;\nHaving Bootstrap 4 experience with components, classes, layouts, grids and the other Bootstrap features;\nUsing mixins, partials, variables etc. for SASS, the other CSS processors like PostCSS, mastery of cross-browser and cross-platform issues, responsive/adaptive and different layout techniques;\nApplying CSS3/HTML5 usage and modern features (Flexbox, CSS grids, media queries and CSS custom properties), CSS naming conventions (BEM) and CSS linting;\nDemonstrating the ability to brainstorm, concept, and collaborate with UX Team to come up with style guidelines;\nTranslating Invision designs into flexible and reusable React components using HTML5, JSX and SASS;\nApplying modern CSS methodologies (CSS in JS, styled components, Glamorous,CSS modules, BEM or Atomic Design etc.) and CSS tools (Modernizr, Autoprefixer, CSS linting, Stylelint, CSS linting etc.) into our React components;\nProducing minimal and clean SASS code with more reusability and reviews/fixes code by the other developers in the team;\nFollowing and implementing the latest coding trends, tricks/hacks and best practices along with conveying the message to the other team members; and,\nDemonstrating proven verbal and written communication skills and able to interact professionally with a diverse group of people.'}]
[{'title': 'Senior .Net Software Developer – Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer I (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Consulting Software Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior IT Automation and Quality Engineer - Remote Opportunity\n- job post'}, {'qualification': ' '}]
[{'title': 'Sr. Software Engineer (REMOTE)\n- job post'}, {'qualification': ' '}]
[{'title': 'Cloud Engineer (m/f/d)\n- job post'}, {'qualification': ' '}]
[{'title': 'Lead Developer Microsoft C# / JavaScript\n- job post'}, {'qualification': ' '}]
[{'title': 'Staff Software Engineer - Web Platform\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Quality Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Developer III - Paramount - Full Time - Days - Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineering Lead Analyst - Evernorth - Work From Home\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'PHP Developer III\n- job post'}, {'qualification': ' '}]
[{'title': 'Sr. iOS Developer - Rubicon\n- job post'}, {'qualification': ' '}]
[{'title': '.NET DEVELOPER WITH WPF\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior FullStack Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'VR Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Science Engagement Engineer (Remote Friendly)\n- job post'}, {'qualification': ' '}]
[{'title': 'Full Stack Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'AWS / Python / Java Developer (CONTRACT) REMOTE\n- job post'}, {'qualification': ' '}]
[{'title': 'Java Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Database Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'RPA Developer - Work from Home\n- job post'}, {'qualification': ' '}]
[{'title': '.NET Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Full Stack Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer - .Net - 100% Remote\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Web Programmer\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer - Editor\n- job post'}, {'qualification': ' '}]
[{'title': 'Full-Stack Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Full Stack Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Frontend Software Engineer\n- job post'}, {'qualification': ' '}]
[{'title': 'Backend Developer Engineer - Remote (2022-4674)\n- job post'}, {'qualification': ' '}]
[{'title': 'Developer (Remote)\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Engineer, Frontend\n- job post'}, {'qualification': ' '}]
[{'title': '(Remote) Instructional Designer and Developer - OLAP\n- job post'}, {'qualification': ' '}]
[{'title': 'Remote - Senior C#, .Net, Backend Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'DevOps Engineer (New 911 Telehealth Technology!)\n- job post'}, {'qualification': ' '}]
[{'title': 'Python Developer Relations Engineer / DevRel\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Engineer II\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Javascript Developer - REMOTE\n- job post'}, {'qualification': ' '}]
[{'title': 'Database Administrator/ SQL Developer (Remote in US)\n- job post'}, {'qualification': ' '}]
[{'title': 'Software Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'Senior Oracle Developer\n- job post'}, {'qualification': ' '}]
[{'title': 'REMOTE - Sr. Backend Engineer\n- job post'}, {'qualification': 'Experience developing enterprise APIs using NodeJS with Typescript\nExperience with supporting applications in a serverless environment, AWS experience preferred\nExperience with git\nExperience developing in a Docker environment preferred\nExperience with SQL, familiarity with NoSQL a plus.'}]
[{'title': 'Full Stack Python/Django Engineer (Remote, US)\n- job post'}, {'qualification': ' '}]
Upvotes: 3
Reputation: 73
One strategy to finish the job by sticking to your original plan is to do it this way.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Starting search-results URL (q=developer, start=640); the __main__ loop below
# rebinds it to each following results page.
link = 'https://www.indeed.com/jobs?q=developer&sc=0kf%3Aattr%28DSQF7%29%3B&start=640&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA&vjk=8008aba345c406ba'
def get_links(driver,link):
    """Navigate to ``link`` and collect the job-title hrefs found on that results page.

    Returns a list with one href per ``.job_seen_beacon`` card, in page order.
    """
    driver.get(link)
    cards_present = EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))
    hrefs = []
    for card in WebDriverWait(driver, 20).until(cards_present):
        anchor = card.find_element(By.CSS_SELECTOR, "h2 > a[class^='jcs-JobTitle']")
        hrefs.append(anchor.get_attribute("href"))
    return hrefs
def get_content(link):
    """Return ``(title, skill)`` scraped from the job posting at ``link``.

    Reads the global ``driver`` opened in the ``__main__`` block. When the
    "Required Skills" element lookup raises, ``skill`` is ``""``.
    """
    driver.get(link)
    title_locator = (By.CSS_SELECTOR, "h1.jobsearch-JobInfoHeader-title")
    title_element = WebDriverWait(driver, 20).until(EC.presence_of_element_located(title_locator))
    title = title_element.text
    try:
        skill = driver.find_element(
            By.XPATH,
            "//*[@id='jobDescriptionText']//div[./div/b[contains(.,'Required Skills')]]",
        ).get_attribute("textContent")
    except Exception:
        skill = ""
    return title, skill
if __name__ == '__main__':
    # Imported here so this script section stays self-contained.
    from selenium.common.exceptions import TimeoutException

    # The name ``driver`` must stay as-is: get_content() reads it as a global.
    with webdriver.Chrome() as driver:
        while True:
            for item in get_links(driver, link):
                print(get_content(item))
            # get_content() navigated to the job pages, so reload the listing
            # before looking for the pagination control.
            driver.get(link)
            listing_url = driver.current_url
            try:
                next_page = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[aria-label='Next Page']")))
                driver.execute_script("arguments[0].click();", next_page)
                # Wait for the navigation to happen; reading current_url straight
                # after the click could return the old page and repeat it.
                WebDriverWait(driver, 20).until(EC.url_changes(listing_url))
            except TimeoutException:
                # No "Next Page" link (or the click led nowhere): last page reached.
                break
            link = driver.current_url
Upvotes: 3
Reputation: 386
You actually don't need Selenium to click through to the next page. If you inspect the next-page button element, you can see an href there. First write a function that gets the URLs of all the pages, then get the job posting links. Afterward, you can scrape all the necessary information from the job posting pages. I would only use Selenium if there really is no other way to move between pages, as Selenium makes your scraper slow compared to other solutions.
Edit: Here is a possible solution. Move the movement to another page into a separate function. The function will return a list of links to the different pages. Then you can loop over as you did, but instead of giving get_links()
the link defined, you have to pass the different page links. Be aware that you also need to change your get_content()
function, in order to stop and move to the next page when all the titles and skills are scraped for a page. Hope this helps :)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Search-results URL (q=developer, start=640) passed to get_pages() as the starting page.
link = 'https://www.indeed.com/jobs?q=developer&sc=0kf%3Aattr%28DSQF7%29%3B&start=640&pp=gQPAAAABhR6C4g8AAAAB8f6BVABIAQEBBg-PHLEDms2oSIodfSmVxw09STnASEoBTK5mKYOEa4i4O_Ur1l0A-QxgzLqNt1E6GP8A47DqWEqCMSpmIabUq7qaIzRCAAA&vjk=8008aba345c406ba'
def get_links(driver,link):
    """Load the results page ``link`` and return the list of job-title hrefs on it."""
    driver.get(link)
    locate_cards = EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".job_seen_beacon"))
    cards = WebDriverWait(driver, 20).until(locate_cards)
    # Lazy generator: each anchor is located right before its href is read.
    anchors = (
        card.find_element(By.CSS_SELECTOR, "h2 > a[class^='jcs-JobTitle']")
        for card in cards
    )
    return [anchor.get_attribute("href") for anchor in anchors]
# new function
def get_pages(driver, link):
    """Return the URLs of every results page reachable from ``link``.

    The list starts with ``link`` itself, so the first page is scraped too,
    followed by the href of each successive "Next Page" anchor. Pages are
    visited with ``driver.get(href)`` rather than a JavaScript click, so the
    next lookup always runs against the newly loaded page.

    Args:
        driver: Selenium WebDriver used for navigation.
        link: URL of the first results page.

    Returns:
        List of page URLs in visiting order.
    """
    from selenium.common.exceptions import TimeoutException

    driver.get(link)
    pages = [link]
    while True:
        try:
            next_page = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "a[aria-label='Next Page']")))
        except TimeoutException:
            print("No more pages")
            break
        href = next_page.get_attribute('href')
        if not href or href in pages:
            # Missing or already-seen target: stop rather than loop forever.
            print("No more pages")
            break
        pages.append(href)
        driver.get(href)
    return pages
def get_content(link):
    """Scrape ``(title, skill)`` from the job page at ``link``.

    Uses the global ``driver`` opened in the ``__main__`` block. Returns
    ``None`` if waiting for the title raises. ``skill`` is ``""`` when the
    "Required Skills" lookup raises.
    """
    driver.get(link)
    try:
        title = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.jobsearch-JobInfoHeader-title"))
        ).text
    except Exception:
        # Title never appeared: give up on this page.
        return None
    skills_xpath = "//*[@id='jobDescriptionText']//div[./div/b[contains(.,'Required Skills')]]"
    try:
        skill = driver.find_element(By.XPATH, skills_xpath).get_attribute("textContent")
    except Exception:
        skill = ""
    return title, skill
if __name__ == '__main__':
    # The name ``driver`` must stay as-is: get_content() reads it as a global.
    with webdriver.Chrome() as driver:
        result_pages = get_pages(driver, link)
        for page_url in result_pages:
            job_urls = get_links(driver, page_url)
            for job_url in job_urls:
                print(get_content(job_url))
Upvotes: 2