Issue with web scraping

I am having a problem with web scraping using Beautiful Soup. This is the URL, http://desiopt.com/company/4316/VST-CONSULTING-INC/, from which I'm trying to scrape the company info details.

from selenium import webdriver
import bs4
import pandas as pd
from bs4 import BeautifulSoup
import re
# Load the company profile page in Chrome, pull the text of each "userInfo"
# block found inside a "headerBgBlock" container, and save it to products1.csv.
# NOTE(review): the executable_path keyword is not accepted by every Selenium
# release - confirm against the installed version.
driver = webdriver.Chrome(executable_path=r"C:/Users/Chandra Sekhar/Desktop/chrome-driver/chromedriver.exe")
titles = []
try:
    driver.get("http://desiopt.com/company/4316/VST-CONSULTING-INC/")
    content = driver.page_source
finally:
    # Always shut the browser down, even when the page load fails.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(content, 'html.parser')

# Do not filter on href: <div> elements have no href attribute, so such a
# filter would exclude every candidate container.
for a in soup.find_all('div', attrs={'class': 'headerBgBlock'}):
    title = a.find('div', attrs={'class': 'userInfo'})
    if title is None:
        # Container without a userInfo child: nothing to record.
        continue
    text = title.text.strip()
    print(text)
    titles.append(text)

# Build the frame and write the CSV once, after everything has been collected.
# Only the title column exists; no 'Price' data is gathered by this script.
df = pd.DataFrame({'Product Title': titles})
df.to_csv('products1.csv', index=False)

Upvotes: 0

Views: 253

Answers (2)

Saurabh
Saurabh

Reputation: 1005

from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

# Launch Chrome through the local chromedriver binary and open the start page
driver = webdriver.Chrome(
    '/Users/sauraverma/Desktop/web_scraping/scraping_projects/chromedriver'
)
driver.get('https://ca.indeed.com/')

# Repeatedly parse whatever page the browser is currently showing.
# NOTE(review): nothing visible in this snippet advances to another page or
# leaves the loop - confirm where pagination and the exit condition live.
while True:
    # Snapshot the browser's current HTML
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # Visit every job card on the page and build the absolute URL of its posting
    for post in soup.find_all('div', class_='job_seen_beacon'):
        anchor = post.find('a', class_='jcs-JobTitle css-jspxzf eu4oa1w0')
        link = anchor.get('href')
        link_full = 'https://ca.indeed.com' + link

Upvotes: 0

import requests
from bs4 import BeautifulSoup

# Fetch the company profile page and print every <span> found inside the
# "compProfileInfo" blocks (company name, phone, email, web site).
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get('http://desiopt.com/company/4316/VST-CONSULTING-INC/', timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')


for item in soup.find_all('div', attrs={'class': 'compProfileInfo'}):
    for a in item.find_all('span'):
        print(a.text.strip())

Output:

VST CONSULTING INC
Phone
732-491-8681
Email
bindu@vstconsulting.com
Web Site
www.vstconsulting.com

Upvotes: 2

Related Questions