Need help in Python web scraping

Question

I wrote some simple code to scrape the title, address, contact person, phone number and website link, but my program only scrapes the title. I don't know how to scrape the other fields because their elements have no classes or IDs.

Here is my code:

import requests
from bs4 import BeautifulSoup
import csv

def get_page(url):
    """Download *url* and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built with ``html.parser``, or ``None`` when
        the server answers with a non-OK status (the status code is printed).
    """
    response = requests.get(url)

    if not response.ok:
        print('server responded:', response.status_code)
        # Return explicitly: there is nothing to parse, and falling through
        # to a shared ``return soup`` would hit an unbound local variable.
        return None

    return BeautifulSoup(response.text, 'html.parser')

def get_detail_data(soup):
    """Print the title, address, contact person, phone number and website.

    Each field is looked up independently. A field whose markup cannot be
    found is replaced by its placeholder string, so one missing element never
    prevents the remaining fields from being printed.

    Args:
        soup: Parsed listing page (any object exposing BeautifulSoup's
            ``find``). ``None`` is tolerated and yields only placeholders.

    Returns:
        None. The five values are written to stdout, one per line.
    """
    # A failed lookup surfaces as ``None`` (AttributeError / TypeError on the
    # next access) or as a missing attribute key (KeyError).
    lookup_errors = (AttributeError, TypeError, KeyError)

    try:
        title = soup.find('a', class_="ListingDetails_Level1_SITELINK", id=False).text
    except lookup_errors:
        title = 'empty'
    print(title)

    try:
        contact_info = soup.find('div', class_="ListingDetails_Level1_CONTACTINFO", id=False)
        # find_all() returns a list of tags, which has no ``.text`` of its
        # own, so the text has to be collected span by span.
        pieces = [span.text.strip() for span in contact_info.find_all('span')]
        address = ' '.join(piece for piece in pieces if piece) or "address"
    except lookup_errors:
        address = "address"
    print(address)

    try:
        # NOTE(review): class name taken from the listing page's markup as
        # used by the working snippet for this site - confirm it still matches.
        person_name = soup.find('span', class_="ListingDetails_Level1_MAINCONTACT").text
    except lookup_errors:
        person_name = "empty person"
    print(person_name)

    try:
        # NOTE(review): an <img> element carries no text of its own, so this
        # presumably yields an empty string when it matches. The phone
        # number's real markup is not known here - confirm the selector.
        phone_no = soup.find('img', class_="", id=False).text
    except lookup_errors:
        phone_no = "empty phone no"
    print(phone_no)

    try:
        # The website is the link target (href), not the anchor's text.
        site_holder = soup.find('span', class_="ListingDetails_Level1_VISITSITE")
        website = site_holder.find('a').attrs['href']
    except lookup_errors:
        website = "empty website"
    print(website)




def main():
    """Fetch the sample listing page and print its details."""
    listing_url = "https://secure.kelownachamber.org/Pools-Spas/Rocky%27s-Reel-System-Inc-4751"
    page = get_page(listing_url)
    get_detail_data(page)


if __name__ == '__main__':
    main()

Pulkit Bansal · Accepted Answer

The following code worked for me. It is only meant to show how you can fetch the data from that website, so I kept it simple:

import requests
from bs4 import BeautifulSoup
import sys

# Download the listing page and parse it.
result = requests.get("https://secure.kelownachamber.org/Pools-Spas/Rocky%27s-Reel-System-Inc-4751")
src = result.content
soup = BeautifulSoup(src, 'html.parser')

# Each header box holds one listing's title, address and contact details.
divs = soup.find_all("div", attrs={"class": "ListingDetails_Level1_HEADERBOXBOX"})
for tag in divs:
    try:
        title = tag.find("a", attrs={"class": "ListingDetails_Level1_SITELINK"}).text
        address = tag.find("span", attrs={"itemprop": "street-address"}).text
        postal = tag.find("span", attrs={"itemprop": "postal-code"}).text
        maincontact = tag.find("span", attrs={"class": "ListingDetails_Level1_MAINCONTACT"}).text
        siteTag = tag.find("span", attrs={"class": "ListingDetails_Level1_VISITSITE"})
        site = siteTag.find("a").attrs['href']
    except (AttributeError, KeyError) as exc:
        # find() returns None for a missing element and attrs[...] raises
        # KeyError for a missing attribute. Skip such a listing, but say so
        # instead of hiding every possible error.
        print("skipping listing with incomplete markup:", exc, file=sys.stderr)
        continue

    # Printed only once every field was found, so output is all-or-nothing
    # per listing.
    print(title)
    print(address)
    print(postal)
    print(maincontact)
    print(site)

Need help in Python web scraping

Answers (2)

Related Questions