qwpr
qwpr

Reputation: 49

Webscraper throwing an error after website changed their code

I built a webscraper for realtor.com, as I am looking for houses and agents in my area; it has made things much easier for me. However, they just changed the code on their website (probably to stop people from doing this), and now I am getting an AttributeError. The error I'm receiving is this:

File "webscraper.py", line 22, in name.getText().strip(), AttributeError: 'NoneType' object has no attribute 'getText'

The code below was working perfectly, collecting names and numbers, before they changed the site. It appears all they did was change the class names by adding the "jsx-1792441256" prefix.

import csv
import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint

# One-off random delay before scraping (10-20 s), presumably to look less bot-like.
sleep(randint(10,20))


# Accumulates one [name, phone] row per agent card across all pages.
realtor_data = []

for page in range(1, 10):
    print(f"Scraping page {page}...")
    url = f"https://www.realtor.com/realestateagents/san-diego_ca/pg-{page}"
    soup = BeautifulSoup(requests.get(url).text, "html.parser")

    # NOTE(review): these hard-coded "jsx-1792441256 ..." class names are the
    # site's auto-generated classes; when the site regenerates them, find()
    # returns None and the .getText() call below raises
    # AttributeError: 'NoneType' object has no attribute 'getText'
    # (the exact traceback quoted in the question).
    for agent_card in soup.find_all("div", {"class": "jsx-1792441256 agent-list-card-title-text clearfix"}):
        name = agent_card.find("div", {"class": "jsx-1792441256 agent-name text-bold"}).find("a")
        number = agent_card.find("div", {"itemprop": "telephone"})
        realtor_data.append(
            [
                # name is unguarded, unlike number below — this is the failing line.
                name.getText().strip(),
                number.getText().strip() if number is not None else "N/A"
                
             ],
        )

# Write the scraped rows; header has a third CITY column that the rows
# don't fill — the pandas pass below fills it in.
with open("sandiego.csv", "w") as output:
    w = csv.writer(output)
    w.writerow(["NAME:", "PHONE NUMBER:", "CITY:"])
    w.writerows(realtor_data)

# Re-read the CSV and default the empty CITY column to "San Diego".
import pandas as pd
a=pd.read_csv("sandiego.csv")
a2 = a.iloc[:,[0,1]]
a3 = a.iloc[:,[2]]
a3 = a3.fillna("San Diego")
b=pd.concat([a2,a3],axis=1)
b.to_csv("sandiego.csv")

Upvotes: 2

Views: 32

Answers (1)

Andrej Kesely
Andrej Kesely

Reputation: 195553

Fixed code:

# Scrape agent name/phone pairs from realtor.com's San Diego listing pages,
# write them to data.csv, then fill in a constant CITY column with pandas.
import csv
from random import randint
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup

# sleep(randint(10,20))


realtor_data = []

for page in range(1, 10):
    print(f"Scraping page {page}...")
    url = f"https://www.realtor.com/realestateagents/san-diego_ca/pg-{page}"
    soup = BeautifulSoup(requests.get(url).text, "html.parser")

    # The mobile-only card markup keeps stable class names ("agent-name",
    # "agent-phone") without the auto-generated "jsx-..." prefix that broke
    # the original selector.
    for agent_card in soup.select("div.agent-list-card-title.mobile-only"):
        name = agent_card.find("div", {"class": "agent-name"})
        number = agent_card.find("div", {"class": "agent-phone"})
        # Guard BOTH fields: find() returns None whenever the markup changes,
        # which is exactly the AttributeError the question hit on the name.
        realtor_data.append(
            [
                name.getText().strip() if name is not None else "N/A",
                number.getText().strip() if number is not None else "N/A",
            ],
        )

# newline="" stops the csv module from emitting blank rows on Windows;
# an explicit encoding keeps non-ASCII agent names intact.
with open("data.csv", "w", newline="", encoding="utf-8") as output:
    w = csv.writer(output)
    w.writerow(["NAME:", "PHONE NUMBER:", "CITY:"])
    w.writerows(realtor_data)

# Post-process: the rows have no CITY value, so re-read the file and
# default the empty third column to "San Diego".
a = pd.read_csv("data.csv")
a2 = a.iloc[:, [0, 1]]
a3 = a.iloc[:, [2]]
a3 = a3.fillna("San Diego")
b = pd.concat([a2, a3], axis=1)
# index=False prevents pandas from prepending an unnamed index column
# on the rewrite (the original version silently added one).
b.to_csv("data.csv", index=False)

Creates data.csv:

(screenshot of the generated data.csv omitted)

Upvotes: 2

Related Questions