qwpr
qwpr

Reputation: 49

Webscraper throwing an error after website changed their code

I built a webscraper for realtor.com, as I am looking for houses and agents in my area; it has made things much easier for me. However, they just changed the code on their website (probably to stop people from doing this), and now I am getting an AttributeError. The error I'm receiving is this:

File "webscraper.py", line 22, in name.getText().strip(), AttributeError: 'NoneType' object has no attribute 'getText'

The code below was working perfectly, collecting names and numbers, before they changed the site. It appears all they did was change the class names by adding the "jsx-1792441256" prefix.

import csv
import requests
from bs4 import BeautifulSoup
from time import sleep
from random import randint

# One-off random delay before scraping (10-20 s), presumably to look less bot-like.
sleep(randint(10,20))


# Accumulates one [name, phone] row per agent card across all pages.
realtor_data = []

for page in range(1, 10):
    print(f"Scraping page {page}...")
    url = f"https://www.realtor.com/realestateagents/san-diego_ca/pg-{page}"
    soup = BeautifulSoup(requests.get(url).text, "html.parser")

    # NOTE(review): these hard-coded "jsx-1792441256 ..." class names are the
    # site's auto-generated classes; when the site regenerates them, find()
    # returns None and the .getText() call below raises
    # AttributeError: 'NoneType' object has no attribute 'getText'
    # (the exact traceback quoted in the question).
    for agent_card in soup.find_all("div", {"class": "jsx-1792441256 agent-list-card-title-text clearfix"}):
        name = agent_card.find("div", {"class": "jsx-1792441256 agent-name text-bold"}).find("a")
        number = agent_card.find("div", {"itemprop": "telephone"})
        realtor_data.append(
            [
                # name is unguarded, unlike number below — this is the failing line.
                name.getText().strip(),
                number.getText().strip() if number is not None else "N/A"
                
             ],
        )

# Write the scraped rows; header has a third CITY column that the rows
# don't fill — the pandas pass below fills it in.
with open("sandiego.csv", "w") as output:
    w = csv.writer(output)
    w.writerow(["NAME:", "PHONE NUMBER:", "CITY:"])
    w.writerows(realtor_data)

# Re-read the CSV and default the empty CITY column to "San Diego".
import pandas as pd
a=pd.read_csv("sandiego.csv")
a2 = a.iloc[:,[0,1]]
a3 = a.iloc[:,[2]]
a3 = a3.fillna("San Diego")
b=pd.concat([a2,a3],axis=1)
b.to_csv("sandiego.csv")

Upvotes: 2

Views: 32

Answers (1)

Andrej Kesely
Andrej Kesely

Reputation: 195553

Fixed code:

# Scrape agent name/phone pairs from realtor.com's San Diego listing pages,
# write them to data.csv, then fill in a constant CITY column with pandas.
import csv
from random import randint
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup

# sleep(randint(10,20))


realtor_data = []

for page in range(1, 10):
    print(f"Scraping page {page}...")
    url = f"https://www.realtor.com/realestateagents/san-diego_ca/pg-{page}"
    soup = BeautifulSoup(requests.get(url).text, "html.parser")

    # The mobile-only card markup keeps stable class names ("agent-name",
    # "agent-phone") without the auto-generated "jsx-..." prefix that broke
    # the original selector.
    for agent_card in soup.select("div.agent-list-card-title.mobile-only"):
        name = agent_card.find("div", {"class": "agent-name"})
        number = agent_card.find("div", {"class": "agent-phone"})
        # Guard BOTH fields: find() returns None whenever the markup changes,
        # which is exactly the AttributeError the question hit on the name.
        realtor_data.append(
            [
                name.getText().strip() if name is not None else "N/A",
                number.getText().strip() if number is not None else "N/A",
            ],
        )

# newline="" stops the csv module from emitting blank rows on Windows;
# an explicit encoding keeps non-ASCII agent names intact.
with open("data.csv", "w", newline="", encoding="utf-8") as output:
    w = csv.writer(output)
    w.writerow(["NAME:", "PHONE NUMBER:", "CITY:"])
    w.writerows(realtor_data)

# Post-process: the rows have no CITY value, so re-read the file and
# default the empty third column to "San Diego".
a = pd.read_csv("data.csv")
a2 = a.iloc[:, [0, 1]]
a3 = a.iloc[:, [2]]
a3 = a3.fillna("San Diego")
b = pd.concat([a2, a3], axis=1)
# index=False prevents pandas from prepending an unnamed index column
# on the rewrite (the original version silently added one).
b.to_csv("data.csv", index=False)

Creates data.csv:

(screenshot of the generated data.csv omitted)

Upvotes: 2

Related Questions