Reputation: 9
I was trying to scrape the indeed website as part of a project work. I encountered 'NoneType' object has no attribute 'text' while trying to obtain the summary of the job post.
Anyone got a solution ?
def extract(page):
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
url = f'https://ae.indeed.com/jobs?q=data+analyst&l=dubai&start={page}'
r = requests.get(url, headers)
soup = bs(r.content, 'html.parser')
return soup
def transform(soup):
div = soup.find_all('div', class_ ='jobsearch-SerpJobCard')
for item in div:
title = item.find('a').text.strip()
company = item.find('span', class_ = 'company').text.strip()
summary = item.find(name='div',attrs={'class':'summary'}).text.strip()
jobs = {'title': title,
'company': company,
'summary': summary}
joblist.append(jobs)
return
joblist = []
c = extract(10)
transform(c)
print(joblist)
Error message:
AttributeError Traceback (most recent call last)
<ipython-input-65-eefd76c3693a> in <module>
24 joblist = []
25 c = extract(10)
---> 26 transform(c)
27 print(joblist)
<ipython-input-65-eefd76c3693a> in transform(soup)
11 title = item.find('a').text.strip()
12 company = item.find('span', class_ = 'company').text.strip()
---> 13 summary = item.find(name='div',attrs={'class':'summary'}).text.strip()
14 # summary = item.find(name='li',attrs={'style':'margin-bottom:0px;'}).text
15 # for sum in summary:
AttributeError: 'NoneType' object has no attribute 'text'
Upvotes: 0
Views: 575
Reputation: 9
I just modified the code with a try, except block. It worked fine.
try:
summary = item.find(name='div',attrs={'class':'summary'}).text.strip()
except:
summary = 'None'
Upvotes: 1