Reputation: 33
Trying to scrap text from different pages using a while loop. but getting TypeError: int() argument must be a string, a bytes-like object or a number, not 'Response'
I am using beautiful soup and global variable to increase the page number.
import re
from bs4 import BeautifulSoup , Comment
import requests
page = 1
total = 3
def get_chapter():
global page
c_page = int(page)
if c_page < 10:
chapter = f"0{page}"
else:
chapter = page
page += 1
return chapter
def filter_text(element):
if element.parent.name in ['style' , 'script' , 'head' , 'title' , 'meta' , '[document]']:
return False
if isinstance(element , Comment):
return False
elif re.match(r"[\s\r\n]+" , str(element)):
return False
return True
def run():
global page
c_page = int(page)
while c_page < total:
chapter = get_chapter()
# book url altered
url = f"https://wod.ng/wol/d/r1/lp-e/11020212{chapter}"
print(url)
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')
# get text only
texts = soup.find_all(text=True)
visible_texts = filter(filter_text, texts)
print(u" ".join(t.strip() for t in visible_texts))
run()
Upvotes: 0
Views: 1170
Reputation: 4710
Don't use the same variable name for multiple variables.
Change this
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')
to
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
or something because you're already using page
for counting pages/chapters
Upvotes: 1