Reputation: 21
I am trying to pull a set of weave sample images from a site.
The objective is to create a dataset for a creative project.
The code and a screenshot of the site to scrape are included below.
Any pointers are greatly appreciated, thank you.
'''
from bs4 import BeautifulSoup
import requests
import urllib.request
import shutil
# Fetch the page and collect a (src, alt) pair for the first <img> found
# inside every matched element.
url = "https://cdndrafts-01-2019.handweaving.net"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
# NOTE(review): this matches elements whose *tag name* is "right-padding" and
# whose CSS class is "img". "right-padding" looks like a class name rather
# than a tag name, so this may match nothing -- confirm against the page markup.
aas = soup.find_all("right-padding", class_='img')
image_info = []
for a in aas:
    image_tag = a.findChildren('img')
    # A matched element without any <img> child would make image_tag[0]
    # raise IndexError; skip it instead of aborting the whole scrape.
    if not image_tag:
        continue
    image_info.append((image_tag[0]['src'], image_tag[0]['alt']))
def download_image(image):
    """Download one image and save it to disk as a ``.jpg`` file.

    Args:
        image: A ``(src, alt)`` pair. ``src`` is the URL that is fetched;
            ``alt`` is stripped down to its alphanumeric characters and used
            as the file name.

    Raises:
        OSError: If the target file cannot be opened for writing (for
            example, because the ``C://cdnddrafts`` folder does not exist).
    """
    realname = ''.join(e for e in image[1] if e.isalnum())
    # The original path "C://cdnddrafts{}/jpg" glued the name onto the folder
    # and used "/jpg" where a ".jpg" extension was presumably intended.
    path = "C://cdnddrafts/{}.jpg".format(realname)
    # Context managers release the connection and close the file even when
    # the copy fails part-way through.
    with requests.get(image[0], stream=True) as response:
        # Ask urllib3 to undo gzip/deflate transfer encoding so that the raw
        # stream yields the actual image bytes.
        response.raw.decode_content = True
        # Binary write mode: the default mode is read-only text, which can
        # neither create the file nor accept bytes.
        with open(path, "wb") as file:
            shutil.copyfileobj(response.raw, file)
# Download every collected (src, alt) pair, in the order it was scraped.
for entry in image_info:
    download_image(entry)
'''
Upvotes: 0
Views: 54
Reputation: 49
This is a pretty in-depth guide on how to do this; take a look. Some parts are redundant, and you can leave them out.
Upvotes: 1