Reputation: 137
I've written a Python script to download movie poster images from a torrent site and save them to a folder on the desktop. The script can download the images and store them in that folder.
It already handles the two extreme cases: if none of the images are in the folder, it downloads them all; if all of them are already there, it skips the download.
How can I make the script download the rest of the images when only some of them are already in the folder?
This is my try:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

link = "https://www.yify-torrent.org/search/1080p/"
dirf = os.path.join(os.environ['USERPROFILE'], 'Desktop', 'Images')
if not os.path.exists(dirf):
    os.makedirs(dirf)
os.chdir(dirf)

# count the files already present in the folder
items = len([name for name in os.listdir(dirf) if os.path.isfile(os.path.join(dirf, name))])

if not items:  # only download when the folder is empty
    response = requests.get(link)
    soup = BeautifulSoup(response.text, "lxml")
    for item in soup.select(".img-item .poster-thumb"):
        filename = item['src'].split('/')[-1]
        with open(filename, 'wb') as f:
            f.write(requests.get(urljoin(link, item['src'])).content)
else:
    print("All images are there")
Upvotes: 0
Views: 50
Reputation: 31
Try this. (Note that I haven't actually tested retrieving the images.) Holler if you need something clarified.
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

link = "https://www.yify-torrent.org/search/1080p/"
dirf = os.path.join(os.environ['USERPROFILE'], 'Desktop', 'Images')
if not os.path.exists(dirf):
    os.makedirs(dirf)
os.chdir(dirf)

# get list of previously downloaded images
items = [name for name in os.listdir(dirf) if os.path.isfile(os.path.join(dirf, name))]

# get the available images as a dictionary, since we need the full src
filenames = {}
response = requests.get(link)
soup = BeautifulSoup(response.text, "lxml")
for item in soup.select(".img-item .poster-thumb"):
    filename = item['src'].split('/')[-1]
    filenames[filename] = item['src']

# the images still to download: available minus already present
remaining = set(filenames) - set(items)
if remaining:
    for filename in remaining:
        with open(filename, 'wb') as f:
            f.write(requests.get(urljoin(link, filenames[filename])).content)
else:
    print("All images are there")
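To see what drives the skipping, here is the set difference on its own, with hypothetical file names (not taken from the actual site):

candidates = {'a.jpg': 'media/a.jpg', 'b.jpg': 'media/b.jpg', 'c.jpg': 'media/c.jpg'}
on_disk = ['b.jpg']
print(set(candidates) - set(on_disk))  # -> {'a.jpg', 'c.jpg'} (order may vary)

Passing a dict to set() uses its keys, so the difference is exactly the file names that are available on the page but not yet on disk.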
Upvotes: 1
Reputation: 189628
Examine each image separately.
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

link = "https://www.yify-torrent.org/search/1080p/"
dirf = os.path.join(os.environ['USERPROFILE'], 'Desktop', 'Images')
if not os.path.exists(dirf):
    os.makedirs(dirf)

response = requests.get(link)
soup = BeautifulSoup(response.text, "lxml")
counter = 0
for item in soup.select(".img-item .poster-thumb"):
    filename = item['src'].split('/')[-1]
    localfile = os.path.join(dirf, filename)
    if os.path.isfile(localfile):
        continue  # already downloaded; skip it
    counter += 1
    with open(localfile, 'wb') as f:
        f.write(requests.get(urljoin(link, item['src'])).content)

if counter:
    print("Downloaded {} images".format(counter))
else:
    print("All images are there")
Inside the for loop, for each extracted image link, we check whether the image already exists locally; if it does, we skip it.
(I also took out the chdir because it wasn't doing anything useful. If you do want to chdir, you can simplify the rest of the code to not prepend dirf to the local file name.)
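For completeness, a minimal sketch of that chdir variant, reusing the link, dirf, and soup variables from the code above; only the loop changes:

os.chdir(dirf)  # make dirf the working directory once...
for item in soup.select(".img-item .poster-thumb"):
    filename = item['src'].split('/')[-1]
    if os.path.isfile(filename):  # ...so bare file names resolve inside dirf
        continue
    with open(filename, 'wb') as f:
        f.write(requests.get(urljoin(link, item['src'])).content)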
Upvotes: 1