Michael Amos
Michael Amos

Reputation: 47

How to extract and download a hard-to-reach image when web scraping with BeautifulSoup and Selenium in Python?

The images I am trying to get are inside an <img> tag, and I want the images listed in its 'srcset' attribute.

I have found this code here but it doesn't seem to work.

import requests 
from bs4 import BeautifulSoup 
from selenium import webdriver
from time import sleep

from cookie_accepter3 import load_and_accept_cookies

# Start a Safari browser session controlled through Selenium.
driver = webdriver.Safari()

def getdata(url, timeout=30):
    """Return the body of *url* as text, fetched with a plain HTTP GET.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    return response.text

# Advert page whose images should be listed.
URL = 'https://www.autotrader.co.uk/car-details/202205215960809?sort=relevance&advertising-location=at_cars&radius=1501&make=SEAT&postcode=cv326ja&model=Ibiza&onesearchad=New&onesearchad=Nearly%20New&onesearchad=Used&include-delivery-option=on&page=1'

# Open the page in the Selenium-driven browser, pause briefly, then hand it
# to the project helper (presumably dismisses the cookie banner -- confirm
# against cookie_accepter3).
driver.get(URL)
sleep(3)
load_and_accept_cookies(URL, driver)

# Download the HTML again through requests. Reuse URL rather than repeating
# the literal so the two fetches cannot drift apart.
# NOTE: this request is independent of the browser session above, so the
# parsed markup is whatever the server returns to a plain HTTP client.
htmldata = getdata(URL)

soup = BeautifulSoup(htmldata, 'html.parser')

for item in soup.find_all('img'):
    # Not every <img> carries a src attribute; item['src'] would raise
    # KeyError on those, so look it up defensively and skip the gaps.
    src = item.get('src')
    if src is not None:
        print(src)

Inspect Element

Any help would be greatly appreciated, thank you.

Upvotes: 1

Views: 195

Answers (1)

ahmedshahriar
ahmedshahriar

Reputation: 1076

There is only one img element on this webpage, so you can easily select it with a Selenium CSS selector; you don't need bs4.

Working code -

import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Launch flags for the Chrome session used to render the page.
options = webdriver.ChromeOptions()

# Uncomment to run Chrome without a visible window.
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height"; the "1920x1080" spelling is
# not parsed and leaves the window at its default size.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# ChromeDriverManager().install() supplies the chromedriver path for the
# Service, so no manual driver download is needed.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)



# Advert page to scrape.
URL = "https://www.autotrader.co.uk/car-details/202205215960809?sort=relevance&advertising-location=at_cars&radius=1501&make=SEAT&postcode=cv326ja&model=Ibiza&onesearchad=New&onesearchad=Nearly%20New&onesearchad=Used&include-delivery-option=on&page="

# The driver is used as a context manager so the browser is shut down when
# the block exits.
with chrome_driver as driver:
    # Let element lookups keep retrying for up to 15 seconds.
    driver.implicitly_wait(15)
    driver.get(URL)
    time.sleep(3)

    # Grab the first <img> on the page, then read its srcset attribute
    # (the comma-separated list of image candidates).
    first_image = driver.find_element(By.CSS_SELECTOR, "img")
    img_url = first_image.get_attribute("srcset")

    time.sleep(0.3)
    print(img_url)

Output -

https://m.atcdn.co.uk/a/media/w300h225/afdc5f1656624e178f8af72d7632b92d.jpg 320w, https://m.atcdn.co.uk/a/media/w480h360/afdc5f1656624e178f8af72d7632b92d.jpg 480w, https://m.atcdn.co.uk/a/media/w600h450/afdc5f1656624e178f8af72d7632b92d.jpg 600w, https://m.atcdn.co.uk/a/media/w720h540/afdc5f1656624e178f8af72d7632b92d.jpg 720w, https://m.atcdn.co.uk/a/media/w800h600/afdc5f1656624e178f8af72d7632b92d.jpg 800w

Upvotes: 0

Related Questions