How to extract the title from an img tag (selected by class) using BeautifulSoup or XPath

Question

This is the HTML from which I have to extract the title and alt values of an img tag that is selected by its class.

This is my code:

from bs4 import BeautifulSoup, Tag
import urllib2
import re


def complete_url(items_url):
    """Fetch one Walmart product page and print its Bazaarvoice <img> tag.

    Args:
        items_url: Site-relative URL of the product page. It is appended
            verbatim to "http://www.walmart.com".

    Prints the first <img> element whose CSS class matches the regular
    expression "bazaarvoice", or None when the page has no such element.
    """
    items = "http://www.walmart.com" + items_url
    response = urllib2.urlopen(items)
    try:
        main_source = response.read()
    finally:
        # Release the HTTP connection even if reading the body fails.
        response.close()
    soup = BeautifulSoup(main_source)
    # Other product fields, currently disabled:
    #Title=soup.find('h1',{"class":"productTitle"}).text.strip()
    #Price=soup.find('span',{"class":"bigPriceText1"}).text.strip()+soup.find('span',{"class":"smallPriceText1"}).text.strip()
    #Availability=soup.find('span',{"id":"STORE_AVAIL"}).text.strip()
    #Description=soup.find('span',{"class":"ql-details-short-desc"}).text.strip()
    # A non-dict second argument to find() is matched against the tag's CSS
    # class, so this selects an <img> whose class contains "bazaarvoice".
    images = soup.find('img', re.compile("bazaarvoice"))
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(images)


#print 'Title:%s,Price:%s,Availability:%s,Description:%s,Avg_Rating:%s' %(Title,Price,Availability,Description,Avg_Rating)


def url_soup(url):
    """Crawl a Walmart search-results page and every following page.

    For each results page, every anchor matching ``a.ListItemLink`` is passed
    to ``complete_url``; then the "Next" link is followed until there is no
    further page, at which point the marker "<<<>>>" is printed.

    Args:
        url: Absolute URL of the first search-results page.
    """
    # Iterate instead of recursing so a long result set cannot exhaust the
    # interpreter's recursion limit (one stack frame per page otherwise).
    while True:
        response = urllib2.urlopen(url)
        try:
            source = response.read()
        finally:
            # Release the HTTP connection even if reading the body fails.
            response.close()
        soup = BeautifulSoup(source)

        # Scrape every product on the page, not just the last one found.
        for anchor in soup.select('a.ListItemLink'):
            complete_url(anchor['href'])

        link1 = soup.find('a', href=True, text=re.compile("Next"))
        # NOTE(review): presumably this class marks the disabled "next"
        # control shown on the last page -- confirm against the live markup.
        link2 = soup.find('a', class_="SPPagNoLink jump next")

        if link2 is not None or link1 is None:
            # Either the end-of-results control is present or there is no
            # "Next" anchor to follow: stop instead of indexing into None.
            print("<<<>>>")
            break

        # The href may contain stray whitespace; strip it before joining.
        url = ('http://www.walmart.com/search/search-ng.do'
               + re.sub(r'\s', '', link1['href']))


# Maps a department name to the id string that gen_url() places in the
# ``search_constraint`` query parameter.
Dept = {
    "All Departments": "0",
    "Apparel": "5438",
    "Auto": "91083",
    "Baby": "5427",
    "Beauty": "1085666",
    "Books": "3920",
    "Electronics": "3944",
    "Gifts": "1094765",
    "Grocery": "976759",
    "Health": "976760",
    "Home": "4044",
    "Home Improvement": "1072864",
    # Correct spelling; the misspelled key below is kept so existing callers
    # that pass "Jwelery" keep working.
    "Jewelry": "3891",
    "Jwelery": "3891",
    "Movies": "4096",
    "Music": "4104",
    "Party": "2637",
    "Patio": "5428",
    "Pets": "5440",
    "Pharmacy": "5431",
    "Photo Center": "5426",
    "Sports": "4125",
    "Toys": "4171",
    "Video Games": "2636",
}


def gen_url(keyword, domain):
    """Build the Walmart search URL for *keyword* in *domain* and crawl it.

    The URL is printed and then handed to ``url_soup``.

    Args:
        keyword: Search term. It is inserted verbatim into the query string,
            so it must already be URL-safe (no spaces or reserved characters).
        domain: Department name; must be one of the keys of ``Dept``.

    Raises:
        ValueError: If *domain* is not a known department. (Previously this
            case fell through and crashed on the unbound ``main_url``.)
    """
    try:
        constraint = Dept[domain]
    except KeyError:
        raise ValueError("Unknown department: %r" % (domain,))

    main_url = ('http://www.walmart.com/search/search-ng.do?search_query='
                '%s&ic=16_0&Find=Find&search_constraint=%s') % (keyword, constraint)
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print(main_url)
    url_soup(main_url)


# Script entry: crawl the search results for "Laptop" across all departments.
gen_url(keyword='Laptop', domain='All Departments')

How to extract the title from an img tag (selected by class) using BeautifulSoup or XPath

Answers (1)

Related Questions