Reputation: 2407
This is my first scraper using scrapy.
I am trying to scrap video url, title from https://www.google.co.in/trends/hotvideos#hvsm=0 site.
import scrapy
from scrapy.item import Item, Field
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
class CraigslistItem(Item):
title = Field()
link = Field()
class DmozSpider(scrapy.Spider):
name = "google"
allowed_domains = ["google.co.in"]
start_urls = [
"https://www.google.co.in/trends/hotvideos#hvsm=0"
]
def parse(self, response):
#for sel in response.xpath('//body/div'):
hxs = HtmlXPathSelector(response)
sites = hxs.xpath("//span[@class='single-video-image-container']")
items = []
for sel in response.xpath("//span[@class='single-video-image-container']"):
item = CraigslistItem()
item['title'] = sel.xpath('a/text()').extract()
item['link'] = sel.xpath('a/@href').extract()
items.append(item)
print items
General walk through of what I am doing wrong would be much appreciable.
Upvotes: 0
Views: 660
Reputation: 1712
Use the help Scrapy
FormRequest
to get it done.
from scrapy.http import FormRequest
import json
class DmozSpider(scrapy.Spider):
name = "google"
allowed_domains = ["google.co.in"]
start_urls = [
"https://www.google.co.in/trends/hotvideos#hvsm=0"
]
def parse(self, response):
url = 'https://www.google.co.in/trends/hotvideos/hotItems'
formdata = {'hvd':'','geo': 'IN','mob': '0','hvsm': '0'}
yield FormRequest(url=url, formdata=formdata, callback=self.parse_data)
def parse_data(self, response):
json_response = json.loads(response.body)
videos = json_response.get('videoList')
for video in videos:
item = CraigslistItem()
item['title'] = video.get('title')
item['link'] = video.get('url')
yield item
Upvotes: 2