Reputation: 3
I would like to parse the value obtained from parse again by connecting to another url. How do I fix it?
from scrapy import Spider from scrapy.selector import Selector
from stack.items import StackItem
class StackSpider(Spider): name = "stack" allowed_domains = ["*"] global n #n = 1997 start_urls = ['https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',]
def parse(self, response):
url = 'https://www.melon.com/song/detail.htm?songId='
questions = Selector(response).xpath('//*[@id="frm"]/table/tbody/tr')
for question in questions:
item = StackItem()
item['musicid'] = question.xpath('td/div/input/@value').extract()[0]
item['title'] = question.xpath('td[4]/div/div/div/span/strong/a/@title').extract()
item['artlist'] = question.xpath(
'td[4]/div/div/div[2]/div[1]/a/text()').extract()
item['album'] = question.xpath(
'td[4]/div/div/div[2]/div[2]/a/text()').extract()
item['sunwhi'] = question.xpath(
'td[2]/div/span/text()').extract()[0]
response_url=requests.get(url+musicid)
def parse(self, response):
questions = Selector(response).xpath('//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd')
for question in questions:
item = StackItem()
item['album'] = question.xpath('a/text()').extract()[0]
yield item
Upvotes: 0
Views: 159
Reputation: 3
class StackSpider(Spider):
name = "stack"
allowed_domains = ["*"]
global n
#n = 1997
start_urls = ['https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',]
def parse(self, response):
url = 'https://www.melon.com/song/detail.htm?songId='
questions = Selector(response).xpath('//*[@id="frm"]/table/tbody/tr')
for question in questions:
item = StackItem()
item['musicid'] = question.xpath('td/div/input/@value').extract()[0]
item['title'] = question.xpath('td[4]/div/div/div/span/strong/a/@title').extract()
item['artlist'] = question.xpath(
'td[4]/div/div/div[2]/div[1]/a/text()').extract()
item['album'] = question.xpath(
'td[4]/div/div/div[2]/div[2]/a/text()').extract()
item['sunwhi'] = question.xpath(
'td[2]/div/span/text()').extract()[0]
response_url=requests.get(url+musicid)
def parse(self, response):
questions = Selector(response).xpath('//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd')
for question in questions:
item = StackItem()
item['album'] = question.xpath('a/text()').extract()[0]
yield item
Upvotes: 0