Scrapy: how to request a second URL from inside parse() and parse its response

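I would like to take a value extracted in `parse` (the song id), use it to request another URL, and parse that second response as well. With the code below the second page is never parsed. How do I fix it?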

from scrapy import Spider from scrapy.selector import Selector

from stack.items import StackItem

# NOTE(review): the class header and its attributes were pasted onto a single
# line. As written, everything after "#n = 1997" (including start_urls) is
# part of a comment, and parse() below sits at module level instead of inside
# the class. The text is kept exactly as posted.
class StackSpider(Spider): name = "stack" allowed_domains = ["*"] global n #n = 1997 start_urls = ['https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',]

# Callback for the chart page as posted: builds one StackItem per table row
# and then attempts to fetch the per-song detail page for that row.
def parse(self, response):
    # Prefix of the per-song detail page; the song id is appended to it below.
    url = 'https://www.melon.com/song/detail.htm?songId='
    questions = Selector(response).xpath('//*[@id="frm"]/table/tbody/tr')
    for question in questions:
        item = StackItem()
        # .extract()[0] raises IndexError when the row has no matching node.
        item['musicid'] = question.xpath('td/div/input/@value').extract()[0]
        item['title'] = question.xpath('td[4]/div/div/div/span/strong/a/@title').extract()
        item['artlist'] = question.xpath(
            'td[4]/div/div/div[2]/div[1]/a/text()').extract()
        item['album'] = question.xpath(
            'td[4]/div/div/div[2]/div[2]/a/text()').extract()
        item['sunwhi'] = question.xpath(
            'td[2]/div/span/text()').extract()[0]
        # Problem 1: `musicid` is never assigned as a variable (the value is
        # stored in item['musicid']), and `requests` does not appear in the
        # imports shown, so this line cannot run as posted.
        response_url=requests.get(url+musicid)
        # Problem 2: this inner function is only defined, never called, and
        # response_url is never handed to it, so the detail page is not parsed.
        def parse(self, response):
            questions = Selector(response).xpath('//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd')
            for question in questions:
                 # This `item` is a new object local to the inner function; it
                 # is not the item built from the chart row above.
                 item = StackItem()
                 item['album'] = question.xpath('a/text()').extract()[0]
        # Yields the item built from the chart row, without any detail-page data.
        yield item

Upvotes: 0

Views: 159

Answers (1)

class StackSpider(Spider):
    """Scrape the Melon yearly chart and complete each row from its song page.

    Crawl flow:
      1. ``parse`` receives the chart page, builds one ``StackItem`` per table
         row and schedules a request for that song's detail page, carrying
         the partially filled item along in ``Request.meta``.
      2. ``parse_detail`` receives the detail page, overwrites ``album`` with
         the value found there (when present) and yields the finished item.

    The second page must be fetched by yielding a Scrapy request with a
    callback. Calling a blocking HTTP client inside ``parse`` or defining a
    nested function does not make Scrapy download or parse anything.
    """

    name = "stack"
    # Follow-up requests pass through Scrapy's offsite filter, which compares
    # the request host with this list. A literal "*" matches no real host, so
    # the detail requests would be dropped; list the real domain instead.
    allowed_domains = ["melon.com"]
    start_urls = [
        'https://www.melon.com/chart/age/list.htm?chartType=YE&chartGenre=KPOP&chartDate=2010',
    ]
    # Prefix of the per-song detail page; the song id is appended to it.
    detail_url = 'https://www.melon.com/song/detail.htm?songId='

    def parse(self, response):
        """Build one item per chart row and request its song detail page.

        Args:
            response: the downloaded chart page.

        Yields:
            One request per chart row that has a song id. The item is not
            yielded here; ``parse_detail`` yields it once the detail page
            has been processed.
        """
        # NOTE(review): the XPath relies on a <tbody> element being present in
        # the raw HTML (browsers insert one when rendering) - confirm against
        # the actual response body.
        for row in response.xpath('//*[@id="frm"]/table/tbody/tr'):
            song_id = row.xpath('td/div/input/@value').extract_first()
            if not song_id:
                # Without an id there is no detail page to request.
                continue

            item = StackItem()
            item['musicid'] = song_id
            item['title'] = row.xpath(
                'td[4]/div/div/div/span/strong/a/@title').extract()
            item['artlist'] = row.xpath(
                'td[4]/div/div/div[2]/div[1]/a/text()').extract()
            item['album'] = row.xpath(
                'td[4]/div/div/div[2]/div[2]/a/text()').extract()
            # extract_first() returns None instead of raising IndexError when
            # the cell is missing.
            item['sunwhi'] = row.xpath('td[2]/div/span/text()').extract_first()

            # Hand the half-built item to the next callback through meta.
            yield response.follow(
                self.detail_url + song_id,
                callback=self.parse_detail,
                meta={'item': item},
            )

    def parse_detail(self, response):
        """Finish the item started in ``parse`` using the song detail page.

        Args:
            response: the downloaded song detail page; ``response.meta['item']``
                holds the item built from the chart row.

        Yields:
            The completed item. ``album`` is replaced by the first link text
            found in the detail page's info list (a string); if none is found
            the value taken from the chart page (a list) is kept.
        """
        item = response.meta['item']
        album = response.xpath(
            '//*[@id="downloadfrm"]/div/div/div[2]/div[2]/dl/dd/a/text()'
        ).extract_first()
        if album is not None:
            item['album'] = album
        yield item

Upvotes: 0

Related Questions