John Dene

Reputation: 570

How to get the URL of the current tab in Selenium?

Everything is working fine, but I am unable to extract the URL of the new tab each time I click on it. What am I missing? I have tried every option I could find (current_url, response.url, getCurrentUrl) and none of them seems to work. What would be the best solution in this case?

import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from selenium import webdriver
from urlparse import urljoin
import time
from selenium.webdriver.common.keys import Keys

class CompItem(scrapy.Item):
    model_name = scrapy.Field()
    model_link = scrapy.Field()
    url = scrapy.Field()

class criticspider(CrawlSpider):
    name = "paytm_l"
    allowed_domains = ["paytm.com"]
    start_urls = ["https://paytm.com/shop/g/electronics/mobile-accessories/mobiles"]


    def __init__(self, *args, **kwargs):
        super(criticspider, self).__init__(*args, **kwargs)
        self.download_delay = 0.25
        self.browser = webdriver.Firefox()

        self.browser.implicitly_wait(2)

    def parse_start_url(self, response):
        self.browser.get(response.url)
        #sites = response.xpath('//div[@class="single-review"]/div[@class="review-header"]')
        self.browser.implicitly_wait(30)

        items = []
        time.sleep(20)
        # for i in range(0, 200):
        #     self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        sel = Selector(text=self.browser.page_source)
        sites = sel.xpath('//div[contains(@class,"overflow-hidden")]')

        item = CompItem()

        for r in range(1, 5):
            # item['model_name'] = site.xpath('.//p[contains(@ng-if,"applyLimit")]/text()')
            button = self.browser.find_element_by_xpath(
                "/html/body/div[5]/div[5]/div/div[5]/div[3]/ul/li[%d]/a" % r)
            main_window = self.browser.current_window_handle
            # Ctrl+Return opens the link in a new tab; Ctrl+Tab moves the browser to it
            button.send_keys(Keys.CONTROL + Keys.RETURN)
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
            time.sleep(5)
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'l')
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'c')

            # current_url is a property, not a method; calling it raises a TypeError
            item["url"] = self.browser.current_url()
            time.sleep(10)
            self.browser.switch_to_window(main_window)
            time.sleep(10)

            # Ctrl+W closes the new tab
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
            self.browser.switch_to_window(main_window)

            # item['model_link'] = site.xpath('//a[contains(@class,"{"na": !productClasses(product)}"]/@href').extract()[0]

            yield item

Upvotes: 2

Views: 2141

Answers (1)

John Dene

Reputation: 570

By changing the driver's focus to the new tab, I was able to get the URLs.
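
Compared with the question, the fix is in two places: the driver is switched back to the window handle before the URL is read, and current_url is accessed as a property rather than called as a method (it returns a string, so self.browser.current_url() raises TypeError: 'str' object is not callable). A minimal check of the property access:

from selenium import webdriver

driver = webdriver.Firefox()
driver.get("https://paytm.com")
print(driver.current_url)      # property access returns the URL string
# print(driver.current_url())  # TypeError: 'str' object is not callable

The full spider then looks like this: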

import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from selenium import webdriver
from urlparse import urljoin
import time
from selenium.webdriver.common.keys import Keys

class CompItem(scrapy.Item):
    model_name = scrapy.Field()
    model_link = scrapy.Field()
    url = scrapy.Field()

class criticspider(CrawlSpider):
    name = "paytm_l"
    allowed_domains = ["paytm.com"]
    start_urls = ["https://paytm.com/shop/g/electronics/mobile-accessories/mobiles"]


    def __init__(self, *args, **kwargs):
        super(criticspider, self).__init__(*args, **kwargs)
        self.download_delay = 0.25
        self.browser = webdriver.Firefox()

        self.browser.implicitly_wait(2)

    def parse_start_url(self, response):
        self.browser.get(response.url)
        #sites = response.xpath('//div[@class="single-review"]/div[@class="review-header"]')
        self.browser.implicitly_wait(30)

        items = []
        time.sleep(20)
        # for i in range(0, 200):
        #     self.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        sel = Selector(text=self.browser.page_source)
        sites = sel.xpath('//div[contains(@class,"overflow-hidden")]')

        item = CompItem()

        for r in range(1, 5):
            # item['model_name'] = site.xpath('.//p[contains(@ng-if,"applyLimit")]/text()')
            button = self.browser.find_element_by_xpath(
                "/html/body/div[5]/div[5]/div/div[5]/div[3]/ul/li[%d]/a" % r)
            main_window = self.browser.current_window_handle
            # Ctrl+Return opens the link in a new tab; Ctrl+Tab moves the browser to it
            button.send_keys(Keys.CONTROL + Keys.RETURN)
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + Keys.TAB)
            time.sleep(5)
            # point the driver back at the window handle before reading the URL
            self.browser.switch_to_window(main_window)

            # current_url is a property, so no parentheses here
            item["url"] = self.browser.current_url
            time.sleep(20)

            # Ctrl+W closes the new tab
            self.browser.find_element_by_tag_name('body').send_keys(Keys.CONTROL + 'w')
            self.browser.switch_to_window(main_window)

            # item['model_link'] = site.xpath('//a[contains(@class,"{"na": !productClasses(product)}"]/@href').extract()[0]

            yield item
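
For anyone hitting this today: switch_to_window is deprecated in current Selenium releases, and modern browsers give each new tab its own window handle, so the keyboard-shortcut tricks are no longer needed. A minimal sketch of the same loop with the Selenium 4 API (the XPath locator here is a placeholder, not the one from the question):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("https://paytm.com/shop/g/electronics/mobile-accessories/mobiles")

main_window = driver.current_window_handle
links = driver.find_elements(By.XPATH, '//ul/li/a')  # placeholder locator

for link in links[:4]:
    link.send_keys(Keys.CONTROL + Keys.RETURN)  # open the link in a new tab
    WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
    # the new tab has its own handle; switch the driver to it
    new_tab = [h for h in driver.window_handles if h != main_window][0]
    driver.switch_to.window(new_tab)
    print(driver.current_url)             # read the new tab's URL
    driver.close()                        # close the tab
    driver.switch_to.window(main_window)  # back to the listing page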

Upvotes: 2
