Reinaldo Chaves
Reinaldo Chaves

Reputation: 995

In selenium how to find out the exact number of XPATH links with different ids?

With Python3 and selenium I want to automate the search on a public information site. In this site it is necessary to enter the name of a person, then select the spelling chosen for that name (without or with accents or name variations), access a page with the list of lawsuits found and in this list you can access the page of each case.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
import re

Name that will be searched

name = 'JOSE ROBERTO ARRUDA'

Create path, search start link, and empty list to store information

firefoxPath="/home/abraji/Documentos/Code/geckodriver"
link = 'https://ww2.stj.jus.br/processo/pesquisa/?aplicacao=processos.ea'
processos = []

Call driver and go to first search page

driver = webdriver.Firefox(executable_path=firefoxPath)
driver.get(link)

Position cursor, fill and click

WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idParteNome'))).click()
time.sleep(1)
driver.find_element_by_xpath('//*[@id="idParteNome"]').send_keys(name)
time.sleep(6)
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarFormularioExtendido'))).click() 

Mark all spelling possibilities for searching

WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoMarcarTodos'))).click()
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#idBotaoPesquisarMarcados'))).click()
time.sleep(1)

Check how many pages of data there are - to be used in "for range"

capta = driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]').text
print(capta)
paginas = int(re.search(r'\d+', capta).group(0))
paginas = int(paginas) + 1
print(paginas)

Capture routine

for acumula in range(1, paginas):

    # Fill the field with the page number and press enter
    driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(acumula)
    driver.find_element_by_xpath('//*[@id="idDivBlocoPaginacaoTopo"]/div/span/span[2]/input').send_keys(Keys.RETURN)
    time.sleep(2)

    # Captures the number of processes found on the current page - qt
    qt = driver.find_element_by_xpath('//*[@id="idDivBlocoMensagem"]/div/b').text
    qt = int(qt) + 2
    print(qt)

    # Iterate from found number of processes
    for item in range(2, qt):

        # Find the XPATH of each process link - start at number 2
        vez = '//*[@id="idBlocoInternoLinhasProcesso"]/div[' + str(item) + ']/span[1]/span[1]/span[1]/span[2]/a'
        print(vez)

        # Access the direct link and click
        element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, vez)))
        element.click()

        # Run tests to get data
        try:
            num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
        except NoSuchElementException:
            num_unico = "sem_numero_unico"

        try:
            nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
        except NoSuchElementException:
            nome_proc = "sem_nome_encontrado"

        try:
            data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
        except NoSuchElementException:
            data_autu = "sem_data_encontrada"

        # Fills dictionary and list
        dicionario = {"num_unico": num_unico,
                      "nome_proc": nome_proc,
                      "data_autu": data_autu
                                 }
        processos.append(dicionario)

        # Return a page to click on next process
        driver.execute_script("window.history.go(-1)")

# Close driver
driver.quit()

In this case I captured the number of link pages (3) and the total number of links (84). So my initial idea was to do the "for" three times and within them split the 84 links

The direct address of each link is in XPATH (//*[@id="idBlocoInternoLinhasProcesso"]/div[41]/span[1]/span[1]/span[1]/span[2]/a) which I replace with the "item" to click

For example, when it arrives at number 42 I have an error because the first page only goes up to 41

My problem is how to go to the second page and then restart only "for" secondary

I think the ideal would be to know the exact number of links on each of the three pages

Anyone have any ideas?

Upvotes: 0

Views: 156

Answers (2)

Sers
Sers

Reputation: 12255

Code below is "Capture routine":

wait = WebDriverWait(driver, 20)

#...    

while True:
    links = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//span[contains(@class,'classSpanNumeroRegistro')]")))
    print("links len", len(links))
    for i in range(1, len(links) + 1):
        # Access the direct link and click
        .until(EC.element_to_be_clickable((By.XPATH, f"(//span[contains(@class,'classSpanNumeroRegistro')])[{i}]//a"))).click()

        # Run tests to get data
        try:
            num_unico = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[6]/span[2]/a').text
        except NoSuchElementException:
            num_unico = "sem_numero_unico"

        try:
            nome_proc = driver.find_element_by_xpath('//*[@id="idSpanClasseDescricao"]').text
        except NoSuchElementException:
            nome_proc = "sem_nome_encontrado"

        try:
            data_autu = driver.find_element_by_xpath('//*[@id="idProcessoDetalhesBloco1"]/div[5]/span[2]').text
        except NoSuchElementException:
            data_autu = "sem_data_encontrada"

        # Fills dictionary and list
        dicionario = {"num_unico": num_unico,
                      "nome_proc": nome_proc,
                      "data_autu": data_autu
                      }
        processos.append(dicionario)

        # Return a page to click on next process
        driver.execute_script("window.history.go(-1)")

    # wait.until(EC.presence_of_element_located((By.CLASS_NAME, "classSpanPaginacaoImagensDireita")))
    next_page = driver.find_elements_by_css_selector(".classSpanPaginacaoProximaPagina")
    if len(next_page) == 0:
        break
    next_page[0].click()

Upvotes: 1

Sureshmani Kalirajan
Sureshmani Kalirajan

Reputation: 1938

You can try run the loop until next button is present on the screen. the logic will look like this,

try:
    next_page = driver.find_element_by_class_name('classSpanPaginacaoProximaPagina')
    if(next_page.is_displayed()):
        next_page.click()

except NoSuchElementException:
     print('next page does not exists')  

Upvotes: 1

Related Questions