Reputation: 366
I'm trying to get the text that comes after a label on a website.
Here is the example:
<div class="result-group ng-scope">
<div class="result-group ng-scope">
<label>Orgão Julgador</label>
<div class="ng-binding">Vara Unica da Comarca de Serrita</div>
</div><label>Classe CNJ</label>
<div class="ng-binding">Procedimento Comum Cível</div>
</div>
I need to get these parts: 'Vara Unica da Comarca de Serrita' and 'Procedimento Comum Cível'.
I tried selecting them by position. However, the position changes when I select a different process.
The only thing they have in common is the label that precedes each of them: 'Orgão Julgador' and 'Classe CNJ'.
Here is my code:
import time

import pandas as pd
from bs4 import BeautifulSoup
from python_anticaptcha import AnticaptchaClient, ImageToTextTask
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# TJPE: scrape process data from the unified process-lookup page.
#
# NOTE: the find_element_by_* helpers used below were removed in Selenium 4.3;
# on newer Selenium versions use driver.find_element(By.CSS_SELECTOR, ...).


def texto_apos_label(driver, label, timeout=10):
    """Return the text of the first <div> that follows the given <label>.

    The element is located by the label's text instead of its position in
    the page, because the position of each field changes between processes.

    Args:
        driver: The Selenium WebDriver showing the process page.
        label: Exact (whitespace-normalized) text of the <label> element.
            Must not contain a single quote, since it is embedded in an
            XPath string literal.
        timeout: Maximum number of seconds to wait for the value to appear.

    Returns:
        The visible text of the <div> immediately following the label.

    Raises:
        selenium.common.exceptions.TimeoutException: If the element does not
            become visible within ``timeout`` seconds.
    """
    xpath = f"//label[normalize-space()='{label}']/following-sibling::div[1]"
    elemento = WebDriverWait(driver, timeout).until(
        EC.visibility_of_element_located((By.XPATH, xpath))
    )
    return elemento.text


url = 'https://srv01.tjpe.jus.br/consultaprocessualunificada'
option = webdriver.FirefoxOptions()
# option.add_argument('--headless')  # uncomment to run without a browser window
driver = webdriver.Firefox(options=option)
driver.get(url)
time.sleep(3)

# Enter the CNPJ
driver.find_element_by_css_selector('[heading="Parte"]').click()
elem = driver.find_element_by_css_selector('[id="cpfCnpj"]')
elem.send_keys("11.361.219/0001-32")

# Solve the captcha
driver.find_element_by_css_selector('[class="top-alignment captcha-wrapper"]').screenshot('captcha_0.png')
api_key = ''
client = AnticaptchaClient(api_key)
# Keep the image open only while the task is built and submitted, so the
# file handle is always closed afterwards.
with open('captcha_0.png', 'rb') as captcha_fp:
    task = ImageToTextTask(captcha_fp)
    job = client.createTask(task)
job.join()
a = job.get_captcha_text()
elem = driver.find_element_by_css_selector('[id="captcha"]')
elem.send_keys(a)
elem.send_keys(Keys.RETURN)

# Get the number of result pages: jump to the last page, read the pager
# text, then go back to the first page.
driver.find_element_by_css_selector('[ng-click="selectPage(totalPages, $event)"]').click()
paginas = driver.find_element_by_css_selector('[total-items="vm.processos.length"]').text
driver.find_element_by_css_selector('[ng-click="selectPage(1, $event)"]').click()
paginas = paginas.replace('\n', ',')
paginas = paginas.split(',')
lista = []
for pagina in paginas:
    try:
        lista.append(int(pagina))
    except ValueError:
        # Non-numeric pager entries are deliberately skipped.
        pass

# Collect the process numbers from every page; the last integer found in
# the pager text is used as the page count.
lista_processos = []
for _ in range(lista[-1]):
    numero_processo = driver.find_elements_by_css_selector('[ng-if="processo.npu"]')
    for processo in numero_processo:
        lista_processos.append(processo.text)
    driver.find_element_by_css_selector('[ng-click="selectPage(page + 1, $event)"]').click()

# Disabled loop over all collected processes (kept as an inert string).
'''
for processo in lista_processos:
    processo = processo.replace('-', '')
    processo = processo.replace('.','')
    url = f'https://srv01.tjpe.jus.br/consultaprocessualunificada/processo/{processo}'
    driver.get(url)
    time.sleep(10)
'''

# Open the detail page of the first process only.
processo = lista_processos[0]
processo = processo.replace('-', '')
processo = processo.replace('.','')
url = f'https://srv01.tjpe.jus.br/consultaprocessualunificada/processo/{processo}'
driver.get(url)

# Wait for the detail page to render before clicking the toggle.
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, '[ng-click="exibirTodasPartes = !exibirTodasPartes"]')
    )
).click()

dicionario = {
    'Processo': processo,
    # Located by label text, so the values are found regardless of where
    # the fields sit in the page.
    'Orgão Julgador': texto_apos_label(driver, 'Orgão Julgador'),
    'Classe CNJ': texto_apos_label(driver, 'Classe CNJ'),
    # NOTE(review): still position-based; switch to texto_apos_label once the
    # exact label text and markup for this field are confirmed.
    'Assunto CNJ': driver.find_element_by_css_selector('div.result-group:nth-child(5) > div:nth-child(2) > span:nth-child(1)').text,
}
Upvotes: 1
Views: 285
Reputation: 33384
Use the following XPath to get the value from the div tag.
driver.find_element_by_xpath("//label[text()='Orgão Julgador']/following-sibling::div[1]").text
And
driver.find_element_by_xpath("//label[text()='Classe CNJ']/following-sibling::div[1]").text
As a best practice, use WebDriverWait() and wait for visibility_of_element_located().
WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH,"//label[text()='Orgão Julgador']/following-sibling::div[1]"))).text
And
WebDriverWait(driver,10).until(EC.visibility_of_element_located((By.XPATH,"//label[text()='Classe CNJ']/following-sibling::div[1]"))).text
You need to import the following libraries.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
Upvotes: 1