Reputation: 130
Whenever I try to scrape the Eastleigh website, it does almost everything it needs to do: it goes to the URL, automatically clicks "Advanced Settings", writes in the decision date, clicks search and collects all the href links. But when it tries to go to them, it fails... Why? All it needs to do is click the href links, but it won't ;-;
Can someone help fix this please?
Code:
import sys
import time
import config
import datetime
from selenium import webdriver
# Ask the user which date range to search and build the two date strings
# (day/month/year, no zero padding) that are later typed into the search form.
print("1. Custom Date")
print("2. Last Week")
choice = input("#: ")
if choice == "1":
    # Dates are taken verbatim from the user.
    print("Start Example: 1/8/2018")
    startDate = input("Start Date: ")
    print("Stop Example: 30/8/2018")
    stopDate = input("Stop Date: ")
elif choice == "2":
    # Use real date arithmetic: subtracting from the day-of-month number
    # alone yields zero or negative "days" during the first week of a month.
    today = datetime.date.today()
    week_ago = today - datetime.timedelta(days=7)
    # The range stops at yesterday rather than today (Steve's idea).
    yesterday = today - datetime.timedelta(days=1)
    startDate = "%s/%s/%s" % (week_ago.day, week_ago.month, week_ago.year)
    stopDate = "%s/%s/%s" % (yesterday.day, yesterday.month, yesterday.year)
else:
    # Anything else is rejected; wait for Enter so the message stays visible.
    print("This is not a choice.")
    print("Press Enter to exit...")
    input("")
    sys.exit()
# Start Chrome through a local chromedriver and load the public register.
url = "https://planning.eastleigh.gov.uk/s/public-register"
driver = webdriver.Chrome(
    executable_path=r"C:\Users\Goten\Desktop\chromedriver.exe"
)
driver.get(url)
time.sleep(2)  # fixed pause before touching the page

# Switch to the first tab (the "Advanced Settings" tab per the question text).
driver.find_element_by_xpath("(//li[@class='slds-tabs_default__item'])[1]").click()

# Type the start and stop dates into the two date inputs.
# These ids change for some reason; the author could not find a way to make
# them stay the same.
for field_id, date_text in (("728:0", startDate), ("744:0", stopDate)):
    driver.find_element_by_id(field_id).click()
    driver.find_element_by_id(field_id).send_keys(date_text)

# Submit via the second "submit" button, pause, then expand the result list.
driver.find_element_by_xpath("(//button[@name='submit'])[2]").click()
time.sleep(2)
driver.find_element_by_xpath("//*[text()='View More']").click()
# Collect the href of every result link currently on the page and print them.
elements = driver.find_elements_by_css_selector(".slds-truncate a")
result = [link.get_attribute("href") for link in elements]
print(result)

# Visit each collected link and append the text of its detail group to log.txt.
# The list is no longer modified while it is being iterated (removing items
# inside the loop made it skip every other link).
# NOTE(review): in the run shown in the Output section every href is
# 'javascript:void(0);', and driver.get() rejects it with
# "unsupported protocol" - these results presumably have to be opened by
# clicking the link elements rather than by navigating to the href.
for link in result:
    driver.get(link)
    # NOTE(review): "slds-form-element__group" looks like a CSS class name
    # rather than an element id - confirm against the page.
    div = driver.find_element_by_id("slds-form-element__group").text
    # Open the log per record with a context manager so the handle is closed.
    with open("log.txt", "a") as log:
        log.write(div + "\n")
        log.write("\n")
#driver.close()
Output:
[8520:11412:0926/001704.487:ERROR:install_util.cc(603)] Failed to read HKLM\SOFTWARE\Policies\Google\Chrome\MachineLevelUserCloudPolicyEnrollmentToken: The system cannot find the file specified. (0x2)
DevTools listening on ws://127.0.0.1:59340/devtools/browser/bc325d32-310c-43e6-b8cf-dcfceaebf5a5
['javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);', 'javascript:void(0);']
Traceback (most recent call last):
File "code/main.py", line 17, in <module>
import urls.eastleigh
File "C:\Users\Goten\Anaconda3\code\urls\eastleigh.py", line 61, in <module>
driver.get(link)
File "C:\Users\Goten\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 332, in get
self.execute(Command.GET, {'url': url})
File "C:\Users\Goten\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 320, in execute
self.error_handler.check_response(response)
File "C:\Users\Goten\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: unsupported protocol
(Session info: chrome=68.0.3440.106)
(Driver info: chromedriver=2.41.578737 (49da6702b16031c40d63e5618de03a32ff6c197e),platform=Windows NT 10.0.17134 x86_64)
Upvotes: 0
Views: 807
Reputation: 461
In fact, you cannot rely on those ids, because they are generated dynamically, as you noted yourself. A hacky solution would be:
...
# Locate the form inputs by class name instead of by their generated ids.
inputs = driver.find_elements_by_class_name(" input")

# Positions of the date fields within `inputs`.
# NOTE(review): presumably the on-page order of the date inputs - confirm.
received_from_index, received_to_index = 1, 2
decision_from_index, decision_to_index = 3, 4  # defined but not used below

received_from, received_to = inputs[received_from_index], inputs[received_to_index]

# Clear each "received" field and type the matching date into it.
for field, value in ((received_from, startDate), (received_to, stopDate)):
    field.clear()
    field.send_keys(value)
This will fill out the fields (I am not sure whether you need to fill all of them). After that, your script should submit the form and get the results page correctly.
You will need to revise this part of your code:
for link in result:
result.remove(link)
driver.get(link)
...
For the second part of what you need:
...
# Names used by the explicit wait below; imported here so the snippet is
# self-contained.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# XPath for all result links, and for the link of the N-th (1-based) result.
ALL_RESULT_LINKS = '//*[@id="arcusbuilt__PApplication__c"]/div/div/h4/a'
NTH_RESULT_LINK = '//*[@id="arcusbuilt__PApplication__c"]/div[%d]/div/h4/a'

# Expand the result list and count the planning applications shown.
driver.find_element_by_xpath("//*[text()='View More']").click()
links = driver.find_elements_by_xpath(ALL_RESULT_LINKS)
print("Total of planning applications : ", len(links))

# Open each result by clicking it, then return with the page's back button.
for link_index in range(1, len(links) + 1):
    # The link is looked up again on every pass instead of reusing `links`.
    result_link = driver.find_element_by_xpath(NTH_RESULT_LINK % link_index)
    result_link.click()
    print("visiting link %d" % link_index)
    # Wait up to 20 seconds for the back button to be present, then pause.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.NAME, "BackButton")))
    time.sleep(3)
    # DO WHAT YOU NEED HERE...
    back_btn = driver.find_element_by_name("BackButton")
    back_btn.click()
Good luck!
Upvotes: 1