Reputation: 233
I am scraping many websites with Selenium and time.sleep(), but this approach is risky: when my computer lags, I lose data.
How can I change my code to wait for an element instead (a Wait_For_Element-style method), so that I avoid losing information?
Here is my code:
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import urllib2
import unicodecsv as csv
import os
import sys
import io
import time
import datetime
import pandas as pd
from bs4 import BeautifulSoup
import re
import contextlib
import selenium.webdriver.support.ui as ui
import numpy as np
from datetime import datetime, timedelta
import sys
# Python 2 only: the reload() builtin and sys.setdefaultencoding() do not exist in Python 3.
# Reloading sys makes setdefaultencoding available again so the process-wide default
# string encoding can be forced to UTF-8 for the rest of the script.
reload(sys)
sys.setdefaultencoding('utf-8')
# scrape(urls): open a Firefox WebDriver, load each URL in `urls`, pull the three
# heading strings, the clock text and the table rows out of the page HTML with
# BeautifulSoup, and return the accumulated rows as the list `datatable`.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# statements below is not visible here; comments about loop scope are hedged.
def scrape(urls):
browser = webdriver.Firefox()
datatable=[]
for url in urls:
browser.get(url)
# NOTE(review): the HTML is captured immediately after get(); nothing between
# these two lines waits for the table to exist, so content rendered later by
# scripts may be missing (the ng-* class names below suggest a script-rendered
# page -- TODO confirm).
html = browser.page_source
# The same `html` string is parsed five separate times below; each soup object
# is used for exactly one lookup.
soup=BeautifulSoup(html,"html.parser")
table = soup.find('table', { "class" : "table table-condensed table-hover data-table m-n-t-15" })
soup2=BeautifulSoup(html,"html.parser")
# .string of the first <h2>, <h1> and <h3> tags respectively.
name = soup2.h2.string
soup3=BeautifulSoup(html,"html.parser")
name2 = soup3.h1.string
soup4=BeautifulSoup(html,"html.parser")
name3 = soup4.h3.string
soup5=BeautifulSoup(html,"html.parser")
# Stripped text of the clock <span>; find() returns None when the span is
# absent, in which case the .text access raises AttributeError.
name4 = soup5.find('span' , attrs={'class' : 'clock-time ng-binding'}).text.strip()
# One output row per matching <tr>; `table` is None if the table was not found,
# which makes this find_all call raise AttributeError.
for record in table.find_all('tr', class_="hidden-xs hidden-sm ng-scope"):
temp_data = []
# Every row starts with the page-level values: clock text, then h2, h1, h3.
temp_data.append(name4)
temp_data.append(name)
temp_data.append(name2)
temp_data.append(name3)
for data in record.find_all("td"):
# Cell text is stored as Latin-1 encoded bytes; characters outside Latin-1
# would raise UnicodeEncodeError here.
temp_data.append(data.text.encode('latin-1'))
# filter(None, ...) drops falsy entries (None, empty strings) from the row.
newlist = filter(None, temp_data)
datatable.append(newlist)
# Fixed 10-second pause. NOTE(review): whether this runs once per URL or once
# after the loop cannot be determined from this paste; either way it comes
# after page_source has already been read for the current page.
time.sleep(10)
# No try/finally surrounds the work above, so this close() is skipped if any
# earlier statement raises.
browser.close()
return datatable
Upvotes: 0
Views: 1471
Reputation: 121
You can create a small library with reusable methods and use the ExpectedConditions of Selenium.
/**
 * Waits for the element identified by {@code by} to become visible and clicks it.
 *
 * @param element descriptive name of the target, used only in the error message
 * @param by      locator used to find the element
 * @throws ObjectMissing if the located element reports that it is not displayed,
 *                       or if any {@code WebDriverException} is raised while
 *                       waiting for or clicking the element
 */
public void clickWebElementVisible(String element, By by) throws ObjectMissing {
    try {
        // Project helper called before the explicit wait; presumably a short
        // fixed pause -- TODO confirm against Utilities.
        Utilities.waitExplicit(1);

        // Explicit wait with a timeout argument of 30 on this instance's driver.
        WebElement target = new WebDriverWait(this.driver, 30)
                .until(ExpectedConditions.visibilityOfElementLocated(by));

        if (!target.isDisplayed()) {
            throw new ObjectMissing(" Error in " + getClass() + "." + element + ". Object Missing");
        }
        target.click();
    } catch (WebDriverException cause) {
        // Surface driver failures (including wait timeouts) as the custom exception.
        throw new ObjectMissing(" Error in " + cause.getMessage());
    }
}
Here, element is a descriptive name that appears in the error message, which helps when debugging to see which element you were looking for, and by is the locator.
The usage could be as follows:
// The second parameter is declared as a By, so the locator value must be wrapped
// in a By factory method (By.id is shown as one example; use whichever strategy fits).
clickWebElementVisible("lnkLoginUsername", By.id("Locator"));
You can throw either a standard Exception or your own custom exception, such as "ObjectMissing".
Upvotes: 0
Reputation: 52665
As was mentioned in the comments, you can use an explicit wait (WebDriverWait)
to wait for a dynamically loaded element, as below:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Poll with a timeout argument of 10 until the results table is present in the
# DOM, then keep the located element in `table`.
table_locator = (By.CSS_SELECTOR, "table.table.table-condensed.table-hover.data-table.m-n-t-15")
table = WebDriverWait(browser, 10).until(EC.presence_of_element_located(table_locator))
Upvotes: 1