Reputation: 379
Please find the attached screenshot.
The code below prints only the first 4-5 rows, which are the ones visible in the screenshot. It does not scroll down, and when it reaches the remaining rows it prints blank strings.
The same code runs successfully when I write the body of the main function as a plain script instead of inside a function.
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import add_data

def close_up(driver, actions):
    time.sleep(1)
    # 'wait' is created inside main(), so it is not visible here; build a local one
    wait = WebDriverWait(driver, 10)
    actions.move_to_element(wait.until(EC.element_to_be_clickable(
        (By.XPATH, "//button[@data-dismiss='modal']")))).perform()
    button = driver.find_element_by_xpath("//button[@data-dismiss='modal']")
    driver.execute_script("arguments[0].click();", button)
    time.sleep(1)
def check_model_winodows(driver, actions):
    try:
        if len(driver.find_elements(By.XPATH, "(//button[@data-dismiss='modal'])[1]")) > 0:
            # print("Pop up is visible")
            close_up(driver, actions)
        else:
            print("")
    except:
        # print("Something went wrong")
        pass
    return driver, actions
def main(hashtag):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-notifications")
    options.add_argument("--start-maximized")
    options.add_argument("--disable-extensions")
    options.add_argument('--window-size=1920,1080')
    # Both prefs must go into one dict: a second add_experimental_option("prefs", ...)
    # call would overwrite the first
    options.add_experimental_option("prefs", {
        "profile.default_content_setting_values.notifications": 2,
        "profile.default_content_settings.cookies": 2,
    })
    driver = webdriver.Chrome(executable_path='/home/tukaram/chromedriver', options=options)
    # driver = webdriver.Chrome(driver_path)
    driver.maximize_window()
    driver.implicitly_wait(50)
    driver.get("https://www.trackmyhashtag.com/")
    wait = WebDriverWait(driver, 10)
    actions = ActionChains(driver)
    wait.until(EC.visibility_of_element_located((By.ID, "search_keyword"))).send_keys(hashtag, Keys.RETURN)
    check_model_winodows(driver, actions)
    wait = WebDriverWait(driver, 10)
    time.sleep(3)
    button = driver.find_element_by_css_selector("a[onclick*='preview-tweets']")
    driver.execute_script("arguments[0].click();", button)
    # wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[onclick*='preview-tweets']"))).click()
    check_model_winodows(driver, actions)
    total_number_of_tweet_row = len(driver.find_elements(By.XPATH, "//tbody/tr"))
    # print(total_number_of_tweet_row)
    rank = 1
    page_number = 2
    total_number_of_pages = 5
    myhashtag = {}
    for a, idx in enumerate(range(total_number_of_pages)):
        print("idx>>>>", idx)
        j = 0
        for i in range(total_number_of_tweet_row):
            check_model_winodows(driver, actions)
            elems = driver.find_elements(By.XPATH, "//tbody/tr")
            time.sleep(1)
            # final_ele = elems[j].find_element_by_xpath(".//td[2]")
            # print("code worked till here")
            name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
            print("name>", name)
            myhashtag['user_name'] = name
            userid = elems[j].find_element_by_tag_name("td").text
            userid = userid.partition('@')[2]
            userid = '@' + userid
            print("userid>", userid)
            myhashtag['user_screen_name'] = userid
            content = elems[j].find_element_by_xpath(".//td[2]").text
            print("content", content)
            myhashtag['content'] = content
            date = elems[j].find_element_by_xpath(".//td[3]").text
            print("1>>>>", date)
            date = str(date).replace("\n", " ")
            print("2>>>", date)
            date = datetime.strptime(date, '%d %b %Y %H:%M:%S %p')
            print("3>>>", date)
            date = date.strftime('%Y-%m-%dT%H:%M:%SZ')
            print("date", date)
            myhashtag['articleDate'] = date
            engm = elems[j].find_element_by_xpath(".//td[4]").text
            print("engagement", engm)
            myhashtag['engagement'] = engm
            impressions = elems[j].find_element_by_xpath(".//td[6]").text
            print("impressions", impressions)
            myhashtag['impressions'] = impressions
            myhashtag['rank'] = rank
            rank = rank + 1
            j = j + 1
            print(myhashtag)
        check_model_winodows(driver, actions)
        driver.execute_script(
            "var scrollingElement = (document.scrollingElement || document.body);"
            "scrollingElement.scrollTop = scrollingElement.scrollHeight;")
        wait.until(EC.element_to_be_clickable((By.XPATH, f"//a[text()='{page_number}']"))).click()
        page_number = page_number + 1
        print("Page numberrrr", page_number)
        if page_number == 7:
            break
    driver.quit()
    return driver, actions

if __name__ == '__main__':
    for x in add_data.words:
        main(x)
add_data.py:
words = ['India', '@pakistan']  # words to crawl
Upvotes: 0
Views: 249
Reputation: 3433
Maybe you need to scroll to each row to extract its details. I added
driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])
to the code, and it extracted all the details. Try this once.
    for a, idx in enumerate(range(total_number_of_pages)):
        print("idx>>>>", idx)
        j = 0
        for i in range(total_number_of_tweet_row):
            check_model_winodows(driver, actions)
            elems = driver.find_elements(By.XPATH, "//tbody/tr")
            time.sleep(1)
            # final_ele = elems[j].find_element_by_xpath(".//td[2]")
            # print("code worked till here")
            driver.execute_script("arguments[0].scrollIntoView(true);", elems[j])  # Line to be added
            name = elems[j].find_element_by_xpath(".//div[@class='tweet-name']").text
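The scroll matters here most likely because the page fills in row content lazily as rows enter the viewport, or because Selenium's .text property returns only text the driver considers displayed; either way, off-screen rows read back as empty strings until they are scrolled into view. If the blank values come from the visibility check rather than lazy rendering, reading the textContent DOM attribute is an alternative that ignores visibility. A minimal sketch, assuming elems holds the same row elements as in the loop above:

# Sketch: read a row's raw DOM text without scrolling to it.
# get_attribute("textContent") returns the node's text even for
# off-screen elements, though whitespace and line breaks may differ
# from what the rendered .text property would give you.
row_text = elems[j].get_attribute("textContent").strip()
print("row text>", row_text)

If the site only injects row content on scroll, this sketch would still return empty strings, so the scrollIntoView approach above remains the safer fix.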
Upvotes: 1