Reputation: 452
I'm practicing web crawling. To do so, I've challenged myself to get the list of all the people who liked a post on Instagram. My problem is that I'm stuck at the point where I only get the first 11 usernames of likers. I cannot find the right way to automate the scrolling process while collecting the likes.
Here is my process in Jupyter Notebook (it doesn't work as a script yet):
from selenium import webdriver
import pandas as pd
# Start Chrome and load the post page.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Click the first link matched by the XPath below; whatever click() returns
# is what ends up stored in userid_element.
userid_element = (
    driver
    .find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')[0]
    .click()
)

# Read the title attribute of every anchor the XPath matches right now.
# No scrolling happens here, so only the anchors present at this moment
# are collected.
elems = driver.find_elements_by_xpath("//*[@id]/div/a")
users = [elem.get_attribute('title') for elem in elems]
print(users)
Do you guys have any idea?
Many thanks
Upvotes: 1
Views: 12802
Reputation: 1773
I tried all the solutions above, but none of them are working. I think they are outdated.
Instead, I wrote my own. It works perfectly in 2020.
This code opens the profile page of the given "username", takes the latest post on that profile, and collects the users who liked it.
def getPosts():
    """Return the href of every anchor on the current page that links to a post.

    Reads the module-level ``driver``. An href counts as a post link when it
    contains ``'.com/p/'``.

    Returns:
        list: the matching href values, in the order the anchors were found.
    """
    post_hrefs = []
    for anchor in driver.find_elements_by_tag_name('a'):
        # Query the attribute once. An anchor without an href gives None,
        # and a substring test against None would raise TypeError.
        href = anchor.get_attribute('href')
        if href and '.com/p/' in href:
            post_hrefs.append(href)
    return post_hrefs
def getLikers(username,limit,post=1):
    """Collect profile URLs of users who liked one post of ``username``.

    Loads the profile page, opens the post at index ``post`` of the list
    returned by ``getPosts()``, opens the likes dialog and reads the rows of
    that dialog, pressing SPACE between passes to scroll it.

    Args:
        username: Instagram user name whose profile page is loaded.
        limit: maximum number of liker URLs to return.
        post: index into the list returned by ``getPosts()``.

    Returns:
        list: unique ``https://www.instagram.com/<name>`` URLs in the order
        they were first seen, at most ``limit`` entries. The list may be
        shorter when scrolling stops producing new names.
    """
    # Function-scope import so the narrowed except clause below needs no
    # change to the file's import lines.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
    )

    row_xpath = '//div[@role="dialog"]/div[1]/div[2]/div[1]/div[1]/div[%d]/div[2]/div[1]/div[1]'
    # Give up after this many consecutive passes that add nothing new;
    # without it an empty or exhausted dialog would loop forever.
    max_idle_passes = 3

    driver.get('https://www.instagram.com/' + username)
    time.sleep(1)

    # Open the selected post.
    driver.get(getPosts()[post])
    time.sleep(2)

    # Open the likes dialog.
    driver.find_element_by_xpath('//button[@class="sqdOP yWX7d _8A5w5 "]').click()
    time.sleep(1)

    # Click inside the dialog list so the SPACE key presses go to it.
    driver.find_element_by_xpath('//div[@role="dialog"]/div[1]/div[2]/div[1]/div[1]').click()

    users = []
    seen = set()
    idle_passes = 0
    while len(users) < limit and idle_passes < max_idle_passes:
        found_before = len(users)
        for i in range(1, 1000):
            try:
                name = driver.find_element_by_xpath(row_xpath % i).text
            except (NoSuchElementException, StaleElementReferenceException):
                # No row at this index (or it was re-rendered): end of pass.
                break
            url = "https://www.instagram.com/" + name
            # Rows seen on an earlier pass are still in the dialog; only
            # count names that are actually new.
            if url not in seen:
                seen.add(url)
                users.append(url)
                if len(users) >= limit:
                    break

        if len(users) >= limit:
            break
        idle_passes = idle_passes + 1 if len(users) == found_before else 0

        # NOTE(review): a fresh chain per pass is intended to avoid replaying
        # key presses queued on earlier passes - confirm against the
        # installed Selenium version.
        webdriver.ActionChains(driver).key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
        time.sleep(0.5)
    return users
To run it: likers = getLikers("deirvlon",100,1)
Upvotes: 0
Reputation: 107
This worked for me:
# Load the post and open its likes dialog.
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
time.sleep(2)
userid_element = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[2]/div/div[2]/button').click()
time.sleep(2)
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")
users = []

# The chain is created here because nothing else in this snippet defines
# it; without this the SPACE press below fails with NameError.
actionChain = webdriver.ActionChains(driver)
row_xpath = '/html/body/div[4]/div/div/div[2]/div/div/div[%d]/div[2]/div[1]/div/a'

# Rows are addressed by 1-based position, so iterate 1..10 directly instead
# of bumping the loop variable inside the loop.
for i in range(1, 11):
    if i % 10 == 9:
        # On row 9, click the list container and press SPACE to scroll it
        # before reading the remaining rows.
        driver.find_element_by_xpath('/html/body/div[4]/div/div/div[2]/div').click()
        actionChain.key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
    xpath = row_xpath % i
    print(xpath)
    Title = driver.find_element_by_xpath(xpath).get_attribute('title')
    users.append(Title)
    print('Title : ' + Title)
print(users)
Upvotes: 0
Reputation: 1
I wasn't able to get the code to work as posted in predicty's answer. Therefore I made the adaptation below, and it now gets me ~500 likers per post.
def get_post_likers(shortcode, max_users=1500):
    """Return the user names shown in the "liked by" dialog of a post.

    Opens the post identified by ``shortcode`` in the browser returned by
    ``ch.initialize()``, clicks its ``liked_by`` link, and repeatedly scrolls
    the last visible entry into view, collecting the ``title`` attribute of
    each matched anchor.

    Args:
        shortcode: the post identifier used in the ``/p/<shortcode>/`` URL.
        max_users: stop scrolling once at least this many names have been
            collected. The check runs after each pass, so the result can
            slightly exceed it.

    Returns:
        list: unique title values in the order they were first seen.
    """
    # NOTE(review): the browser is not closed here; whether the caller or
    # ch owns its lifetime is not visible from this function.
    chrome = ch.initialize()
    chrome.get('https://www.instagram.com/p/' + shortcode + '/')
    chrome.execute_script("window.scrollTo(0, 1080)")
    url = "/p/" + shortcode + "/liked_by/"
    time.sleep(2)
    chrome.find_element_by_xpath('//a[@href="'+url+'"]').click()
    time.sleep(2)

    spacer_xpath = "//div[@role = 'dialog']/div[2]/div[1]/div[1]"
    users = []
    seen = set()  # O(1) membership test; users keeps first-seen order
    pb = chrome.find_element_by_xpath(spacer_xpath).value_of_css_property("padding-bottom")
    while True:
        lastHeight = pb
        # step 1: anchors currently present in the page
        elements = chrome.find_elements_by_xpath("//*[@id]/div/a")
        if not elements:
            # Nothing to read or scroll to; elements[-1] would raise.
            break
        # step 2: record titles not seen before
        for element in elements:
            title = element.get_attribute('title')
            if title not in seen:
                seen.add(title)
                users.append(title)
        # step 3: scroll the last entry into view so more entries load
        chrome.execute_script("return arguments[0].scrollIntoView();", elements[-1])
        time.sleep(1)
        # step 4: an unchanged padding-bottom is used as the end-of-list signal
        pb = chrome.find_element_by_xpath(spacer_xpath).value_of_css_property("padding-bottom")
        if lastHeight == pb or len(users) >= max_users:
            break
    return users
Upvotes: 0
Reputation: 185
I guess the Instagram site renders at most 17 liked-user elements at a time,
so this takes a loop:
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')
userid_element = driver.find_elements_by_xpath('//*[@id="react-root"]/section/main/div/div/article/div[2]/section[2]/div/div/a')[0].click()
time.sleep(2)

# The user list is now visible.
# Scrolling down is needed to make the page load more entries, so loop
# until the padding-top of the list container stops changing.
container_xpath = "/html/body/div[3]/div/div[2]/div/div"
users = []
seen = set()  # O(1) membership test; users keeps first-seen order
height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
while True:
    lastHeight = height

    # step 1: anchors currently present in the page
    elements = driver.find_elements_by_xpath("//*[@id]/div/a")
    if not elements:
        # Nothing to read or scroll to; elements[-1] would raise IndexError.
        break

    # step 2: record titles not seen before
    for element in elements:
        title = element.get_attribute('title')
        if title not in seen:
            seen.add(title)
            users.append(title)

    # step 3: scroll the last entry into view so more entries load
    driver.execute_script("return arguments[0].scrollIntoView();", elements[-1])
    time.sleep(1)

    # step 4: stop once the container's padding-top no longer changes
    height = driver.find_element_by_xpath(container_xpath).value_of_css_property("padding-top")
    if lastHeight == height:
        break

print(users)
print(len(users))
driver.quit()
I tested it on a post with nearly 100 likes, and it worked.
Upvotes: 1
Reputation: 33384
Please try the following code and let me know if this works.
from selenium import webdriver
# Start Chrome and load the post page.
driver = webdriver.Chrome()
driver.get('https://www.instagram.com/p/BuE82VfHRa6/')

# Collect the title of every anchor carrying the given class list.
# Repeated titles are kept as-is; each one is printed as it is recorded.
elems = driver.find_elements_by_xpath("//a[@class='FPmhX notranslate TlrDj']")
users = []
for elem in elems:
    title = elem.get_attribute('title')
    users.append(title)
    print('Title : ' + title)
print(users)
output:-
Title : kyliejenner
Title : saturdayshade28
Title : worldmeetzboy
Title : mrokon
Title : addieisaac
Title : addieisaac
Title : amber_doerksen
Title : amber_doerksen
Title : addieisaac
Title : zayn6117
Title : amber_doerksen
Title : amber_doerksen
Title : worldmeetzboy
Title : worldmeetzboy
Title : razvanpopic1301
Title : johanna.trmn
Title : johanna.trmn
Title : johanna.trmn
Title : americ.av
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : gabriellcostta1.0
Title : worldmeetzboy
Title : enactusepi
Title : enactusepi
[u'kyliejenner', u'saturdayshade28', u'worldmeetzboy', u'mrokon', u'addieisaac', u'addieisaac', u'amber_doerksen', u'amber_doerksen', u'addieisaac', u'zayn6117', u'amber_doerksen', u'amber_doerksen', u'worldmeetzboy', u'worldmeetzboy', u'razvanpopic1301', u'johanna.trmn', u'johanna.trmn', u'johanna.trmn', u'americ.av', u'gabriellcostta1.0', u'gabriellcostta1.0', u'gabriellcostta1.0', u'worldmeetzboy', u'enactusepi', u'enactusepi']
Upvotes: 1