Reputation: 216
I'm using the Playwright library to scrape websites, and so far it's been great. However, I want to scrape the followers of specific accounts, and I can't manage to scroll through the followers popup.
For example, when I use page.mouse.wheel(0,1000), it attempts to scroll through the whole Instagram page instead of scrolling in the popup.
I found solutions for such issues, but they all use Selenium, which I'm not familiar with. I'm new to web scraping, and I found Selenium a bit overwhelming to start with.
So, my question is, how can I add some sort of bounding box so that playwright only scrolls through the followers' popup?
I've gotten this far with code generated by playwright codegen; this is where I'm stuck:
from playwright.sync_api import Playwright, sync_playwright, expect
import time
def run(playwright: Playwright) -> None:
    """Log in to Instagram, open a profile's followers popup and try to scroll it.

    This is the script from the question: it reproduces the problem being
    asked about rather than solving it.

    Args:
        playwright: The Playwright driver object yielded by
            ``sync_playwright()``.
    """
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    # Open new page
    page = context.new_page()
    # Go to https://www.instagram.com/
    page.goto("https://www.instagram.com/")
    # Click on Username field
    page.locator(
        "[aria-label=\"Phone number\\, username\\, or email\"]").click()
    # Fill with username
    page.locator(
        "[aria-label=\"Phone number\\, username\\, or email\"]").fill("USERNAME")
    # Click on Password field
    page.locator("[aria-label=\"Password\"]").click()
    # Fill with password
    page.locator("[aria-label=\"Password\"]").fill("PASSWORD")
    # Click Log In
    page.locator("button:has-text(\"Log In\")").first.click()
    page.wait_for_url("https://www.instagram.com/accounts/onetap/?next=%2F")
    # Click text=Not Now
    page.locator("text=Not Now").click()
    page.wait_for_url("https://www.instagram.com/")
    # Click text=Not Now
    page.locator("text=Not Now").click()
    page.goto("https://www.instagram.com/instagram/")
    # Click text=542M followers
    page.locator("text=542M followers").click()
    page.wait_for_url("https://www.instagram.com/instagram/followers/")
    # NOTE(review): per the question, these wheel events end up scrolling the
    # main page instead of the followers popup -- presumably because the
    # pointer is not positioned over the popup when they are dispatched;
    # confirm against the Playwright mouse documentation.
    page.mouse.wheel(0, 2000)
    # Pause between scrolls so newly requested content has time to load.
    time.sleep(4)
    page.mouse.wheel(0, 2000)
    time.sleep(4)
    page.mouse.wheel(0, 2000)
Upvotes: 3
Views: 1788
Reputation: 1
You can use this example as a starting point for your script:
from playwright.sync_api import Playwright, sync_playwright
def run(playwright: Playwright) -> None:
    """Log in to Instagram and scroll a profile's followers list a few times.

    The list is advanced by repeatedly scrolling the last matching follower
    entry into view rather than by sending mouse-wheel events.

    Args:
        playwright: The Playwright driver object yielded by
            ``sync_playwright()``.
    """
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    # Open new page
    page = context.new_page()
    # Go to https://www.instagram.com/
    page.goto("https://www.instagram.com/")
    # Fill with username (placeholder address -- replace with your own login)
    page.get_by_label("Phone number, username, or email").click()
    page.get_by_label("Phone number, username, or email").fill("user@example.com")
    # Fill with password (placeholder -- replace with your own password)
    page.get_by_label("Password").click()
    page.get_by_label("Password").fill("MyVeryStrongPassword!")
    # Click Log In
    page.get_by_role("button", name="Log in", exact=True).click()
    page.wait_for_url("https://www.instagram.com/accounts/onetap/?next=%2F")
    page.goto("https://www.instagram.com/")
    # Click text=Not Now
    page.get_by_role("button", name="Not Now").click()
    page.wait_for_url("https://www.instagram.com/")
    # Put the link of the profile whose followers you want here.
    page.goto("https://www.instagram.com/desired_profile/followers/")
    # This example only scrolls 5 times. To load every follower, replace the
    # for loop with a while loop that compares the number of profiles in the
    # DOM with the follower count shown in the profile header.
    for _ in range(5):
        # Bring the last matching entry into view; presumably this is what
        # makes the list load its next batch -- verify the selector against
        # the current page markup.
        page.locator('a > div > div > span[dir="auto"]').last.scroll_into_view_if_needed()
        # Wait 5 seconds before the next scroll.
        page.wait_for_timeout(5 * 1000)
    # Pause script execution so the loaded list can be inspected manually.
    page.pause()
# Script entry point: start Playwright and hand the driver to run(); the
# context manager shuts Playwright down when run() returns or raises.
if __name__ == "__main__":
    with sync_playwright() as pw:
        run(pw)
Upvotes: 0