Mhmd Khalil
Mhmd Khalil

Reputation: 216

How to scrape the Instagram followers popup with Python Playwright

I'm using the Playwright library to scrape websites, and so far it's been great. However, I want to scrape the followers of specific accounts, and I can't manage to scroll through the followers' popup.

For example, when I use page.mouse.wheel(0,1000), it attempts to scroll through the whole Instagram page instead of scrolling in the popup.

I found solutions for such issues, but they all use Selenium, which I'm not familiar with. I'm new to web scraping, and I found Selenium a bit overwhelming to start with.

So, my question is, how can I add some sort of bounding box so that playwright only scrolls through the followers' popup?

I've gotten this far with the code using Playwright codegen. This is where I'm stuck:

from playwright.sync_api import Playwright, sync_playwright, expect 
import time


def run(playwright: Playwright) -> None:
    """Log in to Instagram, open a profile's followers list and try to scroll it.

    Args:
        playwright: The Playwright driver object used to launch Chromium.
    """
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()

    # Open new page
    page = context.new_page()

    # Go to https://www.instagram.com/
    page.goto("https://www.instagram.com/")

    # Click on Username field
    page.locator(
        "[aria-label=\"Phone number\\, username\\, or email\"]").click()

    # Fill with username
    page.locator(
        "[aria-label=\"Phone number\\, username\\, or email\"]").fill("USERNAME")

    # Click on Password field
    page.locator("[aria-label=\"Password\"]").click()

    # Fill with password
    page.locator("[aria-label=\"Password\"]").fill("PASSWORD")

    # Click Log In
    page.locator("button:has-text(\"Log In\")").first.click()
    page.wait_for_url("https://www.instagram.com/accounts/onetap/?next=%2F")

    # Click text=Not Now
    page.locator("text=Not Now").click()
    page.wait_for_url("https://www.instagram.com/")

    # Click text=Not Now
    page.locator("text=Not Now").click()

    page.goto("https://www.instagram.com/instagram/")

    # Click text=542M followers
    page.locator("text=542M followers").click()
    page.wait_for_url("https://www.instagram.com/instagram/followers/")

    # NOTE: this is the part that does not work as intended -- these wheel
    # events scroll the whole Instagram page instead of the followers popup.
    page.mouse.wheel(0, 2000)
    time.sleep(4)
    page.mouse.wheel(0, 2000)
    time.sleep(4)
    page.mouse.wheel(0, 2000)

Upvotes: 3

Views: 1788

Answers (1)

Anatolii
Anatolii

Reputation: 1

You can use this example as a starting point for your script:

from playwright.sync_api import Playwright, sync_playwright


def run(playwright: Playwright) -> None:
    """Log in to Instagram and scroll a profile's followers list a few times.

    The followers list is scrolled by repeatedly bringing the last follower
    row currently in the DOM into view, then waiting for more rows to load.

    Args:
        playwright: The Playwright driver object used to launch Chromium.
    """
    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()

    # Open new page
    page = context.new_page()

    # Go to https://www.instagram.com/
    page.goto("https://www.instagram.com/")

    # Fill with username (placeholder value -- replace with your own)
    username_field = page.get_by_label("Phone number, username, or email")
    username_field.click()
    username_field.fill("[email protected]")

    # Fill with password (placeholder value -- replace with your own)
    password_field = page.get_by_label("Password")
    password_field.click()
    password_field.fill("MyVeryStrongPassword!")

    # Click Log In
    page.get_by_role("button", name="Log in", exact=True).click()
    page.wait_for_url("https://www.instagram.com/accounts/onetap/?next=%2F")

    page.goto("https://www.instagram.com/")

    # Click text=Not Now
    page.get_by_role("button", name="Not Now").click()
    page.wait_for_url("https://www.instagram.com/")

    # put the link of the profile from which you want to get followers
    page.goto("https://www.instagram.com/desired_profile/followers/")

    # Locators are resolved lazily, so `.last` is re-evaluated on every use
    # and always refers to the newest row in the DOM.
    follower_rows = page.locator('a > div > div > span[dir="auto"]')

    # Use the while loop where you compare the number of profiles in the DOM
    # with the number of followers indicated in the profile header
    # because this example will only scroll 5 times
    for _ in range(5):
        follower_rows.last.scroll_into_view_if_needed()
        # Give the page time to load the next batch of followers.
        page.wait_for_timeout(5 * 1000)

    # Open the Playwright inspector so the result can be examined manually.
    page.pause()

    # Release the browser resources once the inspector session is resumed.
    context.close()
    browser.close()


if __name__ == "__main__":
    # Script entry point: start the synchronous Playwright driver and pass it
    # to run(); the with-block handles teardown when run() returns.
    with sync_playwright() as playwright:
        run(playwright)

Upvotes: 0

Related Questions