Reputation: 31
I'm working on a project where I'm trying to get more insight into what people comment under public Instagram posts. I've tried the module 'instaloader', but I can't get it to work. Does anybody have a clue how I could solve this problem? I don't want to download entire user profiles; I just want to be able to paste the link of a post into the code and get the comments of that specific post.
Any help would really be appreciated! :)
import instaloader
# Replace 'your_username' and 'your_password' with your Instagram credentials
username = 'username'
password = 'password'
# Replace 'post_shortcode' with the shortcode of the Instagram post you want to scrape comments from
post_shortcode = 'ABCDEF12345'
L = instaloader.Instaloader()
# Log in to Instagram
try:
    L.load_session_from_file(username)
except FileNotFoundError:
    # Session file not found, log in with credentials
    L.context.log_in(username, password)
    L.save_session_to_file()
# Get comments for the post
post = instaloader.Post.from_shortcode(L.context, post_shortcode)
# get_comments() returns an iterator, so materialize it once before reusing it
comments = list(post.get_comments())
# Extract usernames and comments
usernames = [comment.owner.username for comment in comments]
comment_texts = [comment.text for comment in comments]
# Print or process the extracted data as needed
for username, comment_text in zip(usernames, comment_texts):
print(f"{username}: {comment_text}")
# Logout from Instagram (optional)
# L.context.logout()
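For reference, one way to turn a pasted post link into the shortcode that instaloader expects could look like this. It is only a rough sketch: comments_from_url is just an illustrative name, and it assumes the standard https://www.instagram.com/p/<shortcode>/ URL format.
import instaloader

def comments_from_url(L: instaloader.Instaloader, post_url: str):
    """Yield (username, text) pairs for every comment on the post behind post_url.

    Assumes a standard post URL like https://www.instagram.com/p/ABCDEF12345/.
    """
    # The shortcode is the path segment right after "/p/"
    shortcode = post_url.split("/p/")[-1].split("/")[0]
    post = instaloader.Post.from_shortcode(L.context, shortcode)
    for comment in post.get_comments():
        yield comment.owner.username, comment.text

# Example usage (assumes L is an already logged-in Instaloader instance):
# for user, text in comments_from_url(L, 'https://www.instagram.com/p/ABCDEF12345/'):
#     print(f"{user}: {text}")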
Upvotes: 3
Views: 3276
Reputation: 133
Using the code below, I was able to get only a few comments (about 100) from an Instagram post, but I really want to get all the comments of the post:
import json
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def scrape_post(url_or_shortcode: str) -> str:
    """Build the GraphQL URL that returns a single Instagram post's data"""
    if "http" in url_or_shortcode:
        shortcode = url_or_shortcode.split("/p/")[-1].split("/")[0]
    else:
        shortcode = url_or_shortcode
    print(f"scraping instagram post: {shortcode}")

    variables = {
        "shortcode": shortcode,
        "child_comment_count": 10000,
        "fetch_comment_count": 10000,
        "parent_comment_count": 10000,
        "has_threaded_comments": True,
    }
    query_hash = "b3055c01b4b222b8a47dc12b090e4e64"
    encoded_variables = quote(json.dumps(variables))
    url = f"https://www.instagram.com/graphql/query/?query_hash={query_hash}&variables={encoded_variables}"
    return url
def configure_driver():
    chrome_options = Options()
    chrome_options.add_argument('--ignore-certificate-errors')
    driver = webdriver.Chrome(options=chrome_options)
    return driver
def save_in_file(data, file_name):
    # Write the results to the text file
    with open(file_name, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=2, ensure_ascii=False)
def analyze_content(content):
    soup = BeautifulSoup(content, "html.parser")
    pre_tag = soup.find("pre")
    json_data = pre_tag.text
    parsed_data = json.loads(json_data)
    return parsed_data
INSTAGRAM_APP_ID = "936619743392459" # this is the public app id for instagram.com
url = 'https://www.instagram.com/p/C2SGS95NQH_/'
driver = configure_driver()
driver.get(url)
wait_for_login = input('continue? y/n')  # pause here so you can log in to Instagram manually
driver.get(scrape_post(url))
content = driver.page_source
parsed_data = analyze_content(content)
save_in_file(parsed_data, 'conteudo.txt')
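The cap you are hitting is Instagram's own page size: one GraphQL response only contains the first page of comments plus a page_info block with an end_cursor for the next page. Below is a sketch of how the comments and the cursor might be read out of parsed_data. The key names (data -> shortcode_media -> edge_media_to_parent_comment) are assumptions based on what this query_hash has typically returned, so verify them against your own conteudo.txt; fetching the remaining pages would require follow-up queries that pass the cursor as an "after" variable.
def extract_comments(parsed_data):
    """Pull (username, text) pairs out of the parsed GraphQL response.

    Assumes the layout data -> shortcode_media -> edge_media_to_parent_comment;
    adjust the keys if the payload you receive is shaped differently.
    """
    media = parsed_data["data"]["shortcode_media"]
    comment_block = media["edge_media_to_parent_comment"]
    comments = [
        (edge["node"]["owner"]["username"], edge["node"]["text"])
        for edge in comment_block["edges"]
    ]
    # page_info indicates whether Instagram truncated the list; end_cursor is the
    # token you would pass as an "after" variable in a follow-up comments query.
    page_info = comment_block["page_info"]
    return comments, page_info["has_next_page"], page_info["end_cursor"]

comments, has_more, cursor = extract_comments(parsed_data)
for user, text in comments:
    print(f"{user}: {text}")
print(f"more comments available: {has_more}, next cursor: {cursor}")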
Upvotes: 2