dhrice

Reputation: 89

Why is this try/except loop exiting on errors?

I have a script that collects Reddit comments. It pulls from a CSV file containing a list of links. Some of the links are dead, so I get 404/403/etc. errors. The code below correctly catches them, but it then exits the loop entirely and writes out the CSV file without continuing on to the next link.
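Here is a stripped-down version of the same pattern (no Reddit calls) that reproduces the behaviour I'm seeing:

try:
    for i in range(5):
        if i == 2:
            raise ValueError("simulated 404")  # stand-in for a dead link
        print("processed", i)
except:
    print("Error! Skip")

It prints "processed 0", "processed 1", then "Error! Skip", and items 3 and 4 are never processed. The full script is below: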

import praw
import pprint
import csv
import os
import pandas as pd
from collections import namedtuple
from datetime import datetime
from pathlib import Path

def scrape_comments(reddit_api, csv_file, dest):
    df = pd.read_csv(csv_file)
    data = []
    try:
        for pid in df.id:
            # post_comment = []
            submission = reddit_api.submission(id=pid)
            submission.comments.replace_more(limit=None)
            for comment in submission.comments.list():
                # post_comment.append(comment.body)
                data.append((pid, comment.id, comment.parent_id, comment.body, comment.link_id,comment.author, comment.score, comment.created_utc, comment.subreddit))
            # data.append((pid, ";".join(post_comment)))
    except:
        print ("Error! Skip the Current subreddit")
    df = pd.DataFrame(data, columns=["post_id", "comment_id", "comment_parent_id", "comment_body", "comment_link_id","comment_author", "comment_score","comment_created","comment_subreddit"]) # append tuple
    df.to_csv(dest, index=False, encoding='utf-8')

if __name__ == "__main__":
    reddit_api = praw.Reddit(
        client_id="####",
        client_secret="####",
        user_agent="####",
        username="####",
        password="####"
    )
    # reddit_api = init_praw(client_id, client_secret, user_agent, username, password)
    csv_file = "####"
    dest_dir = "####"
    dest_name = "reddits_comments.csv"
    Path(dest_dir).mkdir(parents=True, exist_ok=True)
    dest = os.path.join(dest_dir, dest_name)
    scrape_comments(reddit_api, csv_file, dest)

Upvotes: 0

Views: 55

Answers (1)

CryptoFool

Reputation: 23079

You should put the try/except around a smaller portion of your code, as suggested in the comments. When an exception escapes the loop body, Python jumps straight to the except block, so the entire for loop inside the try is abandoned; with the try inside the loop, only the current iteration is skipped and the loop moves on to the next id. Here's an illustration of that:

def scrape_comments(reddit_api, csv_file, dest):
    df = pd.read_csv(csv_file)
    data = []
    for pid in df.id:
        try:
            # post_comment = []
            submission = reddit_api.submission(id=pid)
            submission.comments.replace_more(limit=None)
            for comment in submission.comments.list():
                # post_comment.append(comment.body)
                data.append((pid, comment.id, comment.parent_id, comment.body, comment.link_id,comment.author, comment.score, comment.created_utc, comment.subreddit))
            # data.append((pid, ";".join(post_comment)))
        except Exception:
            # a dead link (404/403, etc.) now aborts only this one iteration
            print(f"Error! Skipping submission {pid}")
    df = pd.DataFrame(data, columns=["post_id", "comment_id", "comment_parent_id", "comment_body", "comment_link_id","comment_author", "comment_score","comment_created","comment_subreddit"]) # append tuple
    df.to_csv(dest, index=False, encoding='utf-8')
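
If you want to be stricter, catch only the HTTP-level errors so that genuine bugs in your own code still surface instead of being silently swallowed. A sketch of that idea, assuming a recent PRAW where 404/403 responses are raised as prawcore exceptions (NotFound and Forbidden both subclass prawcore.exceptions.ResponseException):

import prawcore
import pandas as pd

def scrape_comments(reddit_api, csv_file, dest):
    df = pd.read_csv(csv_file)
    data = []
    for pid in df.id:
        try:
            submission = reddit_api.submission(id=pid)
            submission.comments.replace_more(limit=None)
            for comment in submission.comments.list():
                data.append((pid, comment.id, comment.parent_id, comment.body,
                             comment.link_id, comment.author, comment.score,
                             comment.created_utc, comment.subreddit))
        except prawcore.exceptions.ResponseException as exc:
            # NotFound (404), Forbidden (403), etc. subclass ResponseException,
            # so dead links are skipped while unrelated errors still raise
            print(f"Skipping submission {pid}: {exc}")
    df = pd.DataFrame(data, columns=["post_id", "comment_id", "comment_parent_id",
                                     "comment_body", "comment_link_id", "comment_author",
                                     "comment_score", "comment_created", "comment_subreddit"])
    df.to_csv(dest, index=False, encoding='utf-8')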

Upvotes: 2
