Charlie Armstead

Reputation: 73

Why is the YouTube API v3 inconsistent in the number of comments it lets you download before an error 400?

I am downloading YouTube comments with a Python script that uses API keys and the YouTube Data API v3, but sooner or later I run into the following error:

{'error': {'code': 400, 'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'errors': [{'message': "The API server failed to successfully process the request. While this can be a transient error, it usually indicates that the request's input is invalid. Check the structure of the commentThread resource in the request body to ensure that it is valid.", 'domain': 'youtube.commentThread', 'reason': 'processingFailure', 'location': 'body', 'locationType': 'other'}]}}

I am using the following code:

import argparse
import requests
import json
import time
start_time = time.time()

class YouTubeApi():

    YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
    comment_counter = 0
    def is_error_response(self, response):
        error = response.get('error')
        if error is None:
            return False
        print("API Error: "
            f"code={error['code']} "
            f"domain={error['errors'][0]['domain']} "
            f"reason={error['errors'][0]['reason']} "
            f"message={error['errors'][0]['message']!r}")
        print(self.comment_counter)
        return True


    def format_comments(self, results, likes_required):
        comments_list = []
        try:
            for item in results["items"]:
                comment = item["snippet"]["topLevelComment"]

                likes = comment["snippet"]["likeCount"]
                if likes < likes_required:
                    continue

                author = comment["snippet"]["authorDisplayName"]
                text = comment["snippet"]["textDisplay"]

                str = "Comment by {}:\n \"{}\"\n\n".format(author, text)
                str = str.encode('ascii', 'replace').decode()

                comments_list.append(str)
                self.comment_counter += 1
                print("Comments downloaded:", self.comment_counter, end="\r")
        except(KeyError):
            print(results)
             
        return comments_list
        

    def get_video_comments(self, video_id, likes_required):

        with open("API_keys.txt", "r") as f:
            key_list = f.readlines()
        comments_list = []
        
        key_list = [key.strip() for key in key_list]    # strip the trailing newline from each key
        
        
        params = {
            'part': 'snippet,replies',
            'maxResults': 100,
            'videoId': video_id,
            'textFormat': 'plainText',
            'key': key_list[0]
        }
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
        }
        comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
        results = comments_data.json()
        
        if self.is_error_response(results):
            return []
        nextPageToken = results.get("nextPageToken")

        comments_list = []
        comments_list += self.format_comments(results, likes_required)

        while nextPageToken:
            params.update({'pageToken': nextPageToken})
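            # switch API keys as the running comment count grows; the 900000-comment
            # steps presumably aim to stay within each key's daily quota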
            if self.comment_counter <= 900000:
                params.update({'key': key_list[0]})
                
            elif self.comment_counter <= 1800000:
                params.update({'key': key_list[1]})
                
            elif self.comment_counter <= 2700000:
                params.update({'key': key_list[2]})
               
            elif self.comment_counter <= 3600000:
                params.update({'key': key_list[3]})

            elif self.comment_counter <= 4500000:
                params.update({'key': key_list[4]})
                
            else:
                params.update({'key': key_list[5]})
            if self.comment_counter % 900001 == 0:
                print(params["key"])
            comments_data = requests.get(self.YOUTUBE_COMMENTS_URL, params=params, headers=headers)
            results = comments_data.json()
            if self.is_error_response(results):
                return comments_list
            nextPageToken = results.get("nextPageToken")
            comments_list += self.format_comments(results, likes_required)
        return comments_list


    def get_video_id_list(self, filename):
        try:
            with open(filename, 'r') as file:
                URL_list = file.readlines()
        except FileNotFoundError:
            exit("File \"" + filename + "\" not found")

        video_ids = []
        for url in URL_list:
            if url == "\n":     # ignore empty lines
                continue
            if url[-1] == '\n':     # delete '\n' at the end of line
                url = url[:-1]
            if '=' in url:      # extract the id after the '=' in the URL
                video_ids.append(url[url.find('=') + 1:])
            else:
                print("Wrong URL")

        return video_ids


def main():
    yt = YouTubeApi()

    parser = argparse.ArgumentParser(add_help=False, description=("Download youtube comments from many videos into txt file"))
    required = parser.add_argument_group("required arguments")
    optional = parser.add_argument_group("optional arguments")
    optional.add_argument("--likes", '-l', help="The amount of likes a comment needs to be saved", type=int)
    optional.add_argument("--input", '-i', help="URL list file name")
    optional.add_argument("--output", '-o', help="Output file name")
    optional.add_argument("--help", '-h', help="Help", action='help')
    args = parser.parse_args()

    # --------------------------------------------------------------------- #



    likes = 0
    if args.likes:
        likes = args.likes

    input_file = "URL_list.txt"
    if args.input:
        input_file = args.input

    output_file = "Comments.txt"
    if args.output:
        output_file = args.output

    video_ids = yt.get_video_id_list(input_file)
    if not video_ids:
        exit("No URLs in input file")

    try:
        
        vid_counter = 0
        with open(output_file, "a") as f:
            for video_id in video_ids:
                vid_counter += 1
                print("Downloading comments for video ", vid_counter, ", id: ", video_id, sep='')
                comments = yt.get_video_comments(video_id, likes)
                if comments:
                    for comment in comments:
                        f.write(comment)

        print('\nDone!')

    except KeyboardInterrupt:
        exit("User Aborted the Operation")

    # --------------------------------------------------------------------- #


if __name__ == '__main__':
    main()

In another thread, it was discovered that Google does not currently permit downloading all the comments on a popular video; however, you would expect it to cut off at the same point each time. Instead, I have found that it can stop anywhere between 200k and 1.5 million comments downloaded before it returns a code 400. Is this due to a bug in my code, or is the YouTube API rejecting my request because it is clear that it is a script? Would adding a time.sleep call help with this?
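If it would, the retry I have in mind is roughly the sketch below (the delay and retry count are arbitrary guesses on my part, not values documented by the API):

import time
import requests

def get_page_with_retry(url, params, headers, max_retries=3, delay=5.0):
    # Retry the same commentThreads page a few times, sleeping between
    # attempts; max_retries and delay are arbitrary guesses on my part.
    results = {}
    for _ in range(max_retries):
        results = requests.get(url, params=params, headers=headers).json()
        error = results.get('error')
        if error is None:
            return results
        if error['errors'][0]['reason'] != 'processingFailure':
            return results              # a different error; let the caller decide
        time.sleep(delay)               # maybe transient -- wait and try again
    return results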

Upvotes: 0

Views: 644

Answers (1)

stvar

Reputation: 6975

(I bring forward this answer -- which I prepared for the question above at the time it was initially posted -- because my assertions below seem to be confirmed once again by recent SO posts of this very kind.)


Your observations are correct. But, unfortunately, nobody but Google itself is able to provide a sound and complete answer to your question. We -- non-Googlers (like myself!), and even Googlers themselves (since they all sign NDAs) -- can only guess about what is implied.

Here is my educated guess, based on the investigations I made recently when responding to a very much related question (which you quoted above, yourself!):

As you already know, the API uses pagination to return result sets whose cardinality exceeds the internal limit of 50 (or, depending on the endpoint, 100) items that any single API call returning a result set may yield.

If you log the nextPageToken property that you obtain from CommentThreads.list via your results object, you'll see that those page tokens get bigger and bigger. Each such page token has to be passed on to the next CommentThreads.list call as the pageToken parameter.
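For instance, a quick way to see that growth for yourself -- just a sketch, reusing the same commentThreads endpoint and the same kind of params dictionary as your script, with a hypothetical helper name -- is to print the length of every token as you page through:

import requests

YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'

def log_page_token_growth(video_id, api_key, max_pages=50):
    # Page through commentThreads and print how long each nextPageToken is.
    params = {
        'part': 'snippet',
        'maxResults': 100,
        'videoId': video_id,
        'textFormat': 'plainText',
        'key': api_key,
    }
    for page in range(max_pages):
        results = requests.get(YOUTUBE_COMMENTS_URL, params=params).json()
        if 'error' in results:
            print('page', page, 'failed:', results['error']['errors'][0]['reason'])
            break
        token = results.get('nextPageToken')
        if token is None:
            break                       # no further pages
        print('page', page, 'nextPageToken length:', len(token))
        params['pageToken'] = token     # feed the token into the next call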

The problem is that, internally (this is not specified publicly, nor documented), the API has a limit on the sheer length of the HTTP requests it accepts from its callers. (This happens for various reasons, e.g. security.) Therefore, when a given page token is sufficiently long, the HTTP request the API user issues exceeds that internal limit, producing an internal error. That error surfaces to the API caller as the processingFailure error that you've encountered.
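You can even watch the total request size grow accordingly. Here is a small sketch of how one might measure it -- it only prepares the request with the requests library, without sending it; the threshold named in the comment is purely a placeholder, since the actual internal limit is not published:

import requests

YOUTUBE_COMMENTS_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'

def request_url_length(params):
    # Build the GET request (but do not send it) and measure the full URL,
    # query string -- including the ever-growing pageToken -- and all.
    prepared = requests.Request('GET', YOUTUBE_COMMENTS_URL, params=params).prepare()
    return len(prepared.url)

# Hypothetical use inside a paging loop:
#     if request_url_length(params) > GUESSED_LIMIT:
#         print("warning: request URL is", request_url_length(params), "characters long")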

Many questions remain to be answered (e.g. why do the page tokens have unbounded length?), but, again, those questions belong very much to the internal realm of the back-end system behind the API we're using. They cannot be answered publicly, since they are very much Google's internal business.

Upvotes: 1
