Praveen Kumar

Reputation: 299

Cloud Scheduler failing on a Python Cloud Function (for inserting data into BigQuery); it works well when I run the function manually

This is the error I'm getting when it runs through Cloud Scheduler:

{
errorGroups: [
0: {1}]
insertId: "667eaae80003fffda18f493f"
labels: {3}
logName: "projects/trans-market-427605-f1/logs/run.googleapis.com%2Fstderr"
receiveTimestamp: "2024-06-28T12:22:00.268374258Z"
resource: {2}
spanId: "6652963829757778279"
textPayload: "Exception on / [POST]
Traceback (most recent call last):
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/flask/app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/flask/app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/flask/app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/flask/app.py", line 865, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/functions_framework/execution_id.py", line 106, in wrapper
    return view_function(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/layers/google.python.pip/pip/lib/python3.12/site-packages/functions_framework/__init__.py", line 142, in view_func
    return function(request._get_current_object())
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/main.py", line 88, in insert_rows
    video_ids = get_video_details(youtube)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/main.py", line 20, in get_video_details
    request = youtube.playlistItems().list(
              ^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'Request' object has no attribute 'playlistItems'"
timestamp: "2024-06-28T12:22:00.262141Z"
}

Here is the Cloud Function in Python:


import functions_framework
from googleapiclient.discovery import build
from google.cloud import bigquery
from datetime import datetime

client = bigquery.Client()
# YouTube credentials
api_key = 'my api key'

channel_id = 'UCONax_0CMWVL6b7tHulEQhA'
api_service_name = "youtube"
api_version = "v3"
youtube = build(api_service_name, api_version, developerKey=api_key)


video_ids=[]

def get_video_details(youtube):

    request = youtube.playlistItems().list(
        part='contentDetails',
        playlistId='UUONax_0CMWVL6b7tHulEQhA',
        maxResults=50,
    )
    response = request.execute()

    # Appending for the first 50 entries
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])

    # Going for next pages
    next_page_token = response.get('nextPageToken')
    more_pages = True


    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId='UUONax_0CMWVL6b7tHulEQhA',
                maxResults=50,
                pageToken=next_page_token  # Include nextPageToken in the request
            )
            response = request.execute()

            for item in response['items']:
                video_ids.append(item['contentDetails']['videoId'])

            next_page_token = response.get('nextPageToken')

    return video_ids



def get_video_views(youtube,video_ids):
    monthly_views = {}
    for i in range(0, len(video_ids), 50):
        if i < len(video_ids):
            request = youtube.videos().list(
                part="snippet,contentDetails,statistics",
                id=','.join(video_ids[i:i + 50])
            )
            response = request.execute()

            for video in response['items']:
                published_date = video['snippet']['publishedAt']
                date_obj = datetime.strptime(published_date, "%Y-%m-%dT%H:%M:%SZ")
                month_year = date_obj.strftime("%Y-%m")
                view_count = int(video['statistics'].get('viewCount', 0))


                if month_year in monthly_views:
                    monthly_views[month_year] += view_count
                else:
                    monthly_views[month_year] = view_count

    monthly_views_list = []
    for month, views in monthly_views.items():
        monthly_views_list.append({'month': month, 'views': views})

    return monthly_views_list




def insert_rows(youtube):
    video_ids = get_video_details(youtube)
    monthly_views = get_video_views(youtube,video_ids)

    table_id = "trans-market-427605-f1.Dummy_Test.Dummy_Test_Python1"
    errors = client.insert_rows_json(table_id, monthly_views)
    if errors == []:
        print("New rows have been added.")
    else:
        print("Encountered errors while inserting rows: {}".format(errors))



insert_rows(youtube)


So basically I'm trying to get video views from the YouTube API and then insert them into the BigQuery dataset table. It works well when I run it manually in the Cloud Function: I'm able to insert data and it appears perfectly in the table. I want the Cloud Function to be triggered every hour, but it's failing.
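
Since the scheduler is just supposed to send a plain HTTP POST to the function's trigger URL, I assume each scheduled run is roughly equivalent to the snippet below (the URL here is a placeholder, not my real trigger URL):

import requests

# Rough equivalent of what an unauthenticated Cloud Scheduler run should send:
# an empty-body POST to the function's HTTP trigger URL (placeholder below).
response = requests.post("https://REGION-PROJECT_ID.cloudfunctions.net/FUNCTION_NAME")
print(response.status_code)
print(response.text)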

Cloud Scheduler configuration:
Target type: HTTP
URL: the function's HTTP trigger URL
HTTP Method: POST

For authentication, I did not configure any authentication.
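
For reference, this is roughly what the job I described would look like if it were created with the google-cloud-scheduler client instead of the console (the project, region, job name, and URL below are placeholders, not my actual values):

from google.cloud import scheduler_v1

client = scheduler_v1.CloudSchedulerClient()

# Placeholder project/region; the real job targets my function's trigger URL.
parent = "projects/PROJECT_ID/locations/REGION"

job = scheduler_v1.Job(
    name=parent + "/jobs/hourly-youtube-to-bigquery",
    schedule="0 * * * *",  # every hour
    time_zone="Etc/UTC",
    http_target=scheduler_v1.HttpTarget(
        uri="https://REGION-PROJECT_ID.cloudfunctions.net/FUNCTION_NAME",
        http_method=scheduler_v1.HttpMethod.POST,
        # No oidc_token / oauth_token set, i.e. no authentication.
    ),
)

client.create_job(parent=parent, job=job)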

Upvotes: 0

Views: 83

Answers (0)
