Reputation: 125
I am working on a Cloud Function in Google Cloud that should output a CSV, which is then loaded into a storage bucket and into BigQuery simultaneously.
As of now, my code looks something like the snapshot below. The Cloud Function's entry point is set to pagespeedurls.
import requests
import os.path
import datetime
from google.cloud import storage
from google.cloud import bigquery
# Documentation: https://developers.google.com/speed/docs/insights/v5/get-started
# JSON paths: https://developers.google.com/speed/docs/insights/v4/reference/pagespeedapi/runpagespeed
# Populate 'pagespeed.txt' file with URLs to query against API.
with open('pagespeed.txt') as pagespeedurls:
    date = datetime.datetime.now()
    download_dir = 'pagespeed-results' + str(date.today()) + '.csv'
    file = open(download_dir, 'w')
    content = pagespeedurls.readlines()
    content = [line.rstrip('\n') for line in content]
    columnTitleRow = "URL, Cumulative Layout Shift Result, Largest Contentful Paint Result, First Input Delay Result, First Contentful Paint, First Interactive, Largest Contentful Paint, Cumulative Layout Shift, Max Potential FID\n"
    file.write(columnTitleRow)
    # This is the google pagespeed api url structure, using for loop to insert each url in .txt file
    for line in content:
        # If no "strategy" parameter is included, the query by default returns desktop data.
        key = "API_KEY"
        pagespeed = f'https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url={line}&key={key}&strategy=mobile'
        print(f'Requesting {pagespeed}...')
        request = requests.get(pagespeed)
        final = request.json()
        try:
            urlid = final['id']
            split = urlid.split('?')  # This splits the absolute url from the api key parameter
            urlid = split[0]  # This reassigns urlid to the absolute url
            ID = f'URL ~ {urlid}'
            ID2 = str(urlid)
            cruxcls = final['loadingExperience']['metrics']['CUMULATIVE_LAYOUT_SHIFT_SCORE']['category']
            CRUXCLS = f'Cumulative Layout Shift Result ~ {str(cruxcls)}'
            CRUXCLS2 = str(cruxcls)
            cruxlcp = final['loadingExperience']['metrics']['LARGEST_CONTENTFUL_PAINT_MS']['category']
            CRUXLCP = f'Largest Contentful Paint Result ~ {str(cruxlcp)}'
            CRUXLCP2 = str(cruxlcp)
            cruxfid = final['loadingExperience']['metrics']['FIRST_INPUT_DELAY_MS']['category']
            CRUXFID = f'First Input Delay Result ~ {str(cruxfid)}'
            CRUXFID2 = str(cruxfid)
            urlfcp = final['lighthouseResult']['audits']['first-contentful-paint']['displayValue']
            FCP = f'First Contentful Paint ~ {str(urlfcp)}'
            FCP2 = str(urlfcp)
            urlfi = final['lighthouseResult']['audits']['interactive']['displayValue']
            FI = f'First Interactive ~ {str(urlfi)}'
            FI2 = str(urlfi)
            urllcp = final['lighthouseResult']['audits']['largest-contentful-paint']['displayValue']
            LCP = f'Largest Contentful Paint ~ {str(urllcp)}'
            LCP2 = str(urllcp)
            urlcls = final['lighthouseResult']['audits']['cumulative-layout-shift']['displayValue']
            CLS = f'Cumulative Layout Shift ~ {str(urlcls)}'
            CLS2 = str(urlcls)
            urlmaxfid = final['lighthouseResult']['audits']['max-potential-fid']['numericValue']
            MAXFID = f'Cumulative Layout Shift ~ {str(urlmaxfid)}'
            MAXFID2 = str(urlmaxfid)
        except KeyError:
            print(f'<KeyError> One or more keys not found {line}.')
        try:
            row = f'{ID2},{CRUXCLS2},{CRUXLCP2},{CRUXFID2},{FCP2},{FI2},{LCP2},{CLS2},{MAXFID2}\n'
            file.write(row)
        except NameError:
            print(f'<NameError> Failing because of KeyError {line}.')
            file.write(f'<KeyError> & <NameError> Failing because of nonexistant Key ~ {line}.' + '\n')
        try:
            print(ID)
            print(FCP)
            print(FI)
        except NameError:
            print(f'<NameError> Failing because of KeyError {line}.')
    file.close()
def upload_file(bucket_name):
  """Uploads a file to the google storage bucket."""
    bucket_name = "BUCKET_NAME"
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    bucket.upload_from_filename(download_dir)
# Load data into BigQuery
def load_data_from_gcs(dataset, table1, source):
    bigquery_client = bigquery.Client(dataset)
    dataset = bigquery_client.dataset('pageSpeed')
    table = dataset.table(table1)
    job_name = str(uuid.uuid4())
    job = bigquery_client.load_table_from_storage(
        job_name, table, "gs://psi-reports")
    job.source_format = 'NEWLINE_DELIMITED_JSON'
    job.begin()
    wait_for_job(job)
    print("state of job is: " + job.state)
    print("errors: " + job.errors)
When deploying this I receive the following error:
"Function failed on loading user code. This is likely due to a bug in the user code. Error message: Code in file main.py can't be loaded.
Detailed stack trace:
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 359, in check_or_load_user_function
_function_handler.load_user_function()
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py", line 236, in load_user_function
spec.loader.exec_module(main_module)
File "<frozen importlib._bootstrap_external>", line 724, in exec_module
File "<frozen importlib._bootstrap_external>", line 860, in get_code
File "<frozen importlib._bootstrap_external>", line 791, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/user_code/main.py", line 82
bucket_name = "BUCKET_NAME"
^
IndentationError: unexpected indent
. Please visit https://cloud.google.com/functions/docs/troubleshooting for in-depth troubleshooting documentation. "
When I run the script locally with a simple path added for the output, it works perfectly. I am still a novice when it comes to working in the cloud, so any and all help is much appreciated!
Upvotes: -1
Views: 754
Reputation: 3077
I think the reason is that your docstring's indentation is wrong: it does not match the indentation of the function body, so Python reports an unexpected indent on the first body line.
def upload_file(bucket_name):
  """Uploads a file to the google storage bucket."""
    bucket_name = "BUCKET_NAME"
Instead, try indenting the docstring so it lines up with the body:
def upload_file(bucket_name):
    """Uploads a file to the google storage bucket."""
    bucket_name = "BUCKET_NAME"
or keep everything at a consistent two-space indent:
def upload_file(bucket_name):
  """Uploads a file to the google storage bucket."""
  bucket_name = "BUCKET_NAME"
Upvotes: 0