Sagar SN
Sagar SN

Reputation: 35

Upload files to gs bucket in for loop

In the code below, a PDF document is split into pages that are kept on my local drive, and once the splitting process completes, the upload process takes place. In the upload process, all of the split files are recursively uploaded to a GCS bucket. How can I convert my code below to upload each split file directly to the GCS bucket instead of storing it locally and then uploading? I tried but could not succeed.

#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage

# Split "ace.pdf" into one single-page PDF per source page.
# The source file is kept open for the whole loop because PdfFileReader
# reads page data lazily from the underlying stream; closing it early
# would break output.write(). Using `with` guarantees the handle is
# released afterwards (the original leaked it).
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        with open(r"/home/playground/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)

def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory whose files will be uploaded.
        bucket: Name of the destination Google Cloud Storage bucket.
        gcs_path: Prefix (pseudo-folder) inside the bucket; '' uploads to
            the bucket root.

    Raises:
        NotADirectoryError: If *local_path* is not an existing directory.
    """
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
    if not os.path.isdir(local_path):
        raise NotADirectoryError(local_path)
    # Create the client and bucket handle ONCE, not per file: each
    # storage.Client() construction performs credential lookup and is costly.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus its trailing '/') so the
        # object name is relative to gcs_path.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)

upload_local_directory_to_gcs('/home/playground/doc_pages', 'doc_pages', '')

Upvotes: 2

Views: 489

Answers (1)

Juancki
Juancki

Reputation: 1882

Using a temporary folder, it would look something like this:

#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage

# Split "ace.pdf" into one single-page PDF per source page, writing the
# pages into a temporary staging folder under /tmp.
# The source file stays open for the whole loop because PdfFileReader
# reads page data lazily; `with` guarantees the handle is closed after
# (the original leaked it).
with open(r"ace.pdf", "rb") as source_stream:
    inputpdf = PdfFileReader(source_stream)
    # exist_ok=True: re-running the script must not raise FileExistsError.
    os.makedirs('/tmp/doc_pages', exist_ok=True)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        # Write each page to a temporary file
        with open(r"/tmp/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)

def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Upload every regular file directly under *local_path* to a GCS bucket.

    Args:
        local_path: Existing local directory whose files will be uploaded.
        bucket: Name of the destination Google Cloud Storage bucket.
        gcs_path: Prefix (pseudo-folder) inside the bucket; '' uploads to
            the bucket root.

    Raises:
        NotADirectoryError: If *local_path* is not an existing directory.
    """
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
    if not os.path.isdir(local_path):
        raise NotADirectoryError(local_path)
    # Create the client and bucket handle ONCE, not per file: each
    # storage.Client() construction performs credential lookup and is costly.
    storage_client = storage.Client()
    buck = storage_client.bucket(bucket)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        # Strip the local directory prefix (plus its trailing '/') so the
        # object name is relative to gcs_path.
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)

upload_local_directory_to_gcs('/tmp/doc_pages', 'doc_pages', '') # Change source

Upvotes: 2

Related Questions