Reputation: 35
In the code below, a PDF document is split into individual pages that are saved to my local drive, and once the splitting finishes, the upload step runs: all of the split files are uploaded recursively to a GCS bucket. How can I change the code to upload each split page directly to the GCS bucket instead of storing it locally first and then uploading? I tried but could not succeed.
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
inputpdf = PdfFileReader(open(r"ace.pdf", "rb"))

for i in range(inputpdf.numPages):
    output = PdfFileWriter()
    output.addPage(inputpdf.getPage(i))
    with open(r"/home/playground/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
        output.write(outputStream)

def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    assert os.path.isdir(local_path)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        storage_client = storage.Client()
        buck = storage_client.bucket(bucket)
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)

upload_local_directory_to_gcs('/home/playground/doc_pages', 'doc_pages', '')
Upvotes: 2
Views: 489
Reputation: 1882
Using temporary files, it would look something like this:
#!/usr/bin/python3
import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
import glob
import sys
from google.cloud import storage
inputpdf = PdfFileReader(open(r"ace.pdf", "rb"))

# create the temporary folder (exist_ok avoids an error if it already exists)
os.makedirs('/tmp/doc_pages', exist_ok=True)

for i in range(inputpdf.numPages):
    output = PdfFileWriter()
    output.addPage(inputpdf.getPage(i))
    # write each page to a temporary file
    with open(r"/tmp/doc_pages/document-page%s.pdf" % i, "wb") as outputStream:
        output.write(outputStream)

def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    assert os.path.isdir(local_path)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        storage_client = storage.Client()
        buck = storage_client.bucket(bucket)
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)

upload_local_directory_to_gcs('/tmp/doc_pages', 'doc_pages', '')  # changed source directory
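If you want to avoid the local filesystem entirely, which is what the question asks for, you can also write each page into an in-memory buffer and upload it straight from there; blob.upload_from_file accepts any file-like object. A minimal sketch, assuming the same bucket name ('doc_pages') and the PyPDF2 1.x API used above:

#!/usr/bin/python3
import io

from PyPDF2 import PdfFileReader, PdfFileWriter
from google.cloud import storage

storage_client = storage.Client()
bucket = storage_client.bucket('doc_pages')

inputpdf = PdfFileReader(open(r"ace.pdf", "rb"))

for i in range(inputpdf.numPages):
    output = PdfFileWriter()
    output.addPage(inputpdf.getPage(i))

    # write the page into an in-memory buffer instead of a file on disk
    buffer = io.BytesIO()
    output.write(buffer)
    buffer.seek(0)  # rewind so the upload reads from the start

    blob = bucket.blob('document-page%s.pdf' % i)
    blob.upload_from_file(buffer, content_type='application/pdf')
    print("Uploaded document-page%s.pdf to gs bucket doc_pages" % i)

With this approach there is no temporary directory and no second pass over the files: each page goes straight from memory to the bucket.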
Upvotes: 2