Infinite code
Infinite code

Reputation: 21

How do I fix this aspose-pdf-cloud-python script to successfully download a processed PDF file from aspose storage?

How do I fix this aspose-pdf-cloud-python script to successfully download a processed PDF file from aspose storage?
Environment:

import os
from asposepdfcloud import PdfApi, models
from asposepdfcloud.api_client import ApiClient

# Placeholder credentials: replace with your own Aspose Cloud App key and App SID.
# Keep real values out of anything you publish or commit.
app_key = 'app_key personal'
app_sid = 'app_sid personal'

# Build the API client once at module level; process_pdf_files_in_folder below
# uses this shared `pdf_api` object for every cloud call.
pdf_api_client = ApiClient(app_key=app_key, app_sid=app_sid)
pdf_api = PdfApi(pdf_api_client)

def process_pdf_files_in_folder(input_folder, output_folder):
    """
    Upload each PDF in input_folder, strip the watermark text from a cloud copy, and try to save it locally.

    NOTE(review): this is the failing version of the script. The final
    file.write(response) call raises
    "TypeError: a bytes-like object is required, not 'str'", as shown in the
    traceback reported with this code.

    Parameters:
    input_folder (str): Folder whose files ending in ".pdf" are processed.
    output_folder (str): Folder that receives the processed files; created if missing.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Iterate over all PDF files in the input folder
    # (the suffix test is case-sensitive, so "X.PDF" is not picked up)
    for filename in os.listdir(input_folder):
        if filename.endswith(".pdf"):
            input_file_path = os.path.join(input_folder, filename)
            # The local file name doubles as the name used in cloud storage
            remote_name = filename
            # The processed copy is stored under a "processed_" prefix
            copied_file = f'processed_{filename}'
            
            # Upload PDF file to cloud storage
            pdf_api.upload_file(remote_name, input_file_path)

            # Copy the file so the replacement below is applied to the copy
            pdf_api.copy_file(remote_name, copied_file)

            # Replace text: matches of old_value are replaced with an empty string
            # (presumably regex=True makes old_value a regular expression; confirm in the SDK docs)
            text_replace = models.TextReplace(old_value='Watermark instead', new_value='', regex=True)
            text_replace_list = models.TextReplaceListRequest(text_replaces=[text_replace])
            pdf_api.post_document_text_replace(copied_file, text_replace_list)
            
            # Download the processed file to the local system
            output_file_path = os.path.join(output_folder, copied_file)
            
            # Retrieve the file from the cloud
            # NOTE(review): per the reported traceback, `response` is a str, not bytes;
            # presumably the path of a locally downloaded file. Confirm against the SDK docs.
            response = pdf_api.download_file(copied_file)
            
            # Open a file stream to write the downloaded content
            with open(output_file_path, 'wb') as file:
                # This is the line that fails: a file opened in 'wb' mode only accepts
                # bytes-like objects, and `response` is a str
                file.write(response)

            print(f'Processed and saved: {output_file_path}')

# Run the batch on fixed Windows paths: PDFs are read from D:\input and the
# processed files are written to D:\output.
process_pdf_files_in_folder(r'D:\input', r'D:\output')

The error when running the code is as follows:

D:\>python rm.py
host: https://api.aspose.cloud/v3.0
tokenUrl: https://api.aspose.cloud/connect/token
Traceback (most recent call last):
  File "D:\rm.py", line 49, in <module>
    process_pdf_files_in_folder(r'D:\input', r'D:\output')
  File "D:\rm.py", line 44, in process_pdf_files_in_folder
    file.write(response)
TypeError: a bytes-like object is required, not 'str'

I have modified the code several times and it still doesn't work; I need a runnable version.

Upvotes: 1

Views: 81

Answers (1)

Infinite code
Infinite code

Reputation: 21

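The problem has been resolved. This is the working code I obtained after seeking help through other channels. The key change is at the download step: `download_file` returns the path of a downloaded local file (a `str`), not the file's bytes, so the file is moved into the output folder with `shutil.move` instead of being written with `file.write`: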

# Import the operating system interface module
import os
# Import the module for file and directory operations
import shutil
# Import Aspose.PDF cloud API and related models
from asposepdfcloud import PdfApi, models
from asposepdfcloud.api_client import ApiClient

# Placeholder credentials: replace with your own Aspose Cloud App key and App SID.
# Keep real values out of anything you publish or commit.
app_key = 'app_key personal'
app_sid = 'app_sid personal'

# Build the API client once at module level; process_pdf_files_in_folder below
# uses this shared `pdf_api` object for every cloud call.
pdf_api_client = ApiClient(app_key=app_key, app_sid=app_sid)
pdf_api = PdfApi(pdf_api_client)

def process_pdf_files_in_folder(input_folder, output_folder):
    """
    Run every PDF in input_folder through the cloud text replacement and save the results locally.

    For each PDF, the file is uploaded to cloud storage, copied there under the
    name ``processed_<filename>``, the text replacement is applied to that copy,
    and the copy is downloaded and moved into output_folder under the same name.

    Only regular files are processed, and the ".pdf" extension is matched
    case-insensitively, so "scan.PDF" is picked up while a sub-folder that
    happens to be named "archive.pdf" is skipped.

    Parameters:
    input_folder (str): The path to the input folder containing PDF files to be processed.
    output_folder (str): The path to the output folder where processed PDF files will be saved.
        It is created if it does not exist.
    """
    # Ensure the output folder exists, create if it does not
    os.makedirs(output_folder, exist_ok=True)

    # Iterate over all entries in the input folder
    for filename in os.listdir(input_folder):
        # Get the full path of the input entry
        input_file_path = os.path.join(input_folder, filename)

        # Skip anything that is not a PDF file: wrong extension (any letter case
        # is accepted) or not a regular file, e.g. a directory
        if not filename.lower().endswith(".pdf"):
            continue
        if not os.path.isfile(input_file_path):
            continue

        # The local file name doubles as the name used in cloud storage
        remote_name = filename
        # The processed copy is stored under a "processed_" prefix
        copied_file = f'processed_{filename}'

        # Upload the PDF file to cloud storage
        pdf_api.upload_file(remote_name, input_file_path)

        # Copy the file in cloud storage so the replacement is applied to the copy
        pdf_api.copy_file(remote_name, copied_file)

        # Create a text replacement object: matches of old_value become an empty string
        text_replace = models.TextReplace(old_value='Watermark instead', new_value='', regex=True)
        # Create a text replacement list request
        text_replace_list = models.TextReplaceListRequest(text_replaces=[text_replace])
        # Perform text replacement in the copied file
        pdf_api.post_document_text_replace(copied_file, text_replace_list)

        # Set the full path for the output file
        output_file_path = os.path.join(output_folder, copied_file)

        # download_file hands back the path of a local file holding the download
        # (a str, not the file's bytes), so the file is moved into place rather
        # than written out with file.write()
        download_filepath = pdf_api.download_file(copied_file)
        # Move the downloaded file to the output folder
        shutil.move(download_filepath, output_file_path)

        # Print the path of the processed and saved file
        print(f'Processed and saved: {output_file_path}')

# Run the batch on fixed Windows paths: PDFs are read from D:\input and the
# processed files are written to D:\output.
process_pdf_files_in_folder(r'D:\input', r'D:\output')

Upvotes: 0

Related Questions