I'm trying to run a Lambda function that decompresses a TAR file stored in an S3 bucket, extracting its contents back into the same bucket.
The problem is that the TAR file's size (12 GB) exceeds the maximum RAM that AWS Lambda allows (10 GB).
Indeed, I run out of memory when executing the Lambda function:
Memory Size: 10240 MB Max Memory Used: 10240 MB
Is there a way to use the tarfile library while keeping memory usage low?
Below is my Lambda function code:
import tarfile
from io import BytesIO
from urllib import parse

import boto3

s3_client = boto3.client('s3')

def lambda_handler(event, context):
    # Invoked by S3 Batch Operations: each event carries one task.
    invocation_id = event['invocationId']
    invocation_schema_version = event['invocationSchemaVersion']

    task = event['tasks'][0]
    task_id = task['taskId']
    key = parse.unquote(task['s3Key'], encoding='utf-8')
    obj_version_id = task['s3VersionId']
    bucket = task['s3BucketArn'].split(':')[-1]

    input_tar_file = s3_client.get_object(Bucket=bucket, Key=key)

    # 'r|' opens the archive in forward-only streaming mode, so the
    # archive itself is not buffered in memory.
    with tarfile.open(fileobj=input_tar_file['Body'], mode='r|') as tar:
        for tar_resource in tar:
            if tar_resource.isfile():
                # read() loads the entire member into memory, which is
                # where the function hits the 10 GB limit.
                inner_file_bytes = tar.extractfile(tar_resource).read()
                s3_client.upload_fileobj(BytesIO(inner_file_bytes),
                                         Bucket=bucket,
                                         Key=tar_resource.name)

    return {
        "invocationSchemaVersion": invocation_schema_version,
        "treatMissingKeysAs": "PermanentFailure",
        "invocationId": invocation_id,
        "results": [
            {
                "taskId": task_id,
                "resultCode": "Succeeded",
                "resultString": "success"
            }
        ]
    }
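One idea I've been looking at is to drop the intermediate read() and pass the member's file object straight to upload_fileobj, since boto3 can pull from a file-like object in multipart chunks. A rough, untested sketch of that approach (the TransferConfig values are guesses on my part, not tuned):

import tarfile
from urllib import parse

import boto3
from boto3.s3.transfer import TransferConfig

s3_client = boto3.client('s3')

# Small multipart chunks keep the per-upload buffer bounded.
transfer_config = TransferConfig(
    multipart_threshold=8 * 1024 * 1024,
    multipart_chunksize=8 * 1024 * 1024,
    max_concurrency=2,
)

def lambda_handler(event, context):
    task = event['tasks'][0]
    key = parse.unquote(task['s3Key'], encoding='utf-8')
    bucket = task['s3BucketArn'].split(':')[-1]

    obj = s3_client.get_object(Bucket=bucket, Key=key)

    # 'r|' keeps tarfile in forward-only streaming mode.
    with tarfile.open(fileobj=obj['Body'], mode='r|') as tar:
        for member in tar:
            if member.isfile():
                # No read(): upload_fileobj consumes the member
                # stream chunk by chunk instead of materialising
                # the whole file in RAM.
                s3_client.upload_fileobj(
                    tar.extractfile(member),
                    Bucket=bucket,
                    Key=member.name,
                    Config=transfer_config,
                )

I haven't verified whether upload_fileobj really avoids buffering whole members when the source stream isn't seekable, so I may be missing something.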
Appreciate the help.