Ram
Ram

Reputation: 287

How to rename multiple output files in S3 through Lambda

How can I rename multiple output files in an S3 bucket? As an example, I am using

os.path.basename(keyprefix),

with 'w' to write the files, which follow the pattern:

abc_00000.csv.gz
abc_00001.csv.gz 

I want to rename the above files with naming convention:

 abc_{today date in YYYYMMDD format}_00.csv.gz 
 abc_{today date in YYYYMMDD format}_01.csv.gz

Below is the code for reference:

    import boto3
    import os
    from smart_open import open
    import gzip
    import csv
    import io


    def lambda_handler(event, context):
        """Filter gzipped CSV 'part-' files under an S3 prefix and rewrite them.

        Intended to run when the export job's ``_SUCCESS`` marker appears:
        lists every object in the marker's directory, and for each Spark-style
        ``part-`` file streams the gzipped CSV, keeps only rows whose sixth
        column equals ``'CDE'``, and writes the result (with a header row) to
        ``s3://<bucket>/output/<original basename>`` via smart_open.
        """
        dirpath = 'output/'
        bucket = 'export'
        key = 'export/_SUCCESS'

        # Only proceed for the success-marker key.
        if '_SUCCESS' in key:
            client = boto3.client('s3')
            response = client.list_objects_v2(Bucket=bucket, Prefix=os.path.dirname(key))
            for obj in response['Contents']:
                keyprefix = obj['Key']
                if 'part-' in keyprefix:
                    # Stream-decompress the source object and stream-write the
                    # filtered copy; both handles are closed by the `with`.
                    with gzip.GzipFile(fileobj=client.get_object(Bucket=bucket, Key=keyprefix)['Body']) as gzipfile, \
                            open('s3://' + bucket + '/' + dirpath + os.path.basename(keyprefix), 'w') as fout:
                        writer = csv.writer(fout, delimiter=',')
                        writer.writerow(['test1', 'test1', 'test3', 'test4', 'test5', 'test6', 'test7'])
                        for row in csv.reader(gzipfile.read().decode('utf-8').splitlines(), delimiter=',', quotechar='"'):
                            # Index 5 is the sixth CSV field; keep only 'CDE' rows.
                            if row[5] == 'CDE':
                                writer.writerow(row)

Upvotes: 0

Views: 1471

Answers (1)

samtoddler
samtoddler

Reputation: 9665

S3 Service Resource gives more flexibility.

After copying the file I am deleting the old one; if you want to keep it, remove the delete call.

#!/usr/bin/env python3

import boto3
import os
from datetime import datetime
# Files to rename look like '<prefix><seq>.<suffix>', e.g. 'abc_00000.csv.gz'.
prefix = 'abc_'
# Date-stamped prefix for renamed objects, e.g. 'abc_2021-03-07'.
new_prefix = f"{prefix}{datetime.today().strftime('%Y-%m-%d')}"
suffix = 'csv.gz'
bucket_name = 'mybucketname'

def lambda_handler(event, context):
    """Rename 'abc_NNNNN.csv.gz' objects to 'abc_<date>_NNNNN.csv.gz'.

    Scans every object in ``bucket_name``; for each key whose basename starts
    with ``prefix`` and ends with ``suffix``, copies it to the same directory
    under the date-stamped name and deletes the original.

    Idempotent: files that were already renamed (their sequence part is not
    all digits because it contains the date) are skipped, so re-running the
    function does not mangle names like 'abc_<date>_<date>'.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.all():
        key = obj.key
        path_part = os.path.dirname(key)
        filename = os.path.basename(key)
        copy_source = {
            'Bucket': bucket_name,
            'Key': key
        }
        if filename.startswith(prefix) and filename.endswith(suffix):
            # Split on the FILENAME, not the full key — an underscore in the
            # path portion would otherwise pick the wrong segment.
            remainder = filename[len(prefix):]  # e.g. '00000.csv.gz'
            # Skip objects already renamed: their first dot-segment is a
            # date, not a digits-only sequence number.
            if not remainder.split('.', 1)[0].isdigit():
                continue
            new_key = f"{new_prefix}_{remainder}"
            full_key_with_path = os.path.join(path_part, new_key)
            destination_bucket = s3.Bucket(bucket_name)
            print(f'copying the object with new key : {full_key_with_path}')
            destination_bucket.copy(copy_source, full_key_with_path)
            print(f'deleting old key : {key}')
            s3.Object(bucket_name, key).delete()

❯❯ python3 s3rename.py 
copying the object with new key : myinventorylist/2021-02-07T00-00Z/abc_2021-03-07_00000.csv.gz
deleting old key : myinventorylist/2021-02-07T00-00Z/abc_00000.csv.gz
copying the object with new key : myinventorylist/2021-02-07T00-00Z/abc_2021-03-07_00001.csv.gz
deleting old key : myinventorylist/2021-02-07T00-00Z/abc_00001.csv.gz

Upvotes: 1

Related Questions