How to merge output results from lambda in s3

Question

I have some files in my S3 bucket, and I use boto3 in a Lambda function to look inside the files and count the frequency of a specific word in all files. I also add the date and some text. Up to here, everything is fine. The output comes out as 3 separate lines. When I try to put this output in S3, only the last line is uploaded to the file, but when I print the output in my Lambda console, all lines are there. I do not know what is wrong.

from datetime import datetime
from datetime import timedelta
import boto3
import json
from json import dumps
# Module-level S3 resource and a handle to the 'metrics' bucket; both are
# created when the module is imported and are read by lambda_handler below.
s3 = boto3.resource('s3')
bucket = s3.Bucket('metrics')
def lambda_handler(event, context):
    """Count 'session' in each 'pinpoint' object and upload the result as JSON.

    Iterates over every object in the module-level ``bucket``; for keys that
    contain 'pinpoint' it builds a dict with the number of 'session'
    occurrences, the object's last-modified date and a fixed unit, then
    writes the list ``d`` to the key 'TESTS/tests' in the 'metrics' bucket.
    ``event`` and ``context`` are not used.

    NOTE(review): ``d`` is re-created for every matching object, so only the
    entry of the last matching object is uploaded. If no key matches, ``d``
    is never assigned and the final ``json.dumps(d, ...)`` raises NameError.
    """
    for obj in bucket.objects.all():
        if 'pinpoint' in obj.key:
            body = obj.get()['Body'].read().decode('utf-8')
            # Build "key:value , key:value , key:value" text; it is parsed
            # back into a dict a few lines below.
            response = ('Number of sessions:' + str(body.count('session')) + ' , ' + 'date:' + str(obj.last_modified.date()) + ' , ' + 'Unit:Count') 
            splitcontent = response.splitlines()
            # BUG: this resets the list on every matching object, discarding
            # the entries collected for earlier objects.
            d = [] 
            for lines in splitcontent:
                # Split on " , " into "key:value" parts, then split each part
                # on the first ":" to form the dict.
                pipesplit = lines.split(" , ")
                d.append(dict(s.split(":",1) for s in pipesplit))                
    # Runs once, after the loop: at this point d holds only what the last
    # matching object produced.
    object = s3.Object('metrics', 'TESTS/tests')
    object.put(Body=json.dumps(d, default=str).encode())

The output in my Lambda console when I `print(d)` is:

[{'Number of sessions': '3', 'date': '2020-05-22', 'Unit': 'Count'}]
[{'Number of sessions': '1', 'date': '2020-05-22', 'Unit': 'Count'}]
[{'Number of sessions': '1', 'date': '2020-06-25', 'Unit': 'Count'}]

But when I check the file in S3, only the last line is there.

KiDoo Song · Accepted Answer

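The list `d` is created inside the `for` loop, so it is reset to an empty list for every object, and only the last object's entry is left when the file is uploaded. Initialize the list once, before the loop, so that every object's entry accumulates in it: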

from datetime import datetime
from datetime import timedelta
import boto3
import json
from json import dumps
# Module-level S3 resource and a handle to the 'metrics' bucket; both are
# created when the module is imported and are read by lambda_handler below.
s3 = boto3.resource('s3')
bucket = s3.Bucket('metrics')
def lambda_handler(event, context):
    """Count 'session' in every 'pinpoint' object and upload one merged JSON report.

    Iterates over all objects in the module-level ``bucket``. For each key
    containing 'pinpoint', the object body is decoded as UTF-8 and one record
    is appended to the report. After the loop the whole list is serialized
    to JSON and written to the key 'TESTS/tests' in the 'metrics' bucket.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).

    Returns:
        None. The only effect is the object written to S3; if no key matches,
        an empty JSON list is uploaded.
    """
    # The list must be created once, *before* the loop. Creating it inside
    # the loop resets it for every object, so only the last record would
    # survive to the upload.
    results = []
    for obj in bucket.objects.all():
        if 'pinpoint' not in obj.key:
            continue
        body = obj.get()['Body'].read().decode('utf-8')
        # Values are stored as strings, e.g.
        # {'Number of sessions': '3', 'date': '2020-05-22', 'Unit': 'Count'}.
        results.append({
            'Number of sessions': str(body.count('session')),
            'date': str(obj.last_modified.date()),
            'Unit': 'Count',
        })

    # Single upload after the loop so the file contains every record.
    report = s3.Object('metrics', 'TESTS/tests')
    report.put(Body=json.dumps(results).encode())

How to merge output results from lambda in s3

Answers (1)

Related Questions