Reputation: 133
I have created a lambda that will scan an uploaded file and search for specific phrases which have been listed in another s3 bucket. If a phrase is matched in the original uploaded file, it will print the line of the transcript as well as the response.
This lambda works if we upload each transcript individually, however if we upload more than 1, it stores the original output and adds it to the beginning.
I feel that this issue may be caused by the /tmp/ file not being cleared when the lambda function ends.
Is there a way to clear the /tmp/ file each time a job is done?
The output looks as follows:
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
However, it should look like this:
ch_0 : Okay. And then, um, how do you guys typically allocate funding for a project like this?
-------------------------------------------------------------
ch_1 : Yeah, we do have capital projects and we've allocated money 3 place, which is and stuff, Um, every year.
My lambda code is as follows:
import boto3
def lambda_handler(event, context):
s3 = boto3.client("s3")
if event:
file_obj = event["Records"][0]
bucketname = str(file_obj['s3']['bucket']['name'])
filename = str(file_obj['s3']['object']['key'])
job_name = filename
print("Filename: ", filename)
fileObj = s3.get_object(Bucket=bucketname, Key=filename)
file_content = fileObj["Body"].read().decode('utf-8')
budget_file = s3.get_object(Bucket= "bantp-phrases", Key="B.txt")
budget_content = budget_file["Body"].read().decode('utf-8')
authority_file = s3.get_object(Bucket= "bantp-phrases", Key="A.txt")
authority_content = authority_file["Body"].read().decode('utf-8')
need_file = s3.get_object(Bucket= "bantp-phrases", Key="N.txt")
need_content = need_file["Body"].read().decode('utf-8')
timeline_file = s3.get_object(Bucket= "bantp-phrases", Key="T.txt")
timeline_content = timeline_file["Body"].read().decode('utf-8')
partner_file = s3.get_object(Bucket= "bantp-phrases", Key="P.txt")
partner_content = partner_file["Body"].read().decode('utf-8')
# Converts all to a list
budgets = budget_content.split("\n")
authorities = authority_content.split("\n")
needs = need_content.split("\n")
timelines = timeline_content.split("\n")
partners = partner_content.split("\n")
lines = file_content.split("\n")
directory_name = filename
mylist = lines
#Budget Phrase Analysis
for b in budgets:
with open("/tmp/budget.txt", "a") as x:
try:
output = None
for index, line in enumerate(lines):
if b.strip() in line:
output = index
break
if output:
x.write("\n" + lines[output] + "\n")
x.write("-------------------------------------------------------------")
x.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
x.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/budget.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Budget_" + (filename)))
#Authority Phrase Analysis
for a in authorities:
with open("/tmp/authority.txt", "a") as c:
try:
output = None
for index, line in enumerate(lines):
if a.strip() in line:
output = index
if output:
c.write("\n" + lines[output] + "\n")
c.write("-------------------------------------------------------------")
c.write("\n" + lines[output +1] + "\n")
print ("It worked!")
except (ValueError):
c.write("Nothing found")
print ("It didn't work :(")
s3.upload_file(Filename = "/tmp/authority.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Authority_") + (filename))
#Need Phrase Analysis
for n in needs:
with open("/tmp/need.txt", "a") as v:
try:
output = None
for index, line in enumerate(lines):
if n.strip() in line:
output = index
break
if output:
v.write("\n" + lines[output] + "\n")
v.write("-------------------------------------------------------------")
v.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
v.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/need.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Need_") + (filename))
#Timeline Phrase Analysis
for t in timelines:
with open("/tmp/timeline.txt", "a") as z:
try:
output = None
for index, line in enumerate(lines):
if t.strip() in line:
output = index
break
if output:
z.write("\n" + lines[output] + "\n")
z.write("-------------------------------------------------------------")
z.write("\n" + lines[output +1] + "\n")
print ("It worked!")
break
except (ValueError):
z.write("Nothing found")
print ("It didn't work :(")
break
s3.upload_file(Filename = "/tmp/timeline.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Timeline_") + (filename))
#Partner Phrase Analysis
for p in partners:
with open("/tmp/partner.txt", "a") as q:
try:
output = None
for index, line in enumerate(lines):
if p.strip() in line:
output = index
break
if output:
q.write("\n" + lines[output] + "\n")
q.write("-------------------------------------------------------------")
q.write("\n" + lines[output +1] + "\n")
print ("It worked!")
except (ValueError):
q.write("Nothing found")
print ("It didn't work :(")
s3.upload_file(Filename = "/tmp/partner.txt" , Bucket="bantp-analysis", Key = ((directory_name)+'/'+"Partner_") + (filename))
Upvotes: 0
Views: 607
Reputation: 1896
Welcome to stackoverflow!
Can you try the following solutions and comment the results please
In all your open
operation change the mode of opening the file from a
to w
.
Example
with open("/tmp/timeline.txt", "a") as z:
to
with open("/tmp/timeline.txt", "w") as z:
This change for all open operations, to override the existing metafile. Also do take care of indentation.
Upvotes: 2