Lambda automatically deletes transcribe job upon completion

Question

I am looking to edit my Lambda function so that it deletes the transcription job when its job status reads "COMPLETED". I have the following code:

 import json
    import time
    import boto3
    from urllib.request import urlopen

    def lambda_handler(event, context):
        """Start a Transcribe job for the S3 object in ``event`` and poll it.

        Reads the bucket name and object key from the first record of the
        event, starts a transcription job named after the key, then polls the
        job status in two consecutive loops. The code after the loops is meant
        to copy the transcript JSON to ``transcribeFile/<job_name>.json`` in
        the same bucket.

        NOTE(review): as written, the first loop is left only on "FAILED" and
        the second loop has no ``break``, so the statements after the loops
        are never reached; see the inline notes.
        """
        transcribe = boto3.client("transcribe")
        s3 = boto3.client("s3")

        if event:
            # Only the first record of the event is processed.
            file_obj = event["Records"][0]
            bucket_name = str(file_obj["s3"]["bucket"]["name"])
            file_name = str(file_obj["s3"]["object"]["key"])
            s3_uri = create_uri(bucket_name, file_name)
            # Whatever follows the literal "2019." in the key is passed as the
            # media format; a key without "2019." raises IndexError here.
            file_type = file_name.split("2019.")[1]
            # The object key doubles as the transcription job name.
            job_name = file_name
            transcribe.start_transcription_job(TranscriptionJobName=job_name,
                                                Media ={"MediaFileUri": s3_uri},
                                                MediaFormat = file_type,
                                                LanguageCode = "en-US",
                                                Settings={
                                                    "VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
                                                    "ShowSpeakerLabels": True,
                                                    "MaxSpeakerLabels": 4
                                                })


            # First polling loop: no delay between requests, and the only exit
            # is a "FAILED" status.
            # NOTE(review): a job whose status becomes "COMPLETED" never
            # satisfies this check, so the loop keeps running and the delete
            # call in the second loop is not reached for successful jobs.
            while True:
                status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
                if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
                    break
                print("It's in progress")
            # Second polling loop: entered only after the first loop saw
            # "FAILED". It deletes the job whenever the status reads
            # "COMPLETED", sleeps 5 seconds, and repeats; there is no break.
            while True:
                status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
                if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
                    transcribe.delete_transcription_job(TranscriptionJobName=job_name
                )

                time.sleep(5)

            # Download the transcript JSON and store it next to the source
            # audio under the "transcribeFile/" prefix.
            load_url = urlopen(status["TranscriptionJob"]["Transcript"]["TranscriptFileUri"])
            load_json = json.dumps(json.load(load_url))

            s3.put_object(Bucket = bucket_name, Key = "transcribeFile/{}.json".format(job_name), Body=load_json)


        # TODO implement
        # Fixed placeholder response; reached when ``event`` is falsy.
        return {
            'statusCode': 200,
            'body': json.dumps('Hello from Lambda!')
        }

    def create_uri(bucket_name, file_name):
        """Return the ``s3://`` URI addressing ``file_name`` in ``bucket_name``."""
        uri_parts = ("s3://", bucket_name, "/", file_name)
        return "".join(uri_parts)

The section that handles the job is:

 # Excerpt of the two polling loops from lambda_handler above.
 # First loop: polls without any delay and is left only on "FAILED".
 # NOTE(review): a job that ends as "COMPLETED" never satisfies this check,
 # so execution stays in this loop and never reaches the delete call below.
 while True:
        status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
        if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["FAILED"]:
            break
        print("It's in progress")
    # Second loop: entered only after a "FAILED" status was seen; deletes the
    # job when the status reads "COMPLETED" and has no break.
    while True:
        status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
        if status["TranscriptionJob"]["TranscriptionJobStatus"] in ["COMPLETED"]:
            transcribe.delete_transcription_job(TranscriptionJobName=job_name
        )

The intended behaviour is: while the job is in progress, it prints "It's in progress"; once the status reads "COMPLETED", the job is deleted.

Any ideas why my current code would not be working? It completes the transcribe job but does not delete it.

Owen Murray · Accepted Answer

Sorry guys, I had another look and found that I had made a very, very stupid mistake: I had the transcribe.delete_transcription_job(TranscriptionJobName=job_name) call in completely the wrong place.

Please find the correct and working code below:

import json
import time
import boto3
from urllib.request import urlopen

def lambda_handler(event, context):
    """Transcribe the audio object named in an S3 event and store the result.

    For the first record in ``event`` this starts an Amazon Transcribe job
    named after the object key, polls every 5 seconds until the job reaches
    "COMPLETED" or "FAILED", copies the transcript JSON to
    ``transcribeFile/<job_name>.json`` in the source bucket, and then deletes
    the transcription job.

    Args:
        event: S3 event notification; only ``event["Records"][0]`` is read.
            A falsy event skips all work.
        context: Lambda context object (unused).

    Returns:
        dict: A fixed ``{"statusCode": 200, "body": ...}`` response.

    Raises:
        ValueError: If the object key has no file extension to use as the
            media format.
        RuntimeError: If the transcription job ends with status "FAILED".
    """
    response = {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
    # Nothing to transcribe: skip client creation and all AWS calls.
    if not event:
        return response

    transcribe = boto3.client("transcribe")
    s3 = boto3.client("s3")

    file_obj = event["Records"][0]
    bucket_name = str(file_obj["s3"]["bucket"]["name"])
    file_name = str(file_obj["s3"]["object"]["key"])
    s3_uri = create_uri(bucket_name, file_name)

    # Take the media format from the file extension instead of splitting on
    # the literal "2019.", which only worked for keys containing that text.
    _, dot, extension = file_name.rpartition(".")
    if not dot or not extension:
        raise ValueError(
            "Cannot determine media format from object key: {!r}".format(file_name))
    file_type = extension.lower()

    # NOTE(review): the object key is reused as the job name; keys containing
    # characters Transcribe does not accept in job names (e.g. "/") would be
    # rejected by start_transcription_job - confirm against the key layout.
    job_name = file_name
    transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": s3_uri},
        MediaFormat=file_type,
        LanguageCode="en-US",
        Settings={
            "VocabularyName": "Custom_Vocabulary_by_Brand_Other_Brands",
            "ShowSpeakerLabels": True,
            "MaxSpeakerLabels": 4,
        },
    )

    # Poll until the job reaches a terminal state.
    while True:
        status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
        job = status["TranscriptionJob"]
        job_status = job["TranscriptionJobStatus"]
        if job_status in ("COMPLETED", "FAILED"):
            break
        print("It's in progress")
        time.sleep(5)

    try:
        # A failed job carries no transcript; report the failure explicitly
        # instead of crashing with a KeyError on the missing "Transcript".
        if job_status == "FAILED":
            raise RuntimeError(
                "Transcription job {} failed: {}".format(
                    job_name, job.get("FailureReason", "no reason reported")))

        # Download the transcript BEFORE deleting the job, so the download
        # never depends on results of a job that no longer exists.
        with urlopen(job["Transcript"]["TranscriptFileUri"]) as transcript:
            load_json = json.dumps(json.load(transcript))

        # NOTE(review): this writes into the bucket that triggered the
        # function; presumably the trigger is filtered so that objects under
        # "transcribeFile/" do not re-invoke it - verify the configuration.
        s3.put_object(
            Bucket=bucket_name,
            Key="transcribeFile/{}.json".format(job_name),
            Body=load_json,
        )
    finally:
        # The job is removed whether it completed or failed.
        transcribe.delete_transcription_job(TranscriptionJobName=job_name)

    return response

def create_uri(bucket_name, file_name):
    """Return the ``s3://`` URI addressing ``file_name`` in ``bucket_name``."""
    uri_parts = ("s3://", bucket_name, "/", file_name)
    return "".join(uri_parts)

Lambda automatically deletes transcribe job upon completion

Answers (2)

Related Questions