arm
arm

Reputation: 1

How can I make Google Cloud v2 ASR transcribe two audio files in parallel?

I have a web app where users can use Google Cloud v2 ASR for transcription. However, when two logged-in users try to transcribe simultaneously, the platform processes the requests in a queue (the first one gets transcribed first). I want to implement parallel transcription using Google Cloud v2 ASR in Python. How can I achieve this?

This is the function that I am calling from the endpoint for each user:

def upload_to_gcs(local_file: str, bucket_name: str, destination_blob_name: str) -> str:
    """Upload a local file to a GCS bucket and return its gs:// URI.

    Args:
        local_file: Path of the local file to upload.
        bucket_name: Name of the destination GCS bucket.
        destination_blob_name: Object name to create inside the bucket.

    Returns:
        The ``gs://<bucket>/<object>`` URI of the uploaded blob.
    """
    # NOTE: removed the original's redundant self-assignment of
    # destination_blob_name (a no-op).
    # `credentials` is a module-level name defined elsewhere in this file.
    storage_client = storage.Client(credentials=credentials)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(local_file)
    return f"gs://{bucket_name}/{destination_blob_name}"

def transcribe_chirp_2_asynch(audio_file: str, is_punct_capit: bool = False) -> str:
    """Transcribe an audio file with Google Cloud Speech-to-Text v2 (chirp_2).

    Uploads the file to GCS, submits a batch-recognition request against the
    us-central1 regional endpoint, blocks until the long-running operation
    finishes, and returns the full transcript.

    Args:
        audio_file: Path of the local audio file to transcribe.
        is_punct_capit: Enable automatic punctuation/capitalization.

    Returns:
        The concatenated transcript of all recognized segments, or "" if the
        response contains no results.
    """
    # `credentials` and `PROJECT_ID` are module-level names defined elsewhere.
    client = SpeechClient(
        credentials=credentials,
        client_options=ClientOptions(
            api_endpoint="us-central1-speech.googleapis.com",
        ),
    )

    # Batch recognition reads the audio directly from GCS, so only the URI is
    # needed — the original's read of the file into memory was dead code and
    # has been removed.
    destination_blob_name = os.path.basename(audio_file)
    uri_file = upload_to_gcs(audio_file, "xxx", destination_blob_name)

    features = cloud_speech.RecognitionFeatures(
        profanity_filter=False,
        enable_word_time_offsets=False,
        enable_word_confidence=False,
        enable_automatic_punctuation=is_punct_capit,
        enable_spoken_emojis=False,
        multi_channel_mode=False,
        max_alternatives=0,
    )
    config = cloud_speech.RecognitionConfig(
        language_codes=["en-US"],
        model="chirp_2",
        features=features,
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
    )

    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=uri_file)
    request = cloud_speech.BatchRecognizeRequest(
        recognizer=f"projects/{PROJECT_ID}/locations/us-central1/recognizers/_",
        config=config,
        files=[file_metadata],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
    )

    # batch_recognize returns a long-running operation; result() blocks until
    # the server finishes (or the timeout elapses, raising an exception).
    operation = client.batch_recognize(request=request)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=120)

    # BUG FIX: the original returned inside the loop, so callers only ever
    # received the FIRST transcript segment (and None when there were no
    # results). Collect every segment and join them instead.
    transcripts = []
    for result in response.results[uri_file].transcript.results:
        print(f"Transcript: {result.alternatives[0].transcript}")
        transcripts.append(result.alternatives[0].transcript)
    return " ".join(transcripts)

Should I use other functions provided by Google? I can't find any solution for this.

Upvotes: 0

Views: 36

Answers (0)

Related Questions