LearningToCode
LearningToCode

Reputation: 1

I'm trying AWS Transcribe streaming service and it does not work. What's wrong with my code?

I'm recording an audio file, I convert it to a format accepted by AWS Transcribe and I pass it to the trascribe service. Here's the error:

TypeError: e.$unknown is undefined visit models_0.js:21 fn Aws_restJson1.js:343 asyncIterator SmithyMessageEncoderStream.js:10 domande_generiche.html:107:29

Here's the javascript code:

    import { TranscribeStreamingClient, StartStreamTranscriptionCommand } from "https://cdn.jsdelivr.net/npm/@aws-sdk/client-transcribe-streaming@latest/+esm";
    let mediaRecorder;
    let audioChunks = [];

    const transcribeClient = new TranscribeStreamingClient({
        region: "us-east-1",
        credentials: {
            accessKeyId: "xxxxxx",
            secretAccessKey: "xxxxxx"
        }
    });

    document.addEventListener("click", async function(event) {

        if (event.target.tagName === "BUTTON" && event.target.hasAttribute("data-domanda")) {
            let bottone = event.target;
            let numeroDomanda = bottone.getAttribute("data-domanda");
            console.log("Hai premuto il pulsante della domanda:", numeroDomanda);

            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
                audioChunks = [];

                mediaRecorder.ondataavailable = event => {
                    if (event.data.size > 0) {
                        audioChunks.push(event.data);
                    }
                };

                mediaRecorder.onstop = async () => {
                    const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
                    const audioFile = new File([audioBlob], "recording.webm", { type: "audio/webm" });
                    console.log("File audio registrato", audioFile);
                    
                    
                    try { //errore in questo try
                    
                        const audioBuffer = await audioFile.arrayBuffer();
                        const audioUint8 = new Uint8Array(audioBuffer);
                        
                        // Invia l'audio per la trascrizione streaming
                        const audioStream = new ReadableStream({
                            start(controller) {
                                controller.enqueue(audioUint8);
                                controller.close();
                            }
                        });

                        
                        const command = new StartStreamTranscriptionCommand({
                            LanguageCode: "it-IT",
                            MediaSampleRateHertz: 16000,  // Cambiato per adattarsi a AWS
                            MediaEncoding: "pcm",
                            AudioStream: audioStream
                        });
                        
                        
                        const response = await transcribeClient.send(command);
                        let transcription = "";

                        if (response?.TranscriptResultStream) {
                        
                            for await (const event of response.TranscriptResultStream) {
                                if (event.TranscriptEvent && event.TranscriptEvent.Transcript) {
                                    const results = event.TranscriptEvent.Transcript.Results;
                                    if (results?.length > 0 && results[0].Alternatives?.length > 0) {
                                        transcription += results[0].Alternatives[0].Transcript + " ";
                                    }
                                }
                            }
                            
                        } else {
                            console.error("TranscriptResultStream non disponibile", response);
                        }

                        console.log("Trascrizione:", transcription.trim());
                        alert("Trascrizione: " + transcription.trim());
                    } catch (err) {
                    
                        console.error("Errore durante la trascrizione:", err);
                    }

                    // Reset bottone
                    bottone.textContent = "Rispondi";
                    bottone.classList.remove("registrando");
                };

                mediaRecorder.start();
                bottone.textContent = "Stop";
                bottone.classList.add("registrando");

                bottone.addEventListener("click", () => {
                    if (mediaRecorder.state === "recording") {
                        mediaRecorder.stop();
                        stream.getTracks().forEach(track => track.stop());
                    }
                }, { once: true });
            } catch (error) {
                console.error("Errore durante il processo:", error);
                alert("Errore durante il processo.");
            }
        }
    });

Upvotes: 0

Views: 47

Answers (1)

swawge
swawge

Reputation: 73

Based on my understanding it is mostly due to mismatch between the audio format you're providing and what AWS Transcribe expect.

Try below code

    // ... existing code ...

mediaRecorder.onstop = async () => {
    // Convert WebM to PCM
    const audioContext = new AudioContext();
    const audioBlob = new Blob(audioChunks, { type: "audio/webm" });
    const arrayBuffer = await audioBlob.arrayBuffer();
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
    
    // Convert to 16-bit PCM
    const pcmData = convertToPCM(audioBuffer);
    
    // Create audio stream
    const audioStream = new ReadableStream({
        start(controller) {
            controller.enqueue(pcmData);
            controller.close();
        }
    });

    const command = new StartStreamTranscriptionCommand({
        LanguageCode: "it-IT",
        MediaSampleRateHertz: 16000,
        MediaEncoding: "pcm",
        AudioStream: audioStream
    });
    
    // ... rest of the code ...
};

// Helper function to convert AudioBuffer to PCM
function convertToPCM(audioBuffer) {
    const numChannels = 1; // Mono
    const samples = audioBuffer.getChannelData(0);
    const pcmData = new Int16Array(samples.length);
    
    // Convert Float32 to Int16
    for (let i = 0; i < samples.length; i++) {
        const s = Math.max(-1, Math.min(1, samples[i]));
        pcmData[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
    
    return new Uint8Array(pcmData.buffer);
}

Upvotes: 0

Related Questions