Reputation: 1
I'm working on a project using NextJS where I need to implement continuous Speech-to-Text with language detection. While I have successfully set up Speech-to-Text for a single language, I'm struggling to get automatic language detection to work. The documentation seems limited, and I can't seem to figure out what I'm doing wrong.
According to the documentation, it should be implemented this way (source):
var autoDetectSourceLanguageConfig = SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE"]);
var speechRecognizer = SpeechSDK.SpeechRecognizer.FromConfig(speechConfig, autoDetectSourceLanguageConfig, audioConfig);
This is part of my component:
useEffect(() => {
    const fetchTokenAndSetupRecognizer = async () => {
        const tokenObj = await getTokenOrRefresh();
        if (tokenObj.authToken && tokenObj.region) {
            audioConfig.current = AudioConfig.fromDefaultMicrophoneInput();
            const autoDetectLanguages = ["en-US", "de-DE"];
            speechConfig.current = SpeechConfig.fromAuthorizationToken(
                tokenObj.authToken,
                tokenObj.region
            );
            const autoDetectConfig =
                AutoDetectSourceLanguageConfig.fromLanguages(autoDetectLanguages);
            audioConfig.current = AudioConfig.fromDefaultMicrophoneInput();
            recognizer.current = SpeechRecognizer.FromConfig(
                speechConfig.current,
                autoDetectConfig,
                audioConfig.current
            );
            recognizer.current.recognized = (s, e) =>
                processRecognizedTranscript(e);
            recognizer.current.canceled = (s, e) => handleCanceled(e);
        }
        setIsDisabled(!recognizer.current);
    };
    fetchTokenAndSetupRecognizer();
    return () => {
        recognizer.current?.close();
    };
}, []);
I searched here, in the documentation, and in the repository, but there are only limited examples and information for React/JavaScript.
Upvotes: 0
Views: 319
Reputation: 189
@jojak did you resolve the issue? I am also trying to capture audio from the microphone, but it doesn't work well.
Upvotes: 0
Reputation: 1268
I tried your code and ran into the same issues implementing automatic language detection in Azure Speech-to-Text with the Speech SDK.
To enable language identification, use code like this:
const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE", "zh-CN"]);
const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);
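Since your goal is continuous recognition inside a React component, the same pattern can be wired into your useEffect. Below is a minimal sketch, assuming the getTokenOrRefresh helper and the speechConfig/audioConfig/recognizer refs from your snippet; the SpeechServiceConnection_LanguageIdMode property is an assumption that only applies if your SDK version exposes it (without it, the language is detected once at the start of the audio rather than continuously):
import {
    SpeechConfig,
    AudioConfig,
    AutoDetectSourceLanguageConfig,
    AutoDetectSourceLanguageResult,
    SpeechRecognizer,
    ResultReason,
    PropertyId
} from "microsoft-cognitiveservices-speech-sdk";

// Sketch only: getTokenOrRefresh and the refs come from your component.
const setupRecognizer = async () => {
    const tokenObj = await getTokenOrRefresh();
    speechConfig.current = SpeechConfig.fromAuthorizationToken(tokenObj.authToken, tokenObj.region);

    // Assumption: switches language identification from at-start to continuous.
    // Skip this line if your SDK version has no SpeechServiceConnection_LanguageIdMode.
    speechConfig.current.setProperty(PropertyId.SpeechServiceConnection_LanguageIdMode, "Continuous");

    const autoDetectConfig = AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "de-DE"]);
    audioConfig.current = AudioConfig.fromDefaultMicrophoneInput();
    recognizer.current = SpeechRecognizer.FromConfig(speechConfig.current, autoDetectConfig, audioConfig.current);

    recognizer.current.recognized = (s, e) => {
        if (e.result.reason === ResultReason.RecognizedSpeech) {
            const lidResult = AutoDetectSourceLanguageResult.fromResult(e.result);
            console.log(`[${lidResult.language}] ${e.result.text}`);
        }
    };

    // Continuous recognition has to be started explicitly.
    recognizer.current.startContinuousRecognitionAsync();
};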
The code below recognizes speech from the microphone and from an audio file with the Azure Speech SDK; it is adapted from MSDOC and the GitHub samples.
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const fs = require('fs');
require('dotenv').config();

const subscriptionKey = process.env.AZURE_SPEECH_KEY;
const serviceRegion = process.env.AZURE_SpeechRegion;

// Small helper that wraps the detected language and detection confidence of a recognition result.
class AutoDetectSourceLanguageResult {
    constructor(language, confidence) {
        this.privLanguage = language;
        this.privLanguageDetectionConfidence = confidence;
    }

    static fromResult(result) {
        return new AutoDetectSourceLanguageResult(result.language, result.languageDetectionConfidence);
    }

    get language() {
        return this.privLanguage;
    }

    get languageDetectionConfidence() {
        return this.privLanguageDetectionConfidence;
    }
}
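// Note: the SDK itself also exports an AutoDetectSourceLanguageResult class with a static
// fromResult() helper, so the class above is optional. Equivalent usage:
//   const lidResult = sdk.AutoDetectSourceLanguageResult.fromResult(result);
//   console.log(`Detected language: ${lidResult.language}`);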
async function recognitionWithMicrophone() {
    const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
    const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
    const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
    const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);

    recognizer.recognizeOnceAsync(result => {
        if (result.reason === sdk.ResultReason.RecognizedSpeech) {
            const languageResult = AutoDetectSourceLanguageResult.fromResult(result);
            const detectedLanguage = languageResult.language;
            console.log(`RECOGNIZED: Text=${result.text}`);
            console.log(`DETECTED: Language=${detectedLanguage}; (Confidence: ${languageResult.languageDetectionConfidence})`);
        } else if (result.reason === sdk.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        } else if (result.reason === sdk.ResultReason.Canceled) {
            const cancellation = sdk.CancellationDetails.fromResult(result);
            console.log(`CANCELED: Reason=${cancellation.reason}`);
            if (cancellation.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${cancellation.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
            }
        }
    });
}
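// Note (assumption about your environment): AudioConfig.fromDefaultMicrophoneInput()
// relies on the browser's getUserMedia, so it works in a browser/React app but not in a
// plain Node.js script. When running this sample under Node, feed the audio in yourself,
// for example via a push stream:
//   const pushStream = sdk.AudioInputStream.createPushStream();
//   fs.createReadStream("console_en-us_zh-cn.wav")
//       .on("data", (arrayBuffer) => pushStream.write(arrayBuffer.slice()))
//       .on("end", () => pushStream.close());
//   const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);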
async function multiLingualRecognitionWithAudioFile() {
    const audioFilePath = "console_en-us_zh-cn.wav";
    console.log(`Attempting to access audio file at: ${audioFilePath}`);
    if (!fs.existsSync(audioFilePath)) {
        console.error(`Error: Audio file '${audioFilePath}' not found.`);
        return;
    }
    try {
        const audioData = fs.readFileSync(audioFilePath);
        const audioConfig = sdk.AudioConfig.fromWavFileInput(audioData);
        const config = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
        const autoDetectConfig = sdk.AutoDetectSourceLanguageConfig.fromLanguages(["en-US", "zh-CN"]);
        const recognizer = sdk.SpeechRecognizer.FromConfig(config, autoDetectConfig, audioConfig);

        recognizer.recognizing = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZING: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            }
        };

        recognizer.recognized = (s, e) => {
            if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
                const languageResult = AutoDetectSourceLanguageResult.fromResult(e.result);
                console.log(`RECOGNIZED: Text=${e.result.text}`);
                console.log(`DETECTED: Language=${languageResult.language} (Confidence: ${languageResult.languageDetectionConfidence})`);
            } else if (e.result.reason === sdk.ResultReason.NoMatch) {
                console.log("NOMATCH: Speech could not be recognized.");
            }
        };

        recognizer.canceled = (s, e) => {
            console.log(`CANCELED: Reason=${e.reason}`);
            if (e.reason === sdk.CancellationReason.Error) {
                console.log(`CANCELED: ErrorCode=${e.errorCode}`);
                console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
            }
            recognizer.stopContinuousRecognitionAsync();
        };

        recognizer.sessionStarted = (s, e) => {
            console.log("\n Session started event.");
        };

        recognizer.sessionStopped = (s, e) => {
            console.log("\n Session stopped event.");
            recognizer.stopContinuousRecognitionAsync();
        };

        await recognizer.startContinuousRecognitionAsync();
    } catch (error) {
        console.error("Error while initializing speech recognizer:", error);
    }
}
async function main() {
    console.log("Starting Speech Recognition Samples...");
    try {
        await Promise.all([
            recognitionWithMicrophone(),
            multiLingualRecognitionWithAudioFile()
        ]);
    } catch (err) {
        console.error("Error occurred:", err);
    }
}

main().catch(err => {
    console.error("Error occurred:", err);
});
Output:
Upvotes: 0