Reputation: 23
I need to create a simple JavaScript function that captures voice input and returns the recognized text together with a confidence percentage, using the Azure Speech SDK.
My biggest problem is that I am new to coding and this is the most difficult issue I have faced, so please be kind to this humble student.
I am building a language-learning web app that uses voice input. I was able to get the Google services working the way I wanted, but unfortunately those services don't work in China, where my market is. I am also using the Phaser 3 API to build this app.
I was able to get the sample code from the Azure Speech SDK speech-to-text JavaScript repository on GitHub to work, but when I try to build my own function from that code I get: Uncaught TypeError: Cannot read property 'SpeechConfig' of undefined
I also do not know how to add a confidence level to the speech result.
recordButton.on('pointerdown', function () {
    var SDK = window.SpeechSDK;
    try {
        AudioContext = window.AudioContext      // our preferred impl
            || window.webkitAudioContext        // fallback, mostly for Safari
            || false;                           // could not find.
        if (AudioContext) {
            soundContext = new AudioContext();
            console.log("AudioContext", AudioContext);
        } else {
            alert("Audio context not supported");
        }
    }
    catch (e) {
        console.log("no sound context found, no audio output. " + e);
    }
    console.log("SpeechSDK initialized", SDK);
    speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
    speechConfig.speechRecognitionLanguage = "en-US";
    console.log("speechConfig", SpeechConfig);
    audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
    recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
    recognizer.recognizeOnceAsync(
        function (result) {
            console.log("result", result);
            recognizer.close();
            recognizer = undefined;
        },
        function (err) {
            console.log(err);
            recognizer.close();
            recognizer = undefined;
        });
}, this);
I need to capture the speech input, then show the words/phrases/sentences the students have said and score them based on the confidence level.
Upvotes: 0
Views: 1853
Reputation: 12153
If you want to get the confidence score for the text returned by the Speech-to-Text SDK, try the code below:
<html>
<head>
    <title>Speech SDK JavaScript Quickstart</title>
</head>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<body>
    <div id="warning">
        <h1 style="font-weight:500;">Speech Recognition Speech SDK not found (microsoft.cognitiveservices.speech.sdk.bundle.js missing).</h1>
    </div>
    <div id="content" style="display:none">
        <table width="100%">
            <tr>
                <td></td>
                <td><h1 style="font-weight:500;">Microsoft Cognitive Services Speech SDK JavaScript Quickstart</h1></td>
            </tr>
            <tr>
                <td align="right"><a href="https://learn.microsoft.com/azure/cognitive-services/speech-service/get-started" target="_blank">Subscription</a>:</td>
                <td><input id="subscriptionKey" type="text" size="40" value="subscription"></td>
            </tr>
            <tr>
                <td align="right">Region</td>
                <td><input id="serviceRegion" type="text" size="40" value="YourServiceRegion"></td>
            </tr>
            <tr>
                <td></td>
                <td><button id="startRecognizeOnceAsyncButton">Start recognition</button></td>
            </tr>
            <tr>
                <td align="right" valign="top">Results</td>
                <td><textarea id="phraseDiv" style="display: inline-block;width:500px;height:200px"></textarea></td>
            </tr>
        </table>
    </div>
</body>
<!-- Speech SDK USAGE -->
<script>
    // status fields and start button in UI
    var phraseDiv;
    var startRecognizeOnceAsyncButton;

    // subscription key and region for speech services.
    var subscriptionKey, serviceRegion;
    var authorizationToken;
    var SpeechSDK;
    var recognizer;

    document.addEventListener("DOMContentLoaded", function () {
        startRecognizeOnceAsyncButton = document.getElementById("startRecognizeOnceAsyncButton");
        subscriptionKey = document.getElementById("subscriptionKey");
        serviceRegion = document.getElementById("serviceRegion");
        phraseDiv = document.getElementById("phraseDiv");

        startRecognizeOnceAsyncButton.addEventListener("click", function () {
            startRecognizeOnceAsyncButton.disabled = true;
            phraseDiv.innerHTML = "";

            // if we got an authorization token, use the token. Otherwise use the provided subscription key
            var speechConfig;
            if (authorizationToken) {
                speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
            } else {
                if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
                    alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
                    return;
                }
                speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
            }

            speechConfig.speechRecognitionLanguage = "en-US";
            speechConfig.outputFormat = 1; // OutputFormat.Detailed: the result then includes the NBest list with confidence scores

            var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
            recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

            recognizer.recognizeOnceAsync(
                function (result) {
                    startRecognizeOnceAsyncButton.disabled = false;
                    phraseDiv.innerHTML += "Recognize Result:" + result.text +
                        "\nConfidence Score:" + JSON.parse(result.json).NBest[0].Confidence;
                    window.console.log(result);
                    recognizer.close();
                    recognizer = undefined;
                },
                function (err) {
                    startRecognizeOnceAsyncButton.disabled = false;
                    phraseDiv.innerHTML += err;
                    window.console.log(err);
                    recognizer.close();
                    recognizer = undefined;
                });
        });

        if (!!window.SpeechSDK) {
            SpeechSDK = window.SpeechSDK;
            startRecognizeOnceAsyncButton.disabled = false;
            document.getElementById('content').style.display = 'block';
            document.getElementById('warning').style.display = 'none';

            // in case we have a function for getting an authorization token, call it.
            if (typeof RequestAuthorizationToken === "function") {
                RequestAuthorizationToken();
            }
        }
    });
</script>
</html>
Run the page the same way the official doc indicates. In short, when you use the SDK you should set speechConfig.outputFormat = 1
so that you get the detailed output format from the Speech service, which includes the confidence score.
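For reference, here is a minimal sketch of pulling the text and confidence out of the detailed result. The result.json property and NBest[0].Confidence come straight from the sample above; the helper name getTextWithConfidence is just illustrative:

    // Illustrative helper: extract text + confidence from a detailed recognition result.
    // Assumes speechConfig.outputFormat = 1 (Detailed) was set before recognition.
    function getTextWithConfidence(result) {
        var detailed = JSON.parse(result.json);   // raw detailed response from the service
        var best = detailed.NBest[0];             // top recognition hypothesis
        return {
            text: result.text,                    // recognized text
            confidence: best.Confidence           // value between 0 and 1
        };
    }

    // Example use inside the success callback:
    // var scored = getTextWithConfidence(result);
    // phraseDiv.innerHTML += scored.text + " (" + Math.round(scored.confidence * 100) + "%)";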
In your code, it seems the undefined error comes from trying to print SpeechConfig,
while the variable is actually defined as speechConfig.
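So the immediate fix in your snippet is to log the variable you actually declared:

    console.log("speechConfig", speechConfig);   // lower-case "s": the variable, not an SDK class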
...
Anyway, to demonstrate getting the confidence score successfully, my code is based on the official demo. Hope it helps.
For your code, try the HTML below:
<html>
<body>
    <button id='recordButton' onclick='test()'>test</button>
</body>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<script>
    function test() {
        var SDK = window.SpeechSDK;
        try {
            AudioContext = window.AudioContext      // our preferred impl
                || window.webkitAudioContext        // fallback, mostly for Safari
                || false;                           // could not find.
            if (AudioContext) {
                soundContext = new AudioContext();
                console.log("AudioContext", AudioContext);
            } else {
                alert("Audio context not supported");
            }
        }
        catch (e) {
            console.log("no sound context found, no audio output. " + e);
        }
        console.log("SpeechSDK initialized", SDK);

        var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("<your subscription key>", "<your service region>");
        speechConfig.speechRecognitionLanguage = "en-US";
        console.log("speechConfig", speechConfig);

        audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
        recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

        recognizer.recognizeOnceAsync(
            function (result) {
                console.log("result", result);
                recognizer.close();
                recognizer = undefined;
            },
            function (err) {
                console.log(err);
                recognizer.close();
                recognizer = undefined;
            });
    }
</script>
</html>
Result: the recognition result is logged to the browser console.
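Note that this second page does not set speechConfig.outputFormat = 1, so the result will not include an NBest list. If you also want the confidence score here, set that property before creating the recognizer and parse result.json in the success callback, exactly as in the first sample. A sketch of the changed parts, reusing the names from the code above:

    var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("<your subscription key>", "<your service region>");
    speechConfig.speechRecognitionLanguage = "en-US";
    speechConfig.outputFormat = 1;   // request the Detailed format so NBest/Confidence is returned

    recognizer.recognizeOnceAsync(
        function (result) {
            var best = JSON.parse(result.json).NBest[0];
            console.log("recognized:", result.text, "confidence:", best.Confidence);
            recognizer.close();
            recognizer = undefined;
        },
        function (err) {
            console.log(err);
            recognizer.close();
            recognizer = undefined;
        });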
If my answer is helpful, click the check mark beside the answer to toggle it from greyed out to filled in and accept it. Thanks!
Upvotes: 1