Reputation: 21
I am trying to implement speech recognition using the Watson Speech To Text service. I wrote some code in JavaScript using the "MediaStreamRecorder" library. I send data over a WebSocket and run into this problem: when I use "content-type": "audio/wav", Watson recognizes only the first blob and resets inactivity_timeout to the default value, even though I set it to 2 seconds.
I use this code to open the WebSocket:
initWebSocket(startRecordingCallback) {
    var that = this;
    that.websocket = new WebSocket(that.wsURI);

    that.websocket.onopen = function (evt) {
        console.log("WebSocket: connection OK");
        // Configure the recognition session before any audio is sent
        var message = {
            "action": "start",
            "content-type": "audio/wav",
            "interim_results": true,
            "continuous": true,
            "inactivity_timeout": 2
        };
        that.websocket.send(JSON.stringify(message));
        // Only start recording once the socket is open and configured
        startRecordingCallback();
    };

    that.websocket.onclose = function (evt) {
        if (evt.wasClean) {
            console.log("WebSocket: connection closed cleanly " + JSON.stringify(evt));
        } else {
            console.log("WebSocket: disconnect " + JSON.stringify(evt));
        }
    };

    that.websocket.onmessage = function (evt) {
        console.log(evt);
    };

    that.websocket.onerror = function (evt) {
        console.log("WebSocket: error " + JSON.stringify(evt));
    };
}
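For reference, the results arrive as JSON text messages in onmessage. A minimal sketch of pulling the transcript out (field names per the Watson WebSocket interface; state messages without results are skipped):

that.websocket.onmessage = function (evt) {
    var data = JSON.parse(evt.data);
    // With interim_results enabled, each result carries a "final" flag
    if (data.results && data.results.length > 0) {
        var transcript = data.results[0].alternatives[0].transcript;
        console.log((data.results[0].final ? "final: " : "interim: ") + transcript);
    }
};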
And this code for recording audio:
startRecording() {
    var that = this;
    this.initWebSocket(function () {
        var mediaConstraints = {
            audio: true
        };

        function onMediaSuccess(stream) {
            that.mediaRecorder = new MediaStreamRecorder(stream);
            that.mediaRecorder.mimeType = 'audio/wav';
            // Each blob delivered here is a self-contained WAV file with its
            // own header, which is likely why the service only recognizes
            // the first one.
            that.mediaRecorder.ondataavailable = function (blob) {
                that.websocket.send(blob);
            };
            that.mediaRecorder.start(3000); // emit a blob every 3 seconds
        }

        function onMediaError(e) {
            console.error('media error', e);
        }

        navigator.getUserMedia(mediaConstraints, onMediaSuccess, onMediaError);
    });
}
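For completeness, this is roughly how I stop a session; per the service docs a JSON {"action": "stop"} message marks the end of the audio (a sketch, my stopRecording is not shown above):

stopRecording() {
    var that = this;
    if (that.mediaRecorder) {
        that.mediaRecorder.stop();
    }
    if (that.websocket && that.websocket.readyState === WebSocket.OPEN) {
        // Tell the service no more audio is coming so it returns final results
        that.websocket.send(JSON.stringify({ "action": "stop" }));
    }
}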
I need to do recognition in real time over the WebSocket, with the socket closing automatically after 2 seconds of inactivity. Please advise me.
Upvotes: 2
Views: 632
Reputation: 23663
As @Daniel Bolanos said, inactivity_timeout is not triggered just because the transcript stays empty for more than inactivity_timeout seconds. The service uses its own speech detection rather than relying on the transcription, so if it detects speech it won't trigger the inactivity_timeout even if the transcript is empty.
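If you stay on the raw WebSocket interface, note that when the inactivity_timeout does fire, the service reports it as an error payload before the connection closes. A minimal sketch of watching for it (the exact error text is an assumption and may vary by service version):

websocket.onmessage = function (evt) {
    var data = JSON.parse(evt.data);
    if (data.error) {
        // e.g. something along the lines of "Session timed out due to inactivity ..."
        console.log('recognition stopped: ' + data.error);
    }
};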
Here is a snippet of code that does what you were trying to do in your question, but using the speech-javascript-sdk. Hopefully it will help future Stack Overflow users trying to recognize audio from the microphone.
document.querySelector('#button').onclick = function () {
    // you need to provide this endpoint to fetch a watson token
    fetch('/api/speech-to-text/token')
        .then(function (response) {
            return response.text();
        }).then(function (token) {
            var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
                token: token,
                outputElement: '#output' // CSS selector or DOM Element
            });
            stream.on('error', function (err) {
                console.log(err);
            });
            document.querySelector('#stop').onclick = function () {
                stream.stop();
            };
        }).catch(function (error) {
            console.log(error);
        });
};
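The snippet assumes the page contains elements matching the selectors used above, e.g.:

<button id="button">Record</button>
<button id="stop">Stop</button>
<div id="output"></div>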
Demo: https://watson-speech.mybluemix.net/microphone-streaming.html
Credit to @Nathan Friedly, who wrote the library.
Upvotes: 0