Anna Motoshkina

Reputation: 21

Watson ignores inactivity timeout while recognizing "audio/wav"

I am trying to implement speech recognition using the Watson Speech to Text service. I wrote some JavaScript code using the "MediaStreamRecorder" library. I send the data through a WebSocket and run into this problem: if I use "content-type": "audio/wav", Watson recognizes only the first blob and applies the default inactivity_timeout, even though I set it to 2 seconds.

I use this code to open the WebSocket:

initWebSocket(startRecordingCallback) {
    var that = this;
    that.websocket = new WebSocket(that.wsURI);
    that.websocket.onopen = function (evt) {
        console.log("WebSocket: connection OK ");
        var message = {
            "action": "start",
            "content-type": "audio/wav",
            "interim_results": true,
            "continuous": true,
            "inactivity_timeout": 2
        };
        that.websocket.send(JSON.stringify(message));
    };
    that.websocket.onclose = function (evt) {
        if (evt.wasClean) {
            console.log("WebSocket: connection closed cleanly " + JSON.stringify(evt));

        } else {
            console.log("WebSocket: disconnect " + JSON.stringify(evt));
        }
    };
    that.websocket.onmessage = function (evt) {
      console.log(evt)
    };
    that.websocket.onerror = function (evt) {
        console.log("WebSocket: error " + JSON.stringify(evt));
    };
}

And this code for recording audio:

startRecording() {
    var that = this;
    this.initWebSocket(function () {
      var mediaConstraints = {
          audio: true
      };
      function onMediaSuccess(stream) {
          that.mediaRecorder = new MediaStreamRecorder(stream);
          that.mediaRecorder.mimeType = 'audio/wav';
          that.mediaRecorder.ondataavailable = function (blob) {
              that.websocket.send(blob);
          };
          that.mediaRecorder.start(3000);
      }

      function onMediaError(e) {
          console.error('media error', e);
      }
      navigator.getUserMedia(mediaConstraints, onMediaSuccess, onMediaError);
    });
}
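
There is no stop routine above yet; a minimal sketch of one (assuming the mediaRecorder and websocket fields from the code above, and that the service accepts a JSON {"action": "stop"} text message to mark the end of the audio) would be:

stopRecording() {
    var that = this;
    // stop capturing further blobs from the microphone
    if (that.mediaRecorder) {
        that.mediaRecorder.stop();
    }
    // tell the service that no more audio is coming so it can return final results
    if (that.websocket && that.websocket.readyState === WebSocket.OPEN) {
        that.websocket.send(JSON.stringify({ "action": "stop" }));
    }
}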

I need to do recognition in real time over the WebSocket, with the socket closing automatically after 2 seconds of inactivity. Please advise me.

Upvotes: 2

Views: 632

Answers (1)

German Attanasio

Reputation: 23663

As @Daniel Bolanos said, inactivity_timeout is not triggered simply because the transcript stays empty for more than inactivity_timeout seconds. The service uses a different way of detecting whether there is speech rather than relying on the transcription.

If the service detects speech it won't trigger the inactivity_timeout even if the transcript is empty.
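
If you stick with the raw WebSocket, it also helps to parse the service's JSON messages in onmessage instead of logging the raw event, so you can see interim results and any error the service reports before closing the connection. A minimal sketch (the exact wording of error messages is not guaranteed):

that.websocket.onmessage = function (evt) {
    var msg = JSON.parse(evt.data);
    if (msg.error) {
        // an inactivity timeout, for example, is reported here before the socket closes
        console.error("Watson error:", msg.error);
    } else if (msg.results && msg.results.length) {
        // interim or final transcription results
        console.log(msg.results[0].alternatives[0].transcript, "final:", msg.results[0].final);
    }
};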

Here is a snippet of code that does what you were trying to do in your question, but using the speech-javascript-sdk. Hopefully it will help future Stack Overflow users trying to recognize audio from the microphone.

document.querySelector('#button').onclick = function () {
  // you need to provide this endpoint to fetch a watson token
  fetch('/api/speech-to-text/token') 
  .then(function(response) {
    return response.text();
  }).then(function (token) {
    var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
      token: token,
      outputElement: '#output' // CSS selector or DOM Element
    });

    stream.on('error', function(err) {
      console.log(err);
    });

    document.querySelector('#stop').onclick = function() {
      stream.stop();
    };
  }).catch(function(error) {
    console.log(error);
  });
};
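
If you want the transcript programmatically instead of (or in addition to) writing it to outputElement, the stream returned by recognizeMicrophone behaves like a Node-style stream, so you can listen for data events. A sketch based on the SDK's README; details may differ between versions:

stream.setEncoding('utf8'); // emit strings instead of Buffers
stream.on('data', function(data) {
  console.log(data); // transcribed text as it arrives
});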

Demo: https://watson-speech.mybluemix.net/microphone-streaming.html

Credits to @Nathan Friedly who wrote the library.

Upvotes: 0
