LordCas
LordCas

Reputation: 77

Can't upload a sound file to IBM Watson Speech to Text

I want to get a response from the IBM Watson server. The same code worked for the Google Speech API. As shown in the code below, I am sending the right audio to the Watson API.

// Reads the audio file at data_to_translate, POSTs its bytes to the Watson
// Speech to Text "recognize" endpoint using HTTP Basic authentication, and
// prints the transcript and confidence found in the response.
try
{
    // Load the whole file into an in-memory buffer.
    // NOTE(review): a single Read call is not guaranteed to fill the buffer, and
    // GetBuffer() returns the underlying array, which may be longer than the
    // stream's length — confirm, or consider File.ReadAllBytes.
    FileStream fileStream = File.OpenRead(data_to_translate);
    MemoryStream memoryStream = new MemoryStream();
    memoryStream.SetLength(fileStream.Length);
    fileStream.Read(memoryStream.GetBuffer(), 0, (int)fileStream.Length);
    byte[] BA_AudioFile = memoryStream.GetBuffer();
    // Build the Authorization header value: "Basic " + base64("user:password").
    // The "xxxx" literals stand in for the real credentials.
    string auth = string.Format("{0}:{1}", "xxxx", "xxxx");
    string auth64 = Convert.ToBase64String(Encoding.ASCII.GetBytes(auth));
    string credentials = string.Format("{0} {1}", "Basic", auth64);
    HttpWebRequest _HWR_SpeechToText = null;
    _HWR_SpeechToText =
                (HttpWebRequest)HttpWebRequest.Create(
                    "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize");
    _HWR_SpeechToText.Headers[HttpRequestHeader.Authorization] = credentials;
    _HWR_SpeechToText.Method = "POST";
    // NOTE(review): "rate" and "channels" are separated by a space here, not by
    // ';' — media-type parameters are normally ';'-separated; verify against the
    // format the service expects.
    _HWR_SpeechToText.ContentType = "audio/flac; rate=44100 channels=2";
    _HWR_SpeechToText.ContentLength = BA_AudioFile.Length;
    // Force a non-persistent HTTP/1.0 request over a single connection.
    // NOTE(review): confirm the service accepts HTTP/1.0 uploads.
    _HWR_SpeechToText.KeepAlive = false;
    _HWR_SpeechToText.ProtocolVersion = HttpVersion.Version10;
    _HWR_SpeechToText.ServicePoint.ConnectionLimit = 1;
    // Send the audio bytes as the request body.
    Stream stream = _HWR_SpeechToText.GetRequestStream();
    stream.Write(BA_AudioFile, 0, BA_AudioFile.Length);
    stream.Close();

    // Only a 200 OK response is processed; nothing is printed for other statuses.
    HttpWebResponse HWR_Response = (HttpWebResponse)_HWR_SpeechToText.GetResponse();
    if ( HWR_Response.StatusCode == HttpStatusCode.OK )
    {
        StreamReader SR_Response = new StreamReader(HWR_Response.GetResponseStream());
        // string text = SR_Response.ReadToEnd();
        // jsonResponse json = JsonConvert.DeserializeObject<jsonResponse>(SR_Response.ReadToEnd());

        //Speech API response here
        var result = SR_Response.ReadToEnd();
        Console.WriteLine(result);
        // The body is split on newlines and every line is deserialized on its own.
        var jsons = result.Split('\n');

        foreach ( var j in jsons )
        {
            // NOTE(review): jsonResponse.SpeechResponse is declared elsewhere; the
            // messages below mention Google, so this type was presumably written for
            // the Google Speech response — confirm it matches Watson's JSON shape.
            var jsonObject = JsonConvert.DeserializeObject<jsonResponse.SpeechResponse>(j);
            // Skip lines that produce no object or an empty result list.
            if ( jsonObject == null || jsonObject.Result.Length <= 0 ) continue;

            // Report the first alternative of the first result.
            // A confidence of exactly 0.90 matches neither branch and prints nothing.
            if ( jsonObject.Result[0].Alternative[0].Confidence > 0.90 )
            {
                Console.WriteLine("text-to-speech van google: " + jsonObject.Result[0].Alternative[0].Transcript + "\nconfidence: " + jsonObject.Result[0].Alternative[0].Confidence);
            }
            else if ( jsonObject.Result[0].Alternative[0].Confidence < 0.90 )
            {
                Console.WriteLine("Watson is te onzeker " + jsonObject.Result[0].Alternative[0].Confidence + " :( \nmaar hier is toch het antwoord: " + jsonObject.Result[0].Alternative[0].Transcript);
            }
        }
    }
}
catch ( Exception ex )
{
    // Any failure (file, network, JSON) is written to the console and not rethrown.
    Console.WriteLine(ex.ToString());
}

// Keep the console window open until Enter is pressed.
Console.ReadLine();

Could somebody explain to me what I am doing wrong? I want to make a call without the /session/, because I only need the final answer + confidence.

With this code I am getting the following error:

System.IO.IOException: Unable to write data to the transport connection: An existing connection was forcibly closed by the remote host. ---> System.Net.Sockets.SocketException: An existing connection was forcibly closed by the remote host
   at System.Net.Sockets.Socket.Send(Byte[] buffer, Int32 offset, Int32 size, SocketFlags socketFlags)
   at System.Net.Sockets.NetworkStream.Write(Byte[] buffer, Int32 offset, Int32 size)
   --- End of inner exception stack trace ---
   at System.Net.Sockets.NetworkStream.Write(Byte[] buffer, Int32 offset, Int32 size)
   at System.Net.Security._SslStream.StartWriting(Byte[] buffer, Int32 offset, Int32 count, AsyncProtocolRequest asyncRequest)
   at System.Net.Security._SslStream.ProcessWrite(Byte[] buffer, Int32 offset, Int32 count, AsyncProtocolRequest asyncRequest)
   at System.Net.TlsStream.Write(Byte[] buffer, Int32 offset, Int32 size)
   at System.Net.PooledStream.Write(Byte[] buffer, Int32 offset, Int32 size)
   at System.Net.ConnectStream.InternalWrite(Boolean async, Byte[] buffer, Int32 offset, Int32 size, AsyncCallback callback, Object state)
   at System.Net.ConnectStream.Write(Byte[] buffer, Int32 offset, Int32 size)
   at GoogleSpeech.speechRecognitionWatson.speechRecognize(String data_to_translate) in C:\Program Files (x86)\School\herkansingen jaar 4\INFAFS\ProofofConcepts\GoogleSpeech\GoogleSpeech\speechRecognitionWatson.cs:line 47

Could anybody tell me what I should add, remove or if my approach towards IBM Watson is even close? I thought this would work since it worked for the Google API and other API...

Upvotes: 2

Views: 578

Answers (1)

LordCas
LordCas

Reputation: 77

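I have found the problem in my own code. Compared with the code in the question, the working version below separates the Content-Type parameters with semicolons, no longer sets KeepAlive, ProtocolVersion or ConnectionLimit, and parses Watson's JSON response directly.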

 // Sends the FLAC file at data_to_translate to the Watson Speech to Text
            // "recognize" endpoint and prints the best transcript with its confidence.

            // File.ReadAllBytes returns exactly the file's contents. A single
            // Stream.Read call may return fewer bytes than requested, and
            // MemoryStream.GetBuffer() can be longer than the data written to it.
            byte[] BA_AudioFile = File.ReadAllBytes(data_to_translate);

            HttpWebRequest _HWR_SpeechToText =
                (HttpWebRequest)HttpWebRequest.Create("https://stream.watsonplatform.net/speech-to-text/api/v1/recognize");

            // HTTP Basic authentication: "Basic " + base64("user:password").
            // The two literals are placeholders for the service credentials.
            string auth = string.Format("{0}:{1}","Watson.uID","Watson_uPWD");
            string auth64 = Convert.ToBase64String(Encoding.ASCII.GetBytes(auth));
            string credentials = string.Format("{0} {1}", "Basic", auth64);

            _HWR_SpeechToText.Headers[HttpRequestHeader.Authorization] = credentials;
            _HWR_SpeechToText.Method = "POST";
            // Content-Type parameters are separated by semicolons.
            _HWR_SpeechToText.ContentType = "audio/flac; rate=44100; channels=2;";
            _HWR_SpeechToText.ContentLength = BA_AudioFile.Length;

            // Dispose the request stream even if the write fails.
            using (Stream stream = _HWR_SpeechToText.GetRequestStream())
            {
                stream.Write(BA_AudioFile, 0, BA_AudioFile.Length);
            }

            // Dispose the response (and its connection) once it has been read.
            using (HttpWebResponse HWR_Response = (HttpWebResponse)_HWR_SpeechToText.GetResponse())
            {
                if (HWR_Response.StatusCode == HttpStatusCode.OK)
                {
                    string result;
                    using (StreamReader SR_Response = new StreamReader(HWR_Response.GetResponseStream()))
                    {
                        result = SR_Response.ReadToEnd();
                    }
                    Console.WriteLine(result);

                    var JsonObject = Newtonsoft.Json.Linq.JObject.Parse(result);

                    // SelectToken yields null instead of throwing when "results" or
                    // "alternatives" is missing or empty (e.g. no speech was recognized).
                    Newtonsoft.Json.Linq.JToken best = JsonObject.SelectToken("results[0].alternatives[0]");
                    if (best == null)
                    {
                        Console.WriteLine("Watson returned no transcript.");
                    }
                    else
                    {
                        // A missing confidence value is treated as 0 (not confident).
                        double confidence = (double?)best["confidence"] ?? 0.0;
                        if (confidence > 0.9)
                        {
                            // High confidence: Watson "knows" what was said.
                            Console.WriteLine("Watson knows what he has heard: " + (string)best["transcript"]);
                            Console.WriteLine("Watson's confidence!: " + (string)best["confidence"]);
                        }
                        else
                        {
                            // Low confidence: report the transcript as a guess.
                            Console.WriteLine("Watson thinks: " + (string)best["transcript"]);
                            Console.WriteLine("Watson has this confidence: " + (string)best["confidence"]);
                        }
                    }
                }
            }

Upvotes: 3

Related Questions