Julian
Julian

Reputation: 35

(Unity 5.6) (Watson SDK) My 'text to speech' widget will not parse / play the associated string

I'm using the Unity Watson SDK to take a user's speech, parse it to text, and then pass the text to the SDK's 'textToSpeechWidget', https://i.sstatic.net/fV3pb.png.

I've created a button "button (button)" that is meant to, when pressed, play the associated text input (which is pulling user speech). The 'speech to text' function is working: when a user speaks, the 'input text' UI is filled with their words. The program should then play the associated words when the user presses the button. Instead, nothing happens, and the system doesn't indicate any errors or describe why. Here is the TextToSpeech widget code, as well as the textToSpeech code.

using UnityEngine;
using UnityEngine.UI;
using IBM.Watson.DeveloperCloud.Services.TextToSpeech.v1;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using IBM.Watson.DeveloperCloud.Utilities;

#pragma warning disable 414

namespace IBM.Watson.DeveloperCloud.Widgets
{
  /// <summary>
  /// TextToSpeech widget class wraps the TextToSpeech service.
  /// </summary>
  [RequireComponent(typeof(AudioSource))]
  public class TextToSpeechWidget : Widget
  {
 #region Inputs
// Widget input named "Text" that carries TextToSpeechData. The last constructor argument is the
// name of the handler method in this class (presumably resolved by name by the Widget base - confirm).
[SerializeField]
private Input m_TextInput = new Input("Text", typeof(TextToSpeechData), 
"OnTextInput");
// Widget input named "Voice" that carries VoiceData and is routed to OnVoiceSelect.
[SerializeField]
private Input m_VoiceInput = new Input("Voice", typeof(VoiceData), 
"OnVoiceSelect");
#endregion

#region Outputs
// Receives SpeakingStateData(true) when playback starts and SpeakingStateData(false) from OnEndSpeech.
// NOTE(review): the meaning of the second constructor argument (true) is not visible in this file.
[SerializeField]
private Output m_Speaking = new Output(typeof(SpeakingStateData), true);
// Receives DisableMicData(true) when playback starts and DisableMicData(false) from OnEndSpeech.
[SerializeField]
private Output m_DisableMic = new Output(typeof(DisableMicData));
// Receives LevelData from OnLevelOut while a clip is playing.
[SerializeField]
private Output m_LevelOut = new Output(typeof(LevelData));
#endregion

#region Private Data
// Service object used to request audio for a piece of text.
TextToSpeech m_TextToSpeech = new TextToSpeech();

// Delay and repeat period passed to InvokeRepeating for OnLevelOut.
[SerializeField, Tooltip("How often to send level out data in seconds.")]
private float m_LevelOutInterval = 0.05f;
// Multiplied into the maximum sample value in OnLevelOut and also passed as LevelData's second argument.
[SerializeField]
private float m_LevelOutputModifier = 1.0f;
// Optional button: made non-interactable by OnTextToSpeech and interactable again by Update.
[SerializeField]
private Button m_TextToSpeechButton = null;
// Optional text field whose current text OnTextToSpeech sends to the service.
[SerializeField]
private InputField m_Input = null;
// Optional label that shows "THINKING" or "READY".
[SerializeField]
private Text m_StatusText = null;
// Voice copied onto m_TextToSpeech before each request when it differs.
[SerializeField]
private VoiceType m_Voice = VoiceType.en_US_Michael;
// Forwarded as the third argument of TextToSpeech.ToSpeech (presumably selects HTTP POST - confirm).
[SerializeField]
private bool m_UsePost = false;

// AudioSource on this GameObject, fetched in Start.
private AudioSource m_Source = null;
// Sample position up to which OnLevelOut has already reported level data.
private int m_LastPlayPos = 0;

/// <summary>
/// One text-to-speech request together with the audio clip it eventually produces.
/// </summary>
private class Speech
{
  /// <summary>
  /// Clip delivered by the service callback. Null until the callback runs, and it may
  /// still be null afterwards (Update logs and skips such entries).
  /// </summary>
  public AudioClip Clip { get; set; }

  /// <summary>True once the service callback has been invoked for this request.</summary>
  public bool Ready { get; set; }

  /// <summary>
  /// Starts the conversion request right away; the result arrives through the callback.
  /// </summary>
  /// <param name="textToSpeech">Service object that performs the request.</param>
  /// <param name="text">Text to convert.</param>
  /// <param name="usePost">Forwarded unchanged to the service call.</param>
  public Speech(TextToSpeech textToSpeech, string text, bool usePost)
  {
    textToSpeech.ToSpeech(text, HandleClipReceived, usePost);
  }

  /// <summary>
  /// Hands the clip, if any, to UnityObjectUtil for destruction when this request is finalized.
  /// </summary>
  ~Speech()
  {
    if (Clip != null)
    {
      UnityObjectUtil.DestroyUnityObject(Clip);
    }
  }

  // Callback passed to ToSpeech: stores the clip and flags the request as complete.
  private void HandleClipReceived(AudioClip clip)
  {
    Clip = clip;
    Ready = true;
  }
}

// Requests awaiting playback in the order they were made; Update dequeues the head once it is Ready.
private Queue<Speech> m_SpeechQueue = new Queue<Speech>();
// The request most recently dequeued by Update; reset to null in OnEndSpeech.
private Speech m_ActiveSpeech = null;
#endregion

#region Public Memebers

/// <summary>
/// The voice applied to the service before each request. Defaults to English (US) - Michael.
/// </summary>
/// <value>The currently selected voice.</value>
public VoiceType Voice
{
  get { return m_Voice; }
  set { m_Voice = value; }
}

#endregion

#region Event Handlers
/// <summary>
/// Button event handler: queues the current contents of the input field for synthesis,
/// then shows "THINKING" and disables the button.
/// </summary>
/// <remarks>
/// Must be wired to the button's OnClick event. If no input field is assigned, or the field
/// is empty, nothing is queued, the UI is left untouched and a warning is logged.
/// </remarks>
public void OnTextToSpeech()
{
  // Same empty-text guard as OnTextInput. Without it an empty request would be queued and
  // the UI would report "THINKING" for a request that can never produce audio.
  if (m_Input == null || string.IsNullOrEmpty(m_Input.text))
  {
    Log.Warning("TextToSpeechWidget", "OnTextToSpeech called without any input text; nothing to speak.");
    return;
  }

  if (m_TextToSpeech.Voice != m_Voice)
    m_TextToSpeech.Voice = m_Voice;

  m_SpeechQueue.Enqueue(new Speech(m_TextToSpeech, m_Input.text, m_UsePost));

  if (m_StatusText != null)
    m_StatusText.text = "THINKING";
  if (m_TextToSpeechButton != null)
    m_TextToSpeechButton.interactable = false;
}
#endregion

#region Private Functions
/// <summary>
/// Handler named by the "Text" input: queues the received text for synthesis.
/// Null or empty text is ignored.
/// </summary>
/// <param name="data">Expected to be a TextToSpeechData instance.</param>
/// <exception cref="WatsonException">Thrown when <paramref name="data"/> is not TextToSpeechData.</exception>
private void OnTextInput(Data data)
{
  TextToSpeechData payload = data as TextToSpeechData;
  if (payload == null)
  {
    throw new WatsonException("Wrong data type received.");
  }

  if (string.IsNullOrEmpty(payload.Text))
  {
    return;
  }

  // Bring the service's voice in line with the widget's selection before requesting audio.
  if (m_TextToSpeech.Voice != m_Voice)
  {
    m_TextToSpeech.Voice = m_Voice;
  }

  Speech request = new Speech(m_TextToSpeech, payload.Text, m_UsePost);
  m_SpeechQueue.Enqueue(request);
}

/// <summary>
/// Handler named by the "Voice" input: stores the received voice as the widget's selection.
/// </summary>
/// <param name="data">Expected to be a VoiceData instance.</param>
/// <exception cref="WatsonException">Thrown when <paramref name="data"/> is not VoiceData.</exception>
private void OnVoiceSelect(Data data)
{
  VoiceData selection = data as VoiceData;
  if (selection != null)
  {
    m_Voice = selection.Voice;
    return;
  }

  throw new WatsonException("Unexpected data type");
}

/// <summary>
/// Starts the UnityObjectUtil destroy queue and, when a status label is assigned, shows "READY".
/// </summary>
private void OnEnable()
{
  UnityObjectUtil.StartDestroyQueue();

  if (m_StatusText == null)
  {
    return;
  }

  m_StatusText.text = "READY";
}

/// <exclude />
protected override void Start()
{
  base.Start();

  // The class-level RequireComponent attribute asks for an AudioSource on this GameObject.
  AudioSource attached = GetComponent<AudioSource>();
  m_Source = attached;
}

/// <summary>
/// Per-frame pump. When the audio source is idle and the request at the head of the queue
/// has completed, dequeues it and starts playback (or logs and skips it when no clip was
/// produced). Afterwards restores the button and "READY" status, but only when no request
/// is still waiting in the queue.
/// </summary>
private void Update()
{
  if (m_Source != null && !m_Source.isPlaying
      && m_SpeechQueue.Count > 0
      && m_SpeechQueue.Peek().Ready)
  {
    // A new clip is about to start, so drop any end-of-speech timer left from the previous one.
    CancelInvoke("OnEndSpeech");

    m_ActiveSpeech = m_SpeechQueue.Dequeue();
    if (m_ActiveSpeech.Clip != null)
    {
      if (m_Speaking.IsConnected)
        m_Speaking.SendData(new SpeakingStateData(true));
      if (m_DisableMic.IsConnected)
        m_DisableMic.SendData(new DisableMicData(true));

      m_Source.spatialBlend = 0.0f;     // 2D sound
      m_Source.loop = false;            // do not loop
      m_Source.clip = m_ActiveSpeech.Clip;
      m_Source.Play();

      // Schedule the end-of-speech notification for the clip's duration plus a small margin.
      Invoke("OnEndSpeech", ((float)m_ActiveSpeech.Clip.samples / (float)m_ActiveSpeech.Clip.frequency) + 0.1f);
      if (m_LevelOut.IsConnected)
      {
        m_LastPlayPos = 0;
        InvokeRepeating("OnLevelOut", m_LevelOutInterval, m_LevelOutInterval);
      }
    }
    else
    {
      Log.Warning("TextToSpeechWidget", "Skipping null AudioClip");
    }
  }

  // Previously the UI was reset on every frame, which wiped out the "THINKING" state set by
  // OnTextToSpeech one frame later. Keep that state while any request is still queued.
  if (m_SpeechQueue.Count > 0)
    return;

  if (m_TextToSpeechButton != null)
    m_TextToSpeechButton.interactable = true;
  if (m_StatusText != null)
    m_StatusText.text = "READY";
}

/// <summary>
/// Repeating callback started by Update. While the source is playing, sends the maximum
/// sample value of the audio played since the previous report, scaled by the output
/// modifier. Cancels its own repetition once playback has stopped.
/// </summary>
private void OnLevelOut()
{
  if (m_Source == null || !m_Source.isPlaying)
  {
    CancelInvoke("OnLevelOut");
    return;
  }

  int playhead = m_Source.timeSamples;
  if (playhead <= m_LastPlayPos)
  {
    // No new samples since the last report.
    return;
  }

  float[] window = new float[playhead - m_LastPlayPos];
  m_Source.clip.GetData(window, m_LastPlayPos);

  float peak = Mathf.Max(window) * m_LevelOutputModifier;
  m_LevelOut.SendData(new LevelData(peak, m_LevelOutputModifier));
  m_LastPlayPos = playhead;
}
/// <summary>
/// Timer callback scheduled by Update for shortly after the clip's duration. Reports that
/// speaking has ended, sends DisableMicData(false), stops the source if it is still
/// playing, and clears the active speech.
/// </summary>
private void OnEndSpeech()
{
  if (m_Speaking.IsConnected)
  {
    m_Speaking.SendData(new SpeakingStateData(false));
  }

  if (m_DisableMic.IsConnected)
  {
    m_DisableMic.SendData(new DisableMicData(false));
  }

  if (m_Source.isPlaying)
  {
    m_Source.Stop();
  }

  m_ActiveSpeech = null;
}

/// <exclude />
protected override string GetName()
{
  // Name this widget reports to the Widget base class.
  const string widgetName = "TextToSpeech";
  return widgetName;
}
#endregion

}

}

The serialized field asks for a button to play the audio, which I've provided. Why does it not play the audio? Thanks so much for your help :)

Upvotes: 2

Views: 173

Answers (1)

Dogukan
Dogukan

Reputation: 111

In order to play the text via the button (Button m_TextToSpeechButton), the button needs to call 'OnTextToSpeech' on click and be connected to the widget. Then, you need to connect your text input field to the widget as its input (InputField m_Input). This should do the trick.

Upvotes: 2

Related Questions