kairo
kairo

Reputation: 23

Azure text-to-voice: how can I change language and voice for output?

I need help with the following JavaScript and hope someone can kindly help me. At the moment the text is read out in an English voice.

How can I change the language and voice in the following working code? I searched the web extensively but couldn't find a suitable solution because of my limited JavaScript skills.

So, unfortunately my programming skills are not good enough, so I need some assistance for a concrete line of code. Thanks.

<!DOCTYPE html>
<html lang="en">
<head>
  <title>Microsoft Cognitive Services Speech SDK JavaScript Quickstart</title>
  <meta charset="utf-8" />
</head>
<body>
  
  <button id="startSpeakTextAsyncButton">speak</button>
  
  <!-- Speech SDK reference sdk. -->
  <script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>

  <!--   Speech SDK Authorization token  -->
  <script>
  var authorizationEndpoint = "token.php";

  /**
   * Fetches an authorization token from `authorizationEndpoint` and publishes
   * it to the page-level globals used by the synthesis script.
   *
   * On success it:
   *  - stores the raw response text in the global `authorizationToken`;
   *  - applies the `region` found in the token payload to `serviceRegion`
   *    (either to its `.value` when `serviceRegion` is an input-like object,
   *    or to the variable itself when it is a plain string);
   *  - disables the `subscriptionKey` element when the page has one.
   *
   * Does nothing when `authorizationEndpoint` is empty. Failures are logged to
   * the console and leave the globals untouched.
   */
  function RequestAuthorizationToken() {
    if (!authorizationEndpoint) {
      return;
    }

    const request = new XMLHttpRequest();
    request.open("GET", authorizationEndpoint);
    request.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");

    // Handlers are attached before send() so no completion event can be missed.
    request.onload = function () {
      const rawToken = request.responseText;
      let token;

      try {
        // The payload is the second dot-separated segment (JWT layout). JWT
        // segments use the base64url alphabet, which atob() rejects, so map it
        // back to standard base64 first.
        const payload = rawToken.split(".")[1].replace(/-/g, "+").replace(/_/g, "/");
        token = JSON.parse(atob(payload));
      } catch (error) {
        console.error("Could not decode the authorization token from " + authorizationEndpoint, error);
        return;
      }

      if (token.region) {
        if (typeof serviceRegion === "object" && serviceRegion !== null && "value" in serviceRegion) {
          // serviceRegion is a form element (or similar): update its value.
          serviceRegion.value = token.region;
        } else {
          // serviceRegion is a plain string: writing `.value` on it would be a
          // silent no-op, so replace the variable itself.
          serviceRegion = token.region;
        }
      }

      authorizationToken = rawToken;

      // The subscription key field is optional; only disable it when present.
      if (typeof subscriptionKey !== "undefined" && subscriptionKey) {
        subscriptionKey.disabled = true;
      }
    };

    request.onerror = function () {
      console.error("Request for an authorization token failed: " + authorizationEndpoint);
    };

    request.send("");
  }
  </script>

  <!-- Speech SDK USAGE -->
  <script>
    // Page-level state. These stay `var` declarations at script scope because
    // the token script (RequestAuthorizationToken) reads and writes
    // serviceRegion, authorizationToken and subscriptionKey by name.
    var startSpeakTextAsyncButton;
    var serviceRegion = "westeurope";

    // Language and voice used for the synthesized output. Setting the voice
    // name alone already selects the language; the language is set as well so
    // the output stays German if the voice name is ever cleared.
    // NOTE(review): "de-DE-KatjaNeural" is taken from the public Azure voice
    // list; confirm it is available in your region.
    var voiceLanguage = "de-DE";
    var voiceName = "de-DE-KatjaNeural";

    var subscriptionKey;
    var authorizationToken;
    var SpeechSDK;
    var speechConfig;
    var synthesizer;

    document.addEventListener("DOMContentLoaded", function () {
      startSpeakTextAsyncButton = document.getElementById("startSpeakTextAsyncButton");
      // The markup shown here has no element with this id, so this is null
      // unless the page is extended with a subscription key field.
      subscriptionKey = document.getElementById("subscriptionKey");

      /** Releases the current synthesizer and makes the button clickable again. */
      function finishSpeaking() {
        startSpeakTextAsyncButton.disabled = false;
        if (synthesizer) {
          synthesizer.close();
          synthesizer = undefined;
        }
      }

      startSpeakTextAsyncButton.addEventListener("click", function () {
        // Fail early (and keep the button usable) when a prerequisite is missing.
        if (!SpeechSDK) {
          window.console.error("Speech SDK is not loaded.");
          return;
        }
        if (!authorizationToken) {
          window.console.error("No authorization token available yet.");
          return;
        }

        startSpeakTextAsyncButton.disabled = true;

        speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion);

        // Language and voice are plain properties on the config instance; they
        // must be set before the synthesizer is created from it.
        speechConfig.speechSynthesisLanguage = voiceLanguage;
        speechConfig.speechSynthesisVoiceName = voiceName;

        synthesizer = new SpeechSDK.SpeechSynthesizer(speechConfig);

        // output should be in German language:
        let inputText = "Ich möchte es auf deutsche Sprache setzen, weiß aber nicht wie!";

        synthesizer.speakTextAsync(
          inputText,
          function (result) {
            window.console.log(result);
            finishSpeaking();
          },
          function (error) {
            // Without this callback a failed request would leave the button
            // disabled and the synthesizer open.
            window.console.error(error);
            finishSpeaking();
          });
      });

      if (window.SpeechSDK) {
        SpeechSDK = window.SpeechSDK;
        startSpeakTextAsyncButton.disabled = false;
        if (typeof RequestAuthorizationToken === "function") {RequestAuthorizationToken();}
      }
    });
  
  </script>
</body>
</html>

Upvotes: 2

Views: 2179

Answers (2)

parsecer
parsecer

Reputation: 5106

The function to save the given text as an mp3 file, feel free to use!

/**
 * Synthesizes `text` into an MP3 file and resolves with a read stream of it.
 *
 * @param {string} key - Speech resource subscription key.
 * @param {string} region - Region of the Speech resource.
 * @param {string} text - Plain text to synthesize.
 * @param {string} filename - Path of the audio file to write.
 * @param {string} [voiceName="es-MX-LibertoNeural"] - Voice to synthesize with.
 * @param {string} [language="es-ES"] - Synthesis language; only decides the
 *   voice when no voice name is set.
 * @returns {Promise<object>} Resolves with `fs.createReadStream(filename)`.
 *   Rejects with an `Error` when the service reports anything other than a
 *   completed synthesis, or with the SDK's own error value when the request
 *   itself fails.
 */
const textToSpeech = (key, region, text, filename, voiceName = "es-MX-LibertoNeural", language = "es-ES") => {
    // convert callback function to promise
    return new Promise((resolve, reject) => {

        const speechConfig = sdk.SpeechConfig.fromSubscription(key, region);
        speechConfig.speechSynthesisOutputFormat = 5; // mp3

        speechConfig.speechSynthesisVoiceName = voiceName;
        speechConfig.speechSynthesisLanguage = language;

        const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename);

        const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

        synthesizer.speakTextAsync(
            text,
            result => {
                synthesizer.close();

                // The success callback also fires for canceled requests (for
                // example an invalid key), so the outcome has to be checked
                // before the file is handed to the caller.
                if (result.reason !== sdk.ResultReason.SynthesizingAudioCompleted) {
                    const details = result.errorDetails || `result reason ${result.reason}`;
                    reject(new Error(`Speech synthesis failed: ${details}`));
                    return;
                }

                // return stream from file
                resolve(fs.createReadStream(filename));
            },
            error => {
                synthesizer.close();
                reject(error);
            });
    });
};

module.exports = {
    textToSpeech
};

// example of use - saves the generated audio in the oof3.mp3 file
(async () => {
    await textToSpeech('yourkeygoeshere', 'youregiongoeshere', 'El gato y el perro', 'oof3.mp3');
})();

PS

speechConfig.speechSynthesisVoiceName = "es-MX-LibertoNeural";

is enough to make the output Spanish. Adding

speechConfig.speechSynthesisLanguage = "es-ES";

doesn't change the output.

When you only set

speechConfig.speechSynthesisLanguage = "es-ES";

the output is Spanish, but the voice is different (some default voice, I assume)

Voices are here: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/samples/js/browser/index.html#L130

To get more voices, execute these commands:

# Download the list of voices for the resource's region into voices.txt.
# Replace YOUR_RESOURCE_REGION and YOUR_RESOURCE_KEY with your own values.
curl --location --request GET \
  --header 'Ocp-Apim-Subscription-Key: YOUR_RESOURCE_KEY' \
  'https://YOUR_RESOURCE_REGION.tts.speech.microsoft.com/cognitiveservices/voices/list' \
  > voices.txt
# Open the saved list in an editor.
vim voices.txt

(taken from https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=streaming#get-a-list-of-voices)

Upvotes: 0

Stanley Gong
Stanley Gong

Reputation: 12153

For a quick test, you can specify your language and voice in speechConfig as below:

<!DOCTYPE html>
<html lang="en">
<head>
  <title>Microsoft Cognitive Services Speech SDK JavaScript Quickstart</title>
  <meta charset="utf-8" />
</head>
<body>
  
  <button id="startSpeakTextAsyncButton" onclick="synthesizeSpeech()">speak</button>
  
  <!-- Speech SDK reference sdk. -->
  <script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
 
  <script>
    /**
     * Speaks a fixed German sentence through the default audio output.
     *
     * Creates a speech config from the subscription key and region, selects a
     * German language and voice, and runs one synthesis. The "speak" button is
     * disabled while the request is running and re-enabled when it finishes,
     * whether it succeeded or failed.
     */
    function synthesizeSpeech() {
        // Look the button up explicitly rather than relying on the browser
        // exposing elements with an id as global variables.
        const speakButton = document.getElementById("startSpeakTextAsyncButton");

        // NOTE(review): a subscription key embedded in page source is visible
        // to every visitor; keep this to quick local tests.
        const speechConfig = SpeechSDK.SpeechConfig.fromSubscription("<your subscription key,you can find it on azure portal=> Kesy and Endpoints blade>", "<your region>");
        // NOTE(review): the non-neural "Hedda" voice used originally appears
        // to be retired; a neural de-DE voice is used instead. Confirm the
        // name against the current voice list for your region.
        speechConfig.speechSynthesisVoiceName = "de-DE-KatjaNeural";
        speechConfig.speechSynthesisLanguage = "de-DE";

        const synthesizer = new SpeechSDK.SpeechSynthesizer(speechConfig);
        const inputText = "Ich möchte es auf deutsche Sprache setzen, weiß aber nicht wie!";

        // Shared cleanup for both outcomes: re-enable the button first so it
        // is usable again even if closing the synthesizer fails.
        const finish = function () {
            if (speakButton) {
                speakButton.disabled = false;
            }
            synthesizer.close();
        };

        // Disable only after the config and synthesizer were created, so a
        // setup error cannot leave the button stuck in the disabled state.
        if (speakButton) {
            speakButton.disabled = true;
        }

        synthesizer.speakTextAsync(
          inputText,
          function (result) {
            console.log(result);
            finish();
          },
          function (error) {
            console.error(error);
            finish();
        });
    }
  
  </script>
</body>
</html>

You can find all voice names on this page, starting at line 130. It is not your fault; it seems there is no official JS sample for this :(

Upvotes: 5

Related Questions