speech_recognition and gtts don't understand numbers lower than 11

Question

I put together straightforward code that asks the user to choose between option 1, oranges, and option 2, pears:
```
options = {
    (1, "1", "one", "number one", "oranges", "orange", "orange's", "oranges'"): 1,
    (2, "2", "two", "number two", "pears", "pear", "pear's", "pears'", "pier"): 2
}
```
No matter what I write above, the speech recogniser does not recognise the numbers "one" and "two." Only the non-numeric options, "number one," "pears," etc., are recognised correctly.

The second issue is the weirdest. The speech recogniser doesn't understand any number below 11 unless you say it followed by ".0" ("three point zero"). It understands "10" if you say "one zero". From 11 onwards, it understands the numbers as you say them:

def convert_to_number(text):
    number_words = {
        "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
        "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
        "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
        "fourteen": 14, "fifteen": 15, "sixteen": 16, "seventeen": 17,
        "eighteen": 18, "nineteen": 19, "twenty": 20
    }

Output:

Output window 1: with notes of what I am saying. Output window 2: testing what the speech recognition understands.

These were the steps I followed to correct the bugs described:

a. I initially used the libraries pyttsx3 and speech_recognition, then I changed pyttsx3 to gtts and pydub. There was no change in the buggy behaviour with different libraries.

b. I added an en-GB locale, no impact either.

c. I asked a native British speaker to pronounce the options, no difference either.

d. Everything is configured correctly: ffmpeg, the microphone, etc.

This is the full code:

from gtts import gTTS
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
import os

# Append the ffmpeg "bin" directory to this process's PATH. The location is
# hard-coded to a Windows install path.
# NOTE(review): presumably needed so pydub can locate ffmpeg when decoding
# the MP3 produced in speak() -- confirm.
os.environ["PATH"] += os.pathsep + "C:/ffmpeg/bin"

# Module-level speech-to-text recognizer; listen() uses it both to capture
# audio from the microphone and to transcribe it.
recognizer = sr.Recognizer()

def speak(text):
    """Synthesise *text* to speech and play it.

    The text is rendered to a temporary MP3 file with gTTS, loaded and
    played with pydub, and the file is deleted afterwards.

    Args:
        text: The sentence to speak.
    """
    temp_path = "temp.mp3"
    try:
        tts = gTTS(text=text, lang='en-GB')
        tts.save(temp_path)
        sound = AudioSegment.from_mp3(temp_path)
        play(sound)
    finally:
        # Clean up even when synthesis, decoding or playback raises, so a
        # failed call does not leave a stale temp.mp3 behind. The existence
        # check avoids masking the original error with FileNotFoundError
        # when the file was never written.
        if os.path.exists(temp_path):
            os.remove(temp_path)

def listen():
    """Capture one utterance from the microphone and transcribe it.

    Returns:
        The transcript in lower case, or ``None`` when the audio could not
        be understood or the recognition request failed. In both failure
        cases an apology is printed and spoken before returning.
    """
    with sr.Microphone() as mic:
        print("Listening...")
        captured = recognizer.listen(mic)
        try:
            transcript = recognizer.recognize_google(captured, language='en-GB')
        except sr.UnknownValueError:
            failure = "Sorry, I did not understand that."
        except sr.RequestError:
            failure = "Sorry, my speech service is down."
        else:
            print(f"You said: {transcript}")
            return transcript.lower()
        # Only reached when one of the handlers above ran.
        print(failure)
        speak(failure)
        return None

def convert_to_number(text):
    """Convert a recognised utterance into a number.

    Numeric text such as ``"12"`` or ``"3.0"`` is parsed with ``float``;
    anything else is looked up in a table of English number words from
    "zero" to "twenty".

    Args:
        text: The transcript to convert. Surrounding whitespace and letter
            case are ignored. ``None`` is tolerated.

    Returns:
        A ``float`` for numeric text, an ``int`` for a known number word,
        or ``None`` when the input is not a finite number.
    """
    import math  # local import keeps this block self-contained

    number_words = {
        # Reported issue #3: numbers below 11 are not recognized by the
        # speech recognizer.
        # Reported issue #4: 10 is only recognized if one says "one zero"
        # instead of "ten".
        "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
        "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9,
        "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
        "fourteen": 14, "fifteen": 15, "sixteen": 16, "seventeen": 17,
        "eighteen": 18, "nineteen": 19, "twenty": 20,
    }

    if text is None:
        return None

    # Normalise strings so " Ten " and "ten" hit the same table entry.
    cleaned = text.strip().lower() if isinstance(text, str) else text

    try:
        value = float(cleaned)
    except (TypeError, ValueError):
        if isinstance(cleaned, str):
            return number_words.get(cleaned)
        return None

    # float() happily parses "nan", "inf" and "infinity"; none of these is a
    # usable quantity, and "inf" would otherwise pass a "> 0" check.
    return value if math.isfinite(value) else None

def get_choice(options):
    """Keep listening until the user says something that names an option.

    Args:
        options: Mapping whose keys are collections of accepted utterances
            and whose values are the option each collection stands for.

    Returns:
        The value associated with the first key that contains the
        transcript returned by ``listen()``.
    """
    while True:
        heard = listen()
        if heard is None:
            # listen() already reported the failure; just try again.
            continue

        for aliases, selection in options.items():
            if heard not in aliases:
                continue
            print(f"Recognized choice: {selection}")
            return selection

        retry_message = "Invalid input. Please say a valid option."
        print(retry_message)
        speak(retry_message)

def get_quantity():
    """Keep listening until the user says a positive number, then return it.

    Returns:
        The value produced by ``convert_to_number`` for the first
        transcript that converts to something greater than zero.
    """
    while True:
        heard = listen()
        if heard is None:
            # listen() already reported the failure; just try again.
            continue

        amount = convert_to_number(heard)
        if amount is not None and amount > 0:
            print(f"Recognized quantity: {amount}")
            return amount

        retry_message = "Please enter a positive number."
        print(retry_message)
        speak(retry_message)

# Products on offer and their unit prices (in pounds), plus the VAT rate
# applied on top of the subtotal.
item1 = "Oranges"
item1_price = 0.75

item2 = "Pears"
item2_price = 1.25

vat_tax = 0.20

# Maps every accepted utterance for a menu entry to that entry's number.
# Reported issue #1: only the non-numeric options are recognized correctly.
# Reported issue #2: whatever is listed here, the spoken numbers "one" and
# "two" are not recognized by the speech recognizer.
options = {
    (
        1, "1", "one", "number one",
        "oranges", "orange", "orange's", "oranges'",
    ): 1,
    (
        2, "2", "two", "number two",
        "pears", "pear", "pear's", "pears'", "pier",
    ): 2,
}

# Main ordering dialogue: offer the menu, take a choice and a quantity, then
# announce the price including VAT. Every message is both spoken and printed.
while True:
    menu = ("- What would you like to taste today, guvnor?\n"
            f"  1. Our fresh {item1}, for £{item1_price} each?\n"
            f"  2. Or, our delicious {item2}, for £{item2_price} each?\n")
    speak(menu)
    print(menu)

    buyer_choice = get_choice(options)

    # Resolve the chosen menu entry to its name and unit price so the rest
    # of the dialogue is written once instead of once per item.
    if buyer_choice == 1:
        chosen_item, chosen_price = item1, item1_price
    elif buyer_choice == 2:
        chosen_item, chosen_price = item2, item2_price
    else:
        # Defensive: only reachable if options maps to something other
        # than 1 or 2. Apologise and show the menu again.
        sold_out = "\n- We just ran out of that, sorry. Please choose a valid option.\n"
        speak(sold_out)
        print(sold_out)
        continue

    question = f"\n- And how many {chosen_item} for the lady?\n"
    speak(question)
    print(question)

    buyer_quant = get_quantity()
    sub_total = chosen_price * buyer_quant
    vat_total = sub_total * vat_tax
    total = sub_total + vat_total

    receipt = (f"\n- That will be {buyer_quant:,.0f} {chosen_item} for only £{sub_total:,.2f}.\n"
               f"  Plus £{vat_total:,.2f} of V.A.T., total is £{total:,.2f}.\n"
               "  Thanks for your custom!\n")
    speak(receipt)
    print(receipt)
    break

speech_recognition and gtts don't understand numbers lower than 11

Answers (0)

Related Questions

speech_recognition and gtts don't understand numbers lower than 11

Answers (0)

Related Questions

speech_recognition and gtts don't understand numbers lower than 11