Vosk speech-to-text stops working when I disconnect my external mic

Question

In a Tauri JS app, I record audio in the JS front end, process it, and send the data to a Python child process through a Rust handler. In the Python script I use Vosk to convert speech to text in real time.

JS front end

Getting the stream using the MacBook mic's ID:

// Constraints for a mono, audio-only capture from the selected microphone.
const micConstraints = {
  audio: { deviceId: micId, sampleRate: 16000, channelCount: 1 },
  video: false,
};
const micStream = await navigator.mediaDevices.getUserMedia(micConstraints);

// Hand the live stream to the worklet pipeline that feeds the Python process.
streamAudioToPython(micStream);

Getting data from the stream and sending it to the Rust handler:

/**
 * Routes a microphone MediaStream through the 'audio-processor' worklet and
 * forwards every chunk the worklet posts to the Rust `send_audio_to_python`
 * command.
 *
 * @param micStream Stream to capture from; falls back to the outer `stream`
 *                  when omitted.
 */
const streamAudioToPython = async (micStream?: any) => {
    try {
      // The Vosk recognizer on the Python side is created for 16 kHz input.
      // A default AudioContext runs at the audio hardware's rate (commonly
      // 44.1 or 48 kHz), which changes with the connected devices, so the
      // context is pinned to the rate the recognizer expects. Audio delivered
      // at any other rate is decoded as garbage and yields empty text.
      const targetSampleRate = 16000;
      const audioContext = new AudioContext({ sampleRate: targetSampleRate });
      if (audioContext.sampleRate !== targetSampleRate) {
        console.warn(
          `AudioContext is running at ${audioContext.sampleRate} Hz, expected ${targetSampleRate} Hz`
        );
      }
      await audioContext.audioWorklet.addModule('/processor.js');

      const mediaStreamSource = audioContext.createMediaStreamSource(
        micStream || stream
      );
      setMediaStreamSource(mediaStreamSource);
      const audioProcessor = new AudioWorkletNode(
        audioContext,
        'audio-processor'
      );

      setAudioProcessor(audioProcessor);

      audioProcessor.port.onmessage = (event) => {
        const regularArray = event.data;

        // The Rust command rejects when the Python process is missing or has
        // exited; report that instead of leaving an unhandled rejection.
        invoke('send_audio_to_python', {
          audioData: regularArray,
        }).catch((error) => {
          console.error('send_audio_to_python failed~~~>', error);
        });
      };

      mediaStreamSource.connect(audioProcessor);
    } catch (error) {
      console.error('Error~~~>', error);
    }
  };

processor.js

/**
 * Worklet processor that converts the first channel of its first input from
 * float samples to 16-bit PCM and posts the raw bytes (as a plain number
 * array) to the main thread.
 */
class AudioProcessor extends AudioWorkletProcessor {
  /**
   * @param inputs Per-input arrays of per-channel Float32Array sample blocks.
   * @returns {boolean} Always true, so the node keeps being processed.
   */
  process(inputs, outputs, parameters) {
    // When nothing is connected the input's channel list is empty, so
    // `inputs[0]` is truthy while `inputs[0][0]` is undefined. Guard on the
    // channel itself instead of on the input array.
    const channelData = inputs[0] && inputs[0][0];
    if (!channelData || channelData.length === 0) {
      return true;
    }

    const int16Data = new Int16Array(channelData.length);
    for (let i = 0; i < channelData.length; i++) {
      // Clamp to [-1, 1] before scaling so out-of-range samples cannot wrap.
      int16Data[i] = Math.min(1, Math.max(-1, channelData[i])) * 0x7fff;
    }

    // Expose the 16-bit samples as bytes and copy them into a plain array
    // before posting them to the main thread.
    const byteArray = new Uint8Array(int16Data.buffer);
    const regularArray = Array.from(byteArray);

    this.port.postMessage(regularArray);
    return true;
  }
}

registerProcessor('audio-processor', AudioProcessor);

main.rs

mod execute_python_scripts;
mod show_in_folder;

/// Application entry point: starts the Python worker during setup, registers
/// the command handlers, and runs the Tauri event loop.
fn main() {
    let builder = tauri::Builder::default()
        .setup(|app| {
            // Spawn the Python child once, before any command can be invoked.
            execute_python_scripts::force_initialize_python_process(app.handle());
            Ok(())
        })
        .invoke_handler(tauri::generate_handler![
            show_in_folder::show_in_folder,
            execute_python_scripts::send_audio_to_python,
        ]);

    builder
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
}

execute_python_scripts.rs

use chrono::Local;
use std::fs::{self, OpenOptions};
use std::io::{BufRead, BufReader, Write};
use std::path::Path;
use std::process::{Child, Command, Stdio};
use std::sync::{Arc, Mutex};
use tauri::{AppHandle, Manager};

#[tauri::command]
pub async fn send_audio_to_python(audio_data: Vec) -> Result<(), String> {
    let mut child_guard = PYTHON_PROCESS
        .lock()
        .map_err(|_poisoned| "Mutex poisoned.".to_string())?;

    if let Some(child_process) = child_guard.as_mut() {
        match child_process.try_wait() {
            Ok(Some(_)) => {
                return Err("Python process is no longer running.".to_string());
            }
            Ok(None) => {
                let stdin = child_process.stdin.as_mut().ok_or("Failed to open stdin")?;
                stdin.write_all(&audio_data).map_err(|e| e.to_string())?;
            }
            Err(e) => {
                return Err(format!("Failed to check Python process status: {}", e));
            }
        }
    } else {
        return Err("No Python process is currently running.".to_string());
    }

    Ok(())
}

/// Spawns `python3 python/audio_to_speech.py` with piped stdio and starts two
/// background threads that drain its output.
///
/// * stdout lines are appended to a timestamped file under `log/` and emitted
///   to the front end as `python-log` events.
/// * stderr lines are echoed to this process's stderr.
///
/// Returns the child handle; its stdin stays attached so the caller can write
/// audio to it.
///
/// # Panics
///
/// Panics if the log directory or log file cannot be created, or if the
/// Python process cannot be spawned.
pub fn start_python_process(app_handle: AppHandle) -> std::process::Child {
    let log_dir = "log";

    // `create_dir_all` succeeds when the directory already exists, so no
    // separate existence check is needed.
    fs::create_dir_all(log_dir).expect("Failed to create log directory");

    let log_file_name = format!(
        "{}/logs_{}.txt",
        log_dir,
        Local::now().format("%Y-%m-%d_%H-%M-%S")
    );

    let mut log_file = OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_file_name)
        .expect("Failed to open or create log file");

    let mut child = Command::new("python3")
        .arg("python/audio_to_speech.py")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("Failed to start Python process");

    if let Some(stdout) = child.stdout.take() {
        let reader = BufReader::new(stdout);
        std::thread::spawn(move || {
            for line in reader.lines() {
                let text = match line {
                    Ok(text) => text,
                    Err(_) => continue,
                };
                // Logging is best-effort. Panicking here would end this thread,
                // stop draining stdout, and eventually block the Python child
                // on a full pipe.
                if let Err(e) = writeln!(log_file, "{}", text) {
                    eprintln!("Failed to write to log file: {}", e);
                }
                if let Err(e) = app_handle.emit_all("python-log", text) {
                    eprintln!("Failed to emit Python log: {}", e);
                }
            }
        });
    }

    if let Some(stderr) = child.stderr.take() {
        let reader = BufReader::new(stderr);
        std::thread::spawn(move || {
            for line in reader.lines() {
                if let Ok(text) = line {
                    eprintln!("Down===> {}", text);
                }
            }
        });
    }

    child
}

/// Starts the Python child process unless one is already stored in
/// `PYTHON_PROCESS`.
///
/// # Panics
///
/// Panics if the process mutex is poisoned.
pub fn force_initialize_python_process(app_handle: AppHandle) {
    let mut slot = PYTHON_PROCESS.lock().unwrap();

    // A child is already registered: nothing to do.
    if slot.is_some() {
        return;
    }

    *slot = Some(start_python_process(app_handle));
}

lazy_static::lazy_static! {
    /// Handle of the Python child process, or `None` until one has been started.
    static ref PYTHON_PROCESS: Arc<Mutex<Option<Child>>> = Arc::new(Mutex::new(None));
}

audio_to_speech.py

import sys
import vosk
import numpy as np
import json

# Value passed to the recognizer below as its second argument; presumably the
# sample rate in Hz that incoming audio must match -- TODO confirm against the
# capture side.
fs = 16000
# Vosk model selected by language code.
model = vosk.Model(lang="en-us")
# Module-level recognizer used by process_audio_data for every chunk.
recognizer = vosk.KaldiRecognizer(model, fs)


def process_audio_data(audio_data):
    """Feed one audio chunk to the recognizer and print what it produced.

    Prints a "text-" line when the recognizer accepts the chunk as a
    completed result, otherwise a "partial-" line with the partial result.
    """
    accepted = recognizer.AcceptWaveform(audio_data)

    if not accepted:
        partial_payload = json.loads(recognizer.PartialResult())
        print("partial-", partial_payload["partial"], flush=True)
        return

    final_payload = json.loads(recognizer.Result())
    print("text-", final_payload["text"], flush=True)


def handle_stream():
    """Read one chunk of raw audio bytes from stdin and pass it to the recognizer.

    Returns:
        True if the stream is still open (a chunk was read), False once stdin
        has reached end-of-file so the caller can stop polling.
    """
    audio_data = sys.stdin.buffer.read(4096)

    if len(audio_data) == 0:
        # An empty read means the writer closed the pipe; retrying would only
        # spin forever printing this message.
        print("No audio data received.", flush=True)
        return False

    # Samples are 16-bit, so drop a dangling odd byte (only possible on the
    # last chunk before EOF) rather than feeding half a sample.
    usable = len(audio_data) - (len(audio_data) % 2)
    if usable:
        process_audio_data(audio_data[:usable])

    return True


if __name__ == "__main__":
    # Keep consuming chunks until stdin is closed.
    while handle_stream():
        pass

Speech-to-text works fine if I use my Huawei FreeBuds Pro while recording (even though the micId I am using is that of the MacBook's mic), but when I turn off the Huawei FreeBuds Pro, refresh, and record again, speech-to-text stops working and I only get "" in text.

The recording itself works fine and is also saved as an audio file; the only issue is with the speech-to-text.

Vosk speech-to-text stops working when I disconnect my external mic

Answers (1)

Related Questions