Reputation: 237
I'm building a simple transcription app in SwiftUI for Spanish, English, and Japanese. When the locale is set to Mexico, Japan, or Canada, the transcript keeps growing as the speaker speaks, as it should. If the locale is set to US, the transcript resets whenever there is a brief pause and starts over again. I'm not treating the various locales any different in my code. Is there any way to remedy this? Here's my code:
import AVFoundation
import Speech
class SpeechRecognizerViewModel: ObservableObject {
@Published var transcribedText = "Press the button and start speaking"
@Published var isRecording = false
@Published var selectedLocale: SpeechLocale = .us {
didSet {
speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: selectedLocale.rawValue))
}
}
private var audioRecorder: AVAudioRecorder?
private var speechRecognizer: SFSpeechRecognizer?
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
private var lastTranscription: String = ""
init() {
speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: selectedLocale.rawValue))
}
func requestPermissions() {
SFSpeechRecognizer.requestAuthorization { authStatus in
DispatchQueue.main.async {
switch authStatus {
case .authorized:
print("Speech recognition authorized")
case .denied, .restricted, .notDetermined:
print("Speech recognition not authorized")
@unknown default:
fatalError("Unknown authorization status")
}
}
}
AVAudioSession.sharedInstance().requestRecordPermission { granted in
DispatchQueue.main.async {
if granted {
print("Microphone access granted")
} else {
print("Microphone access denied")
}
}
}
}
func toggleRecording() {
if audioEngine.isRunning {
stopRecording()
} else {
startRecording()
}
}
private func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("Audio session properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let inputNode = audioEngine.inputNode
guard let recognitionRequest = recognitionRequest else { return }
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest) { result, error in
if let result = result {
let newText = result.bestTranscription.formattedString
if newText != self.lastTranscription {
self.transcribedText = newText
self.lastTranscription = newText
}
} else if let error = error {
print("Recognition error: \(error)")
self.stopRecording()
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, when in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("Audio engine couldn't start because of an error.")
}
DispatchQueue.main.async {
self.transcribedText = "Listening..."
self.isRecording = true
}
}
private func stopRecording() {
audioEngine.stop()
audioEngine.inputNode.removeTap(onBus: 0)
recognitionRequest?.endAudio()
recognitionTask?.cancel()
recognitionTask = nil
DispatchQueue.main.async {
self.isRecording = false
}
}
}
import SwiftUI
struct ContentView: View {
@EnvironmentObject var viewModel: SpeechRecognizerViewModel
var body: some View {
VStack {
Text(viewModel.transcribedText)
.padding()
.background(Color.gray.opacity(0.2))
.cornerRadius(10)
Button(action: {
viewModel.toggleRecording()
}) {
Text(viewModel.isRecording ? "Stop Recording" : "Start Recording")
.padding()
.background(viewModel.isRecording ? Color.red : Color.blue)
.foregroundColor(.white)
.cornerRadius(8)
}
}
.padding()
.onAppear {
viewModel.requestPermissions()
}
}
}
import SwiftUI
struct LanguageSelectionView: View {
@StateObject var viewModel = SpeechRecognizerViewModel()
var body: some View {
VStack {
Picker("Select Locale", selection: $viewModel.selectedLocale) {
ForEach(SpeechLocale.allCases) { locale in
Text(locale.displayName).tag(locale)
}
}
.pickerStyle(SegmentedPickerStyle())
.padding()
NavigationLink(destination:
ContentView().environmentObject(viewModel)
) {
Text("Speech Transcriber")
}
}
}
}
import Foundation
enum SpeechLocale: String, CaseIterable, Identifiable {
case mexico = "es-MX"
case us = "en-US"
case japan = "ja-JP"
case canada = "en-CA"
var id: String { self.rawValue }
var displayName: String {
switch self {
case .mexico:
return "Mexico"
case .us:
return "US"
case .japan:
return "Japan"
case .canada:
return "Canada"
}
}
var localeIdentifier: String {
return self.rawValue
}
}
Upvotes: 0
Views: 32