ListenerApp/ListenerGS/SpeechForwarder.swift

143 lines
5.2 KiB
Swift

//
// SpeechForwarder.swift
// ListenerGS
//
// Created by Jeremy Rand on 2021-10-18.
//
import Foundation
import os
import Speech
/// Captures microphone audio with `AVAudioEngine`, feeds it to an
/// `SFSpeechRecognizer` and forwards the transcription to a `GSConnection`.
class SpeechForwarder : SpeechForwarderProtocol {
    /// Failures raised while setting up a recognition session.
    private enum SpeechForwarderError: Error {
        /// No speech recognizer could be created for the user's language.
        case recognizerUnavailable
        /// The audio input reports a format with no sample rate or no channels.
        case invalidInputFormat
    }

    /// Recognizer for the user's first preferred language, falling back to the
    /// recognizer for the default locale. `nil` when neither can be created, in
    /// which case `startListening(connection:)` reports failure instead of
    /// crashing.
    private let speechRecognizer: SFSpeechRecognizer? = {
        if let language = Locale.preferredLanguages.first,
           let recognizer = SFSpeechRecognizer(locale: Locale(identifier: language)) {
            return recognizer
        }
        return SFSpeechRecognizer()
    }()
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
    private let logger = Logger()
    private let audioQueue = DispatchQueue.global()

    /// Requests speech-recognition authorization and starts recording.
    ///
    /// NOTE(review): recording is started without waiting for the
    /// authorization callback; if authorization turns out not to be granted,
    /// the callback asks `connection` to stop listening afterwards.
    ///
    /// - Parameter connection: Receives transcription text and is asked to
    ///   stop listening when recognition ends, fails, or is not authorized.
    /// - Returns: `true` if recording started, `false` if setup threw.
    func startListening(connection : GSConnection) -> Bool {
        SFSpeechRecognizer.requestAuthorization { authStatus in
            OperationQueue.main.addOperation {
                switch authStatus {
                case .authorized:
                    break
                case .denied, .restricted, .notDetermined:
                    connection.stopListening()
                @unknown default:
                    // Treat statuses added by future SDKs as "not authorized".
                    connection.stopListening()
                }
            }
        }
        do {
            try startRecording(connection: connection)
            logger.debug("Started listening")
        } catch {
            logger.error("Failed to start listening: \(String(describing: error))")
            return false
        }
        return true
    }

    /// Stops the audio engine and ends the current recognition session.
    func stopListening() {
        logger.debug("Stopped listening")
        tearDownRecognition()
    }

    /// Ends audio input, stops the engine, removes the microphone tap and
    /// releases the current request and task.
    private func tearDownRecognition() {
        recognitionRequest?.endAudio()
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        recognitionTask?.cancel()
        recognitionTask?.finish()
        recognitionRequest = nil
        recognitionTask = nil
    }

    /// Configures the audio session, installs a microphone tap that feeds the
    /// recognition request (converting to the recognizer's native format when
    /// it differs from the input format) and starts the audio engine.
    ///
    /// - Parameter connection: Held weakly by the recognition callback; gets
    ///   every partial/final transcription and a `stopListening()` call when
    ///   the recognizer reports an error or a final result.
    /// - Throws: `SpeechForwarderError` when no recognizer or usable input
    ///   format is available, or any error thrown by the audio session or
    ///   audio engine.
    private func startRecording(connection : GSConnection) throws {
        guard let speechRecognizer = speechRecognizer else {
            throw SpeechForwarderError.recognizerUnavailable
        }

        // Cancel the previous task if it's running.
        recognitionTask?.cancel()
        recognitionTask = nil

        // Configure the audio session for the app.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true
        request.requiresOnDeviceRecognition = false

        // Configure the microphone input.
        let inputFormat = inputNode.outputFormat(forBus: 0)
        let speechFormat = request.nativeAudioFormat
        logger.debug("Recording format \(inputFormat), speech format \(speechFormat)")

        // A zero sample rate or channel count means there is no usable input;
        // bail out before installing a tap with an unusable format (this also
        // keeps the sample-rate ratio below finite).
        guard inputFormat.sampleRate > 0, inputFormat.channelCount > 0 else {
            throw SpeechForwarderError.invalidInputFormat
        }
        recognitionRequest = request

        let converter: AVAudioConverter?
        if inputFormat.isEqual(speechFormat) {
            converter = nil
        } else {
            converter = AVAudioConverter(from: inputFormat, to: speechFormat)
            converter?.downmix = true
        }
        let sampleRateRatio = speechFormat.sampleRate / inputFormat.sampleRate

        // The closures below capture these locals rather than `self`, so the
        // tap and the recognition task do not keep this object alive.
        let logger = self.logger

        // Only one tap may be installed on a bus; drop any tap left over from
        // an earlier session before installing a new one.
        inputNode.removeTap(onBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            guard let converter = converter else {
                // Formats already match (or no converter could be created):
                // pass the captured audio through unchanged.
                request.append(buffer)
                return
            }

            // Round up so a fractional frame is not lost to truncation.
            let capacity = AVAudioFrameCount((Double(buffer.frameLength) * sampleRateRatio).rounded(.up))
            guard capacity > 0,
                  let converted = AVAudioPCMBuffer(pcmFormat: speechFormat, frameCapacity: capacity) else {
                return
            }

            // Hand the tap buffer to the converter exactly once. The converter
            // may invoke the input block again; answering with the same buffer
            // a second time would feed duplicate audio to the recognizer.
            var suppliedInput = false
            let inputBlock: AVAudioConverterInputBlock = { _, outStatus in
                if suppliedInput {
                    outStatus.pointee = .noDataNow
                    return nil
                }
                suppliedInput = true
                outStatus.pointee = .haveData
                return buffer
            }

            var conversionError: NSError? = nil
            let status = converter.convert(to: converted, error: &conversionError, withInputFrom: inputBlock)
            if status == .error || conversionError != nil {
                logger.error("Audio conversion failed: \(String(describing: conversionError))")
                return
            }
            request.append(converted)
        }

        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so that it can be canceled.
        recognitionTask = speechRecognizer.recognitionTask(with: request) { [weak connection] result, error in
            var isFinal = false
            if let result = result {
                // Forward the best transcription so far on the main queue.
                OperationQueue.main.addOperation {
                    guard let connection = connection else { return }
                    connection.set(text: result.bestTranscription.formattedString)
                }
                isFinal = result.isFinal
            }
            if error != nil {
                logger.error("Error from recognizer: \(String(describing: error))")
            }
            if error != nil || isFinal {
                OperationQueue.main.addOperation {
                    guard let connection = connection else { return }
                    connection.stopListening()
                }
            }
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            // Do not leave the tap and the recognition task installed when the
            // engine failed to start; otherwise the next attempt would try to
            // add a second tap on the same bus.
            tearDownRecognition()
            throw error
        }
    }
}