Fix a bug where the connection would not go down when switching to another destination while the previous destination was in listen mode. Fix the problem with supporting all input types on my iMac: I have added code which converts from the input's audio format to the speech recognizer's preferred audio format.
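The audio-format fix boils down to installing the microphone tap in the input node's own format and, when that format differs from the recognizer's native format, running each buffer through an AVAudioConverter before appending it to the recognition request. Below is a minimal, self-contained sketch of that approach; the function and parameter names are illustrative only, and the real code is in the SpeechForwarder changes further down.

import AVFoundation
import Speech

func installConvertingTap(on inputNode: AVAudioInputNode,
                          feeding request: SFSpeechAudioBufferRecognitionRequest) {
    let inputFormat = inputNode.outputFormat(forBus: 0)
    let speechFormat = request.nativeAudioFormat

    // Only build a converter when the two formats actually differ.
    let converter = inputFormat.isEqual(speechFormat)
        ? nil
        : AVAudioConverter(from: inputFormat, to: speechFormat)
    converter?.downmix = true

    inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { buffer, _ in
        guard let converter = converter else {
            request.append(buffer)      // formats already match; pass through
            return
        }
        // Size the output buffer for the sample-rate ratio between the formats.
        let capacity = AVAudioFrameCount(Double(buffer.frameLength) *
                                         speechFormat.sampleRate / inputFormat.sampleRate)
        guard let converted = AVAudioPCMBuffer(pcmFormat: speechFormat,
                                               frameCapacity: capacity) else { return }
        var error: NSError? = nil
        converter.convert(to: converted, error: &error) { _, outStatus in
            outStatus.pointee = .haveData
            return buffer
        }
        if error == nil {
            request.append(converted)
        }
    }
}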

Jeremy Rand 2022-03-24 22:58:11 -04:00
parent 109213a577
commit 7279d7ecb1
5 changed files with 50 additions and 14 deletions

View File

@@ -20,6 +20,7 @@ enum GSConnectionState {
case connected
case listening
case stoplistening
case deleting
}
extension GSConnectionState: CustomStringConvertible
@@ -36,6 +37,8 @@ extension GSConnectionState: CustomStringConvertible
return "listening"
case .stoplistening:
return "stop listening"
case .deleting:
return "deleting"
}
}
}
@@ -95,6 +98,9 @@ class GSConnection : ObservableObject {
case .stoplistening:
legalTransition = ((oldState == .connected) || (oldState == .listening))
case .deleting:
legalTransition = true
}
if (!legalTransition) {
@@ -182,6 +188,7 @@ class GSConnection : ObservableObject {
}
deinit {
changeState(newState:.deleting)
disconnect()
}
@@ -197,7 +204,10 @@ class GSConnection : ObservableObject {
waitForWriteQueue()
waitForReadQueue()
self.changeState(newState:.disconnected)
if (state != .deleting) {
changeState(newState:.disconnected)
}
}
func stopListening() {
@@ -230,7 +240,8 @@ class GSConnection : ObservableObject {
func listen(speechForwarder: SpeechForwarderProtocol) {
textHeard = ""
lastSent = ""
writeQueue.addOperation {
writeQueue.addOperation { [weak self] in
guard let self = self else { return }
if (!self.sendListenMsg(isListening: true)) {
self.errorOccurred(title: "Write Error", message: "Unable to send data to the GS")
return
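Taken together, the GSConnection changes above amount to the following teardown pattern, shown here as a simplified standalone sketch (not the actual class): deinit moves the state machine to .deleting before disconnecting, disconnect() skips the final transition back to .disconnected during teardown, and the queued listen operation captures self weakly so a pending operation can no longer keep the connection alive.

import Foundation

// Simplified model of the teardown path; the class and method bodies are illustrative.
class ConnectionSketch {
    enum State { case disconnected, connected, listening, deleting }
    private(set) var state: State = .disconnected
    private let writeQueue = OperationQueue()

    func changeState(newState: State) {
        // The real class also validates the transition; .deleting is always legal.
        state = newState
    }

    func listen() {
        // Weak capture: a queued operation no longer retains the connection,
        // so dropping the last external reference actually triggers deinit.
        writeQueue.addOperation { [weak self] in
            guard let self = self else { return }
            self.changeState(newState: .listening)
        }
    }

    func disconnect() {
        writeQueue.waitUntilAllOperationsAreFinished()
        // Skip the final state change while the object is being deallocated.
        if state != .deleting {
            changeState(newState: .disconnected)
        }
    }

    deinit {
        changeState(newState: .deleting)
        disconnect()
    }
}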

View File

@@ -56,7 +56,7 @@ struct GSView: View {
.disabled(true)
.buttonStyle(GSButtonStyle())
case .connected, .listening, .stoplistening:
case .connected, .listening, .stoplistening, .deleting:
Button("\(Image(systemName: "desktopcomputer.trianglebadge.exclamationmark")) Disconnect from \(ipAddress)") {
connection.disconnect()
}
@@ -66,7 +66,7 @@ struct GSView: View {
switch (connection.state)
{
case .disconnected, .stoplistening, .connecting:
case .disconnected, .stoplistening, .connecting, .deleting:
Button("\(Image(systemName: "ear.and.waveform")) Listen and Send Text") {
}
.disabled(true)

View File

@@ -19,7 +19,7 @@
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>645</string>
<string>759</string>
<key>LSApplicationCategoryType</key>
<string>public.app-category.utilities</string>
<key>LSRequiresIPhoneOS</key>

View File

@@ -21,6 +21,8 @@ class SpeechForwarder : SpeechForwarderProtocol {
private let logger = Logger()
private let audioQueue = DispatchQueue.global()
func startListening(connection : GSConnection) -> Bool {
SFSpeechRecognizer.requestAuthorization { authStatus in
OperationQueue.main.addOperation {
@@ -71,18 +73,42 @@ class SpeechForwarder : SpeechForwarderProtocol {
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
let inputNode = audioEngine.inputNode
// Configure the microphone input.
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
// Create and configure the speech recognition request.
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true
recognitionRequest.requiresOnDeviceRecognition = false
// Configure the microphone input.
let inputFormat = inputNode.outputFormat(forBus: 0)
let speechFormat = recognitionRequest.nativeAudioFormat
logger.debug("Recording format \(inputFormat), speech format \(speechFormat)")
var formatConverter: AVAudioConverter?
if (!inputFormat.isEqual(speechFormat)) {
formatConverter = AVAudioConverter(from:inputFormat, to: speechFormat)
formatConverter?.downmix = true
}
inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
guard let formatConverter = formatConverter else {
self.recognitionRequest?.append(buffer)
return
}
// self.recognitionRequest?.append(buffer)
let pcmBuffer = AVAudioPCMBuffer(pcmFormat: speechFormat, frameCapacity: AVAudioFrameCount(Double(buffer.frameLength) * speechFormat.sampleRate / inputFormat.sampleRate))
var error: NSError? = nil
let inputBlock: AVAudioConverterInputBlock = {inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
formatConverter.convert(to: pcmBuffer!, error: &error, withInputFrom: inputBlock)
if error == nil {
self.recognitionRequest?.append(pcmBuffer!)
}
}
// Create a recognition task for the speech recognition session.
// Keep a reference to the task so that it can be canceled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { [weak connection] result, error in

View File

@@ -364,7 +364,6 @@ class ListenerGSTests: XCTestCase {
XCTAssert(server.getDisconnect())
}
/* This test hangs at the getDisconnect() line at the end. Something is holding a connection reference.
func testDestructWhileListening() throws {
let server = GSServerMock()
@@ -392,8 +391,9 @@ class ListenerGSTests: XCTestCase {
XCTAssert(!speechForwarder.isListening)
connection.listen(speechForwarder: speechForwarder)
XCTAssert(server.getListenState(isListening: true))
connection.waitForWriteQueue()
connection.waitForMain()
XCTAssert(server.getListenState(isListening: true))
XCTAssert(speechForwarder.isListening)
XCTAssertEqual(connection.state, .listening)
@@ -403,7 +403,6 @@ class ListenerGSTests: XCTestCase {
XCTAssert(server.getDisconnect())
}
*/
/*
func testPerformanceExample() throws {