Fix a bug that prevented the connection from being torn down when switching to another destination while the previous destination was in listen mode. Fix a problem where not all input types were supported on my iMac: I have added code that converts from the audio format of the input to the preferred audio format of the speech recognizer.

This commit is contained in:
Jeremy Rand 2022-03-24 22:58:11 -04:00
parent 109213a577
commit 7279d7ecb1
5 changed files with 50 additions and 14 deletions

View File

@ -20,6 +20,7 @@ enum GSConnectionState {
case connected case connected
case listening case listening
case stoplistening case stoplistening
case deleting
} }
extension GSConnectionState: CustomStringConvertible extension GSConnectionState: CustomStringConvertible
@ -36,6 +37,8 @@ extension GSConnectionState: CustomStringConvertible
return "listening" return "listening"
case .stoplistening: case .stoplistening:
return "stop listening" return "stop listening"
case .deleting:
return "deleting"
} }
} }
} }
@ -95,6 +98,9 @@ class GSConnection : ObservableObject {
case .stoplistening: case .stoplistening:
legalTransition = ((oldState == .connected) || (oldState == .listening)) legalTransition = ((oldState == .connected) || (oldState == .listening))
case .deleting:
legalTransition = true
} }
if (!legalTransition) { if (!legalTransition) {
@ -182,6 +188,7 @@ class GSConnection : ObservableObject {
} }
deinit { deinit {
changeState(newState:.deleting)
disconnect() disconnect()
} }
@ -197,7 +204,10 @@ class GSConnection : ObservableObject {
waitForWriteQueue() waitForWriteQueue()
waitForReadQueue() waitForReadQueue()
self.changeState(newState:.disconnected)
if (state != .deleting) {
changeState(newState:.disconnected)
}
} }
func stopListening() { func stopListening() {
@ -230,7 +240,8 @@ class GSConnection : ObservableObject {
func listen(speechForwarder: SpeechForwarderProtocol) { func listen(speechForwarder: SpeechForwarderProtocol) {
textHeard = "" textHeard = ""
lastSent = "" lastSent = ""
writeQueue.addOperation { writeQueue.addOperation { [weak self] in
guard let self = self else { return }
if (!self.sendListenMsg(isListening: true)) { if (!self.sendListenMsg(isListening: true)) {
self.errorOccurred(title: "Write Error", message: "Unable to send data to the GS") self.errorOccurred(title: "Write Error", message: "Unable to send data to the GS")
return return

View File

@ -56,7 +56,7 @@ struct GSView: View {
.disabled(true) .disabled(true)
.buttonStyle(GSButtonStyle()) .buttonStyle(GSButtonStyle())
case .connected, .listening, .stoplistening: case .connected, .listening, .stoplistening, .deleting:
Button("\(Image(systemName: "desktopcomputer.trianglebadge.exclamationmark")) Disconnect from \(ipAddress)") { Button("\(Image(systemName: "desktopcomputer.trianglebadge.exclamationmark")) Disconnect from \(ipAddress)") {
connection.disconnect() connection.disconnect()
} }
@ -66,7 +66,7 @@ struct GSView: View {
switch (connection.state) switch (connection.state)
{ {
case .disconnected, .stoplistening, .connecting: case .disconnected, .stoplistening, .connecting, .deleting:
Button("\(Image(systemName: "ear.and.waveform")) Listen and Send Text") { Button("\(Image(systemName: "ear.and.waveform")) Listen and Send Text") {
} }
.disabled(true) .disabled(true)

View File

@ -19,7 +19,7 @@
<key>CFBundleShortVersionString</key> <key>CFBundleShortVersionString</key>
<string>1.0</string> <string>1.0</string>
<key>CFBundleVersion</key> <key>CFBundleVersion</key>
<string>645</string> <string>759</string>
<key>LSApplicationCategoryType</key> <key>LSApplicationCategoryType</key>
<string>public.app-category.utilities</string> <string>public.app-category.utilities</string>
<key>LSRequiresIPhoneOS</key> <key>LSRequiresIPhoneOS</key>

View File

@ -21,6 +21,8 @@ class SpeechForwarder : SpeechForwarderProtocol {
private let logger = Logger() private let logger = Logger()
private let audioQueue = DispatchQueue.global()
func startListening(connection : GSConnection) -> Bool { func startListening(connection : GSConnection) -> Bool {
SFSpeechRecognizer.requestAuthorization { authStatus in SFSpeechRecognizer.requestAuthorization { authStatus in
OperationQueue.main.addOperation { OperationQueue.main.addOperation {
@ -71,18 +73,42 @@ class SpeechForwarder : SpeechForwarderProtocol {
try audioSession.setActive(true, options: .notifyOthersOnDeactivation) try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
let inputNode = audioEngine.inputNode let inputNode = audioEngine.inputNode
// Configure the microphone input.
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
// Create and configure the speech recognition request. // Create and configure the speech recognition request.
recognitionRequest = SFSpeechAudioBufferRecognitionRequest() recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") } guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
recognitionRequest.shouldReportPartialResults = true recognitionRequest.shouldReportPartialResults = true
recognitionRequest.requiresOnDeviceRecognition = false recognitionRequest.requiresOnDeviceRecognition = false
// Configure the microphone input.
let inputFormat = inputNode.outputFormat(forBus: 0)
let speechFormat = recognitionRequest.nativeAudioFormat
logger.debug("Recording format \(inputFormat), speech format \(speechFormat)")
var formatConverter: AVAudioConverter?
if (!inputFormat.isEqual(speechFormat)) {
formatConverter = AVAudioConverter(from:inputFormat, to: speechFormat)
formatConverter?.downmix = true
}
inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
guard let formatConverter = formatConverter else {
self.recognitionRequest?.append(buffer)
return
}
// self.recognitionRequest?.append(buffer)
let pcmBuffer = AVAudioPCMBuffer(pcmFormat: speechFormat, frameCapacity: AVAudioFrameCount(Double(buffer.frameLength) * speechFormat.sampleRate / inputFormat.sampleRate))
var error: NSError? = nil
let inputBlock: AVAudioConverterInputBlock = {inNumPackets, outStatus in
outStatus.pointee = AVAudioConverterInputStatus.haveData
return buffer
}
formatConverter.convert(to: pcmBuffer!, error: &error, withInputFrom: inputBlock)
if error == nil {
self.recognitionRequest?.append(pcmBuffer!)
}
}
// Create a recognition task for the speech recognition session. // Create a recognition task for the speech recognition session.
// Keep a reference to the task so that it can be canceled. // Keep a reference to the task so that it can be canceled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { [weak connection] result, error in recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { [weak connection] result, error in

View File

@ -364,7 +364,6 @@ class ListenerGSTests: XCTestCase {
XCTAssert(server.getDisconnect()) XCTAssert(server.getDisconnect())
} }
/* This test hangs at the getDisconnect() line at the end. Something is holding a connection reference.
func testDestructWhileListening() throws { func testDestructWhileListening() throws {
let server = GSServerMock() let server = GSServerMock()
@ -392,8 +391,9 @@ class ListenerGSTests: XCTestCase {
XCTAssert(!speechForwarder.isListening) XCTAssert(!speechForwarder.isListening)
connection.listen(speechForwarder: speechForwarder) connection.listen(speechForwarder: speechForwarder)
XCTAssert(server.getListenState(isListening: true)) connection.waitForWriteQueue()
connection.waitForMain() connection.waitForMain()
XCTAssert(server.getListenState(isListening: true))
XCTAssert(speechForwarder.isListening) XCTAssert(speechForwarder.isListening)
XCTAssertEqual(connection.state, .listening) XCTAssertEqual(connection.state, .listening)
@ -403,7 +403,6 @@ class ListenerGSTests: XCTestCase {
XCTAssert(server.getDisconnect()) XCTAssert(server.getDisconnect())
} }
*/
/* /*
func testPerformanceExample() throws { func testPerformanceExample() throws {