From 7f8328cc9b7c96127fe46ad97aa589033d7cb11c Mon Sep 17 00:00:00 2001 From: Jeremy Rand Date: Fri, 16 Jul 2021 22:58:41 -0400 Subject: [PATCH] Some basic code to turn on the speech recognizer on iOS and update the UI based on the text heard. This code is based on the SpokenWord demo app from Apple. --- ListenerApp.xcodeproj/project.pbxproj | 12 +++ ListenerApp/ContentView.swift | 123 +++++++++++++++++++++++++- ListenerApp/Info.plist | 4 +- 3 files changed, 137 insertions(+), 2 deletions(-) diff --git a/ListenerApp.xcodeproj/project.pbxproj b/ListenerApp.xcodeproj/project.pbxproj index 9c4a49e..f10fa66 100644 --- a/ListenerApp.xcodeproj/project.pbxproj +++ b/ListenerApp.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 9D51560526A1EF7C0075EBC7 /* ListenerAppTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560426A1EF7C0075EBC7 /* ListenerAppTests.swift */; }; 9D51561026A1EF7C0075EBC7 /* ListenerAppUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560F26A1EF7C0075EBC7 /* ListenerAppUITests.swift */; }; 9D51562226A1F0DF0075EBC7 /* LICENSE in Resources */ = {isa = PBXBuildFile; fileRef = 9D51562126A1F0DF0075EBC7 /* LICENSE */; }; + 9D51563126A278BB0075EBC7 /* Speech.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9D51563026A278BB0075EBC7 /* Speech.framework */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -48,6 +49,7 @@ 9D51561126A1EF7C0075EBC7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 9D51562126A1F0DF0075EBC7 /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; 9D51562A26A1F1B40075EBC7 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 9D51563026A278BB0075EBC7 /* Speech.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Speech.framework; path = 
/// Main screen of the app: shows the most recent transcription and a "Listen"
/// button that toggles live speech recognition of audio captured from the
/// audio engine's input node.
///
/// The recognition flow follows Apple's SpokenWord sample: audio buffers from
/// the input node are appended to an `SFSpeechAudioBufferRecognitionRequest`
/// and partial results are pushed into `textHeard` as they arrive.
struct ContentView: View {
    /// Reasons a recognition session cannot be started.
    private enum ListenError: Error {
        /// No recognizer could be created for the locale, or it reports
        /// itself as currently unavailable.
        case recognizerUnavailable
    }

    /// `true` while the user has asked the app to listen; drives the button colours.
    @State private var listening = false
    /// Best transcription received so far for the current/last session.
    @State private var textHeard = ""

    // `SFSpeechRecognizer(locale:)` is failable. Keep the optional and handle
    // it in `startRecording()` instead of crashing when the view is created.
    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))

    @State private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?

    @State private var recognitionTask: SFSpeechRecognitionTask?

    private let audioEngine = AVAudioEngine()

    var body: some View {
        VStack {
            Label(textHeard, systemImage: "")
                .labelStyle(TitleOnlyLabelStyle())
                .padding()
            Button("Listen") {
                listen()
            }
            .padding()
            .background(listening ? Color.red : Color.white)
            .foregroundColor(listening ? .black : .blue)
        }
    }

    /// Toggles listening.
    ///
    /// When turning listening off, the audio engine is stopped and the
    /// request is told that no more audio is coming, so the recognizer can
    /// deliver its final result. When turning it on, speech-recognition
    /// authorization is requested first and recording only begins once the
    /// user has actually authorized it.
    func listen() {
        listening.toggle()

        guard listening else {
            audioEngine.stop()
            recognitionRequest?.endAudio()
            return
        }

        SFSpeechRecognizer.requestAuthorization { authStatus in
            // The authorization status results in changes to the app's
            // interface, so process the result on the main queue.
            OperationQueue.main.addOperation {
                // The user may have tapped the button again while the
                // authorization request was still pending.
                guard self.listening else { return }

                switch authStatus {
                case .authorized:
                    self.beginRecording()
                case .denied, .restricted, .notDetermined:
                    self.listening = false
                @unknown default:
                    self.listening = false
                }
            }
        }
    }

    /// Starts a recognition session and rolls the UI state back if that fails.
    private func beginRecording() {
        do {
            try startRecording()
        } catch {
            print("Unable to start speech recognition: \(error)")
            abortSession()
        }
    }

    /// Drops any half-started session so the button returns to its idle state.
    private func abortSession() {
        audioEngine.stop()
        recognitionTask?.cancel()
        recognitionTask = nil
        recognitionRequest = nil
        listening = false
    }

    /// Configures the audio session, creates the recognition request and task,
    /// installs a tap on the input node and starts the audio engine.
    ///
    /// - Throws: `ListenError.recognizerUnavailable` when there is no usable
    ///   recognizer, or any error raised while configuring the audio session
    ///   or starting the audio engine.
    private func startRecording() throws {
        guard let speechRecognizer = speechRecognizer, speechRecognizer.isAvailable else {
            throw ListenError.recognizerUnavailable
        }

        // Cancel the previous task if it's running.
        recognitionTask?.cancel()
        recognitionTask = nil

        // Configure the audio session for the app.
        // NOTE(review): capturing from the input node presumably also needs a
        // microphone usage description in Info.plist — confirm it is declared.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = true

        // On-device recognition is not required here, so the request is not
        // restricted to on-device processing. Set this to `true` to keep the
        // audio on the device.
        if #available(iOS 13, *) {
            request.requiresOnDeviceRecognition = false
        }
        recognitionRequest = request

        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so that it can be canceled.
        recognitionTask = speechRecognizer.recognitionTask(with: request) { result, error in
            // A task cancelled by a newer session still reports back once;
            // ignore it so it cannot tear down the session that replaced it.
            guard self.recognitionRequest === request else { return }

            var isFinal = false

            if let result = result {
                // Update the text view with the results.
                self.textHeard = result.bestTranscription.formattedString
                isFinal = result.isFinal
                print("Text \(result.bestTranscription.formattedString)")
            }

            if error != nil || isFinal {
                // Stop recognizing speech if there is a problem or we are done.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)

                self.recognitionRequest = nil
                self.recognitionTask = nil
                self.listening = false
            }
        }

        // Configure the microphone input. Clear any tap left over from a
        // previous session first, so a second tap is never added to the bus.
        inputNode.removeTap(onBus: 0)
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, _: AVAudioTime) in
            // Use the captured request rather than reading SwiftUI state from
            // the audio thread.
            request.append(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            // Do not leave the tap behind when the engine could not start.
            inputNode.removeTap(onBus: 0)
            throw error
        }
    }
}