Some basic code to turn on the speech recognizer on iOS and update the UI based on the text heard. This code is based on the SpokenWord demo app from Apple.

Jeremy Rand 2021-07-16 22:58:41 -04:00
parent da478782d4
commit 7f8328cc9b
3 changed files with 137 additions and 2 deletions
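A note on permissions before the diff: speech recognition is gated twice, by the runtime SFSpeechRecognizer.requestAuthorization call in listen() below and by the NSSpeechRecognitionUsageDescription string this commit adds to Info.plist. Recording through AVAudioEngine additionally requires an NSMicrophoneUsageDescription entry, which this commit does not add; iOS terminates an app that touches the microphone without one. A minimal Swift sketch of the runtime gate, with requestSpeechAuth as a hypothetical helper name (the diff performs the same check inline):

import Speech

// Hypothetical helper: run onAuthorized only when the user grants
// speech-recognition access; every other status is treated as a refusal.
func requestSpeechAuth(onAuthorized: @escaping () -> Void) {
    SFSpeechRecognizer.requestAuthorization { status in
        OperationQueue.main.addOperation {
            if status == .authorized {
                onAuthorized()
            }
        }
    }
}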

project.pbxproj

@@ -14,6 +14,7 @@
9D51560526A1EF7C0075EBC7 /* ListenerAppTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560426A1EF7C0075EBC7 /* ListenerAppTests.swift */; };
9D51561026A1EF7C0075EBC7 /* ListenerAppUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560F26A1EF7C0075EBC7 /* ListenerAppUITests.swift */; };
9D51562226A1F0DF0075EBC7 /* LICENSE in Resources */ = {isa = PBXBuildFile; fileRef = 9D51562126A1F0DF0075EBC7 /* LICENSE */; };
9D51563126A278BB0075EBC7 /* Speech.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9D51563026A278BB0075EBC7 /* Speech.framework */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@@ -48,6 +49,7 @@
9D51561126A1EF7C0075EBC7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
9D51562126A1F0DF0075EBC7 /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = "<group>"; };
9D51562A26A1F1B40075EBC7 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
9D51563026A278BB0075EBC7 /* Speech.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Speech.framework; path = System/Library/Frameworks/Speech.framework; sourceTree = SDKROOT; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -55,6 +57,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
9D51563126A278BB0075EBC7 /* Speech.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -84,6 +87,7 @@
9D51560326A1EF7C0075EBC7 /* ListenerAppTests */,
9D51560E26A1EF7C0075EBC7 /* ListenerAppUITests */,
9D5155F026A1EF7B0075EBC7 /* Products */,
9D51562F26A278BA0075EBC7 /* Frameworks */,
);
sourceTree = "<group>";
};
@@ -135,6 +139,14 @@
path = ListenerAppUITests;
sourceTree = "<group>";
};
9D51562F26A278BA0075EBC7 /* Frameworks */ = {
isa = PBXGroup;
children = (
9D51563026A278BB0075EBC7 /* Speech.framework */,
);
name = Frameworks;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */

ContentView.swift

@@ -6,11 +6,132 @@
//
import SwiftUI
import Speech
import AVFoundation

struct ContentView: View {
    @State private var listening = false
    @State private var textHeard = ""

    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
    @State private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    @State private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()

    var body: some View {
        VStack {
            Text(textHeard)
                .padding()
            Button("Listen") {
                listen()
            }
            .padding()
            .background(listening ? Color.red : Color.white)
            .foregroundColor(listening ? .black : .blue)
        }
    }

    func listen() {
        self.listening.toggle()
        if self.listening {
            SFSpeechRecognizer.requestAuthorization { authStatus in
                // The authorization status results in changes to the
                // app's interface, so process the results on the app's
                // main queue. Recording only starts once the user has
                // actually authorized speech recognition.
                OperationQueue.main.addOperation {
                    switch authStatus {
                    case .authorized:
                        do {
                            try self.startRecording()
                        } catch {
                            print("Unable to start recording: \(error)")
                            self.listening = false
                        }
                    default:
                        // Denied, restricted, or not yet determined.
                        self.listening = false
                    }
                }
            }
        } else {
            audioEngine.stop()
            recognitionRequest?.endAudio()
        }
    }

    private func startRecording() throws {
        // Cancel the previous task if it's running.
        recognitionTask?.cancel()
        self.recognitionTask = nil

        // Configure the audio session for the app.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object")
        }
        recognitionRequest.shouldReportPartialResults = true

        // Allow recognition audio to be sent to Apple's servers; set this
        // to true to keep all speech recognition data on device.
        if #available(iOS 13, *) {
            recognitionRequest.requiresOnDeviceRecognition = false
        }

        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so that it can be canceled.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            if let result = result {
                isFinal = result.isFinal
                print("Text \(result.bestTranscription.formattedString)")
                // State changes drive the UI, so apply them on the main queue.
                OperationQueue.main.addOperation {
                    self.textHeard = result.bestTranscription.formattedString
                }
            }
            if error != nil || isFinal {
                // Stop recognizing speech if there is a problem.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                OperationQueue.main.addOperation {
                    self.recognitionRequest = nil
                    self.recognitionTask = nil
                    self.listening = false
                }
            }
        }

        // Configure the microphone input.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }
}
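One gap worth flagging in the stop path above: it stops the engine and ends the request but never deactivates the shared AVAudioSession, so other apps' audio can stay ducked (the session was configured with .duckOthers). A sketch of a fuller teardown, where stopListening() is a hypothetical helper and not part of this commit:

// Hypothetical companion to startRecording(): tear the pipeline down and
// hand the audio session back to the system.
private func stopListening() {
    audioEngine.stop()
    audioEngine.inputNode.removeTap(onBus: 0)
    recognitionRequest?.endAudio()
    recognitionTask?.cancel()
    // Deactivating the session un-ducks other audio.
    try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
}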

Info.plist

@@ -17,9 +17,11 @@
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>2</string>
<string>29</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>NSSpeechRecognitionUsageDescription</key>
<string>The app uses speech recognition to dictate text to your Apple IIgs</string>
<key>UIApplicationSceneManifest</key>
<dict>
<key>UIApplicationSupportsMultipleScenes</key>