Some basic code to turn on the speech recognizer on iOS and update the UI based on the text heard. This code is based on the SpokenWord demo app from Apple.

Jeremy Rand 2021-07-16 22:58:41 -04:00
parent da478782d4
commit 7f8328cc9b
3 changed files with 137 additions and 2 deletions
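A note on permissions before the diff: speech recognition is gated twice, by the runtime SFSpeechRecognizer.requestAuthorization call in listen() below and by the NSSpeechRecognitionUsageDescription string this commit adds to Info.plist. Recording through AVAudioEngine additionally requires an NSMicrophoneUsageDescription entry, which this commit does not add; iOS terminates an app that touches the microphone without one. A minimal Swift sketch of the runtime gate, with requestSpeechAuth as a hypothetical helper name (the diff performs the same check inline):

import Speech

// Hypothetical helper: run onAuthorized only when the user grants
// speech-recognition access; every other status is treated as a refusal.
func requestSpeechAuth(onAuthorized: @escaping () -> Void) {
    SFSpeechRecognizer.requestAuthorization { status in
        OperationQueue.main.addOperation {
            if status == .authorized {
                onAuthorized()
            }
        }
    }
}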

project.pbxproj

@@ -14,6 +14,7 @@
9D51560526A1EF7C0075EBC7 /* ListenerAppTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560426A1EF7C0075EBC7 /* ListenerAppTests.swift */; };
9D51561026A1EF7C0075EBC7 /* ListenerAppUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9D51560F26A1EF7C0075EBC7 /* ListenerAppUITests.swift */; };
9D51562226A1F0DF0075EBC7 /* LICENSE in Resources */ = {isa = PBXBuildFile; fileRef = 9D51562126A1F0DF0075EBC7 /* LICENSE */; };
9D51563126A278BB0075EBC7 /* Speech.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9D51563026A278BB0075EBC7 /* Speech.framework */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
@@ -48,6 +49,7 @@
9D51561126A1EF7C0075EBC7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
9D51562126A1F0DF0075EBC7 /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = "<group>"; };
9D51562A26A1F1B40075EBC7 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
9D51563026A278BB0075EBC7 /* Speech.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Speech.framework; path = System/Library/Frameworks/Speech.framework; sourceTree = SDKROOT; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -55,6 +57,7 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
9D51563126A278BB0075EBC7 /* Speech.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -84,6 +87,7 @@
9D51560326A1EF7C0075EBC7 /* ListenerAppTests */,
9D51560E26A1EF7C0075EBC7 /* ListenerAppUITests */,
9D5155F026A1EF7B0075EBC7 /* Products */,
9D51562F26A278BA0075EBC7 /* Frameworks */,
);
sourceTree = "<group>";
};
@@ -135,6 +139,14 @@
path = ListenerAppUITests;
sourceTree = "<group>";
};
9D51562F26A278BA0075EBC7 /* Frameworks */ = {
isa = PBXGroup;
children = (
9D51563026A278BB0075EBC7 /* Speech.framework */,
);
name = Frameworks;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */

ContentView.swift

@@ -6,11 +6,132 @@
//
import SwiftUI
import Speech
import AVFoundation

struct ContentView: View {
    @State private var listening = false
    @State private var textHeard = ""

    private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
    @State private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    @State private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()

    var body: some View {
        VStack {
            Text(textHeard)
                .padding()
            Button("Listen") {
                listen()
            }
            .padding()
            .background(listening ? Color.red : Color.white)
            .foregroundColor(listening ? .black : .blue)
        }
    }

    func listen() {
        self.listening.toggle()
        if self.listening {
            SFSpeechRecognizer.requestAuthorization { authStatus in
                // The authorization status results in changes to the
                // app's interface, so process the results on the app's
                // main queue. Recording only starts once the user has
                // actually authorized speech recognition.
                OperationQueue.main.addOperation {
                    switch authStatus {
                    case .authorized:
                        do {
                            try self.startRecording()
                        } catch {
                            print("Unable to start recording: \(error)")
                            self.listening = false
                        }
                    default:
                        // Denied, restricted, or not yet determined.
                        self.listening = false
                    }
                }
            }
        } else {
            audioEngine.stop()
            recognitionRequest?.endAudio()
        }
    }

    private func startRecording() throws {
        // Cancel the previous task if it's running.
        recognitionTask?.cancel()
        self.recognitionTask = nil

        // Configure the audio session for the app.
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else {
            fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object")
        }
        recognitionRequest.shouldReportPartialResults = true

        // Allow recognition audio to be sent to Apple's servers; set this
        // to true to keep all speech recognition data on device.
        if #available(iOS 13, *) {
            recognitionRequest.requiresOnDeviceRecognition = false
        }

        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so that it can be canceled.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            if let result = result {
                isFinal = result.isFinal
                print("Text \(result.bestTranscription.formattedString)")
                // State changes drive the UI, so apply them on the main queue.
                OperationQueue.main.addOperation {
                    self.textHeard = result.bestTranscription.formattedString
                }
            }
            if error != nil || isFinal {
                // Stop recognizing speech if there is a problem.
                self.audioEngine.stop()
                inputNode.removeTap(onBus: 0)
                OperationQueue.main.addOperation {
                    self.recognitionRequest = nil
                    self.recognitionTask = nil
                    self.listening = false
                }
            }
        }

        // Configure the microphone input.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }

        audioEngine.prepare()
        try audioEngine.start()
    }
}
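One gap worth flagging in the stop path above: it stops the engine and ends the request but never deactivates the shared AVAudioSession, so other apps' audio can stay ducked (the session was configured with .duckOthers). A sketch of a fuller teardown, where stopListening() is a hypothetical helper and not part of this commit:

// Hypothetical companion to startRecording(): tear the pipeline down and
// hand the audio session back to the system.
private func stopListening() {
    audioEngine.stop()
    audioEngine.inputNode.removeTap(onBus: 0)
    recognitionRequest?.endAudio()
    recognitionTask?.cancel()
    // Deactivating the session un-ducks other audio.
    try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
}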

Info.plist

@@ -17,9 +17,11 @@
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>2</string>
<string>29</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>NSSpeechRecognitionUsageDescription</key>
<string>The app uses speech recognition to dictate text to your Apple IIgs</string>
<key>UIApplicationSceneManifest</key>
<dict>
<key>UIApplicationSupportsMultipleScenes</key>