[Code Snippets #2] iOS Speech Recognition

[Code Snippets #2] iOS Speech Recognition

·

4 min read

  • Import the Speech framework at the top of your class file:
import Speech
  • Declare variables:
/// Audio engine whose input node is tapped to capture microphone buffers.
let audioEngine: AVAudioEngine = .init()
/// Recognizer that runs the recognition task; `nil` when it could not be created.
var speechRecognizer: SFSpeechRecognizer? = .init()
/// Request that receives the captured audio buffers.
let recognitionRequest: SFSpeechAudioBufferRecognitionRequest = .init()
/// Recognition task currently in flight, if any.
var recognitionTask: SFSpeechRecognitionTask? = nil
/// Synthesizer used to speak the recognized text aloud.
var speechSynth: AVSpeechSynthesizer = .init()
/// `true` while the recording toggle is switched on.
var isRecording: Bool = false
/// `true` while the playback toggle is switched on.
var isPlaying: Bool = false
/// Most recent transcription produced by the recognizer.
var speechStr: String = ""
  • Inside the record Button's action (starts or stops listening):
// Recognize speech in the user's current locale. The Locale value is passed
// directly: interpolating it into a string ("\(Locale.current)") produces the
// locale's textual description rather than its identifier, so rebuilding a
// Locale from that string does not give back the user's locale.
speechRecognizer = SFSpeechRecognizer(locale: Locale.current)
startRecording()
  • Function to listen and recognize speech:
/// Toggles speech recognition.
///
/// Flips `isRecording`. When switching on, it verifies that `speechRecognizer`
/// exists and is available, taps the microphone input, starts the audio engine
/// and starts a recognition task that stores the latest transcription in
/// `speechStr`. When switching off, it calls `stopRecording()`.
///
/// NOTE(review): `recognitionRequest` is a single shared instance that
/// `stopRecording()` ends via `endAudio()`. A request that has been ended
/// probably cannot accept audio for a second session; consider creating a
/// fresh request per session. Confirm against the Speech framework docs.
func startRecording() {
    isRecording = !isRecording

    // Toggled off: tear the session down and leave.
    guard isRecording else {
        stopRecording()
        return
    }

    // Validate the recognizer that will actually run the task, before any
    // audio resources are touched. Nothing has been started yet, so only the
    // flag needs to be rolled back.
    guard let recognizer = speechRecognizer else {
        isRecording = false
        print("Speech recognition is not supported for your current locale.")
        return
    }
    guard recognizer.isAvailable else {
        isRecording = false
        print("Speech recognition is not currently available. Check back at a later time.")
        return
    }

    // Forward every captured microphone buffer to the recognition request.
    let inputNode = audioEngine.inputNode
    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
        self?.recognitionRequest.append(buffer)
    }
    audioEngine.prepare()

    do {
        try audioEngine.start()
    } catch {
        stopRecording()
        print("There has been an audio engine error: \(error.localizedDescription)")
        // Without a running engine there is no audio to recognize.
        return
    }

    // Keep `speechStr` in sync with the best transcription so far.
    recognitionTask = recognizer.recognitionTask(with: recognitionRequest) { [weak self] result, error in
        if let result = result {
            self?.speechStr = result.bestTranscription.formattedString
        } else if let error = error {
            print("There has been a speech recognition error: \(error.localizedDescription)")
        }
    }
}
  • Function called to stop recording:
/// Stops the current recognition session.
///
/// Clears the `isRecording` flag, finishes and releases the recognition task,
/// signals the end of audio on the request, then stops and resets the audio
/// engine and removes the tap from input bus 0.
///
/// NOTE(review): `recognitionRequest` is a shared constant; after `endAudio()`
/// it presumably cannot be reused for a later session. Confirm, and consider
/// creating a new request each time recording starts.
func stopRecording() {    
    isRecording = false
    // Finish the in-flight task (if any) and drop the reference to it.
    recognitionTask?.finish()
    recognitionTask = nil
    // Tell the request that no more audio will be appended.
    recognitionRequest.endAudio()
    audioEngine.stop()
    audioEngine.reset()
    // Remove the tap that was installed on input bus 0.
    audioEngine.inputNode.removeTap(onBus: 0)
}
  • Inside the play Button's action (speaks the recognized text aloud, or stops speaking):
// Toggle playback of the recognized text.
isPlaying = !isPlaying

if !isPlaying {
    // Toggled off: cut the current utterance short.
    speechSynth.stopSpeaking(at: .immediate)
} else if speechStr.isEmpty {
    // Nothing was recognized, so nothing starts playing; roll the flag back
    // so the next tap tries to speak again instead of "stopping".
    isPlaying = false
    print("Nothing to talk about here...")
} else {
    // Route output to the speaker at the preferred sample rate.
    let session = AVAudioSession.sharedInstance()
    do {
        try session.overrideOutputAudioPort(AVAudioSession.PortOverride.speaker)
        try session.setPreferredSampleRate(44100)
    } catch {
        // Best effort: playback still proceeds on the default route.
        print(error)
    }

    speechSynth.delegate = self

    // Speak the same string that was checked above, rather than
    // force-unwrapping a separate text view's contents.
    let speechUtterance = AVSpeechUtterance(string: speechStr)
    speechUtterance.rate = 0.5
    speechUtterance.volume = 1.0
    // AVSpeechSynthesisVoice expects a BCP 47 language code; ask the framework
    // for the user's current one instead of interpolating `Locale.current`,
    // which yields the locale's description rather than a language code.
    speechUtterance.voice = AVSpeechSynthesisVoice(language: AVSpeechSynthesisVoice.currentLanguageCode())
    speechSynth.speak(speechUtterance)
}
  • Delegate callback invoked when the speech synthesizer finishes speaking (your class must conform to AVSpeechSynthesizerDelegate):
 /// Called by the speech synthesizer when an utterance finishes speaking.
 ///
 /// Resets `isPlaying` so that the playback toggle starts speaking again on
 /// its next activation instead of treating it as a "stop" request.
 ///
 /// - Parameters:
 ///   - synthesizer: The synthesizer that finished speaking.
 ///   - utterance: The utterance that finished.
 func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
     isPlaying = false
     print("SpeechSynth stopped")
 }
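  • Speech Recognition permissions (call this before recording; Info.plist must contain the NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription keys):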
/// Requests speech-recognition authorization and logs the resulting status.
///
/// The log line is printed from an operation added to the main operation
/// queue. Statuses other than the four handled below log nothing.
func requestSpeechAuthorization() {
    SFSpeechRecognizer.requestAuthorization { authStatus in
        // Pick the log line for the reported status.
        let statusMessage: String?
        switch authStatus {
        case .authorized:
            statusMessage = "Speech recognition is authorized!"
        case .denied:
            statusMessage = "User denied access to speech recognition"
        case .restricted:
            statusMessage = "Speech recognition restricted on this device"
        case .notDetermined:
            statusMessage = "Speech recognition not yet authorized"
        default:
            statusMessage = nil
        }

        OperationQueue.main.addOperation {
            if let statusMessage = statusMessage {
                print(statusMessage)
            }
        }
    }
}