apphouse

import { action, computed, makeObservable, observable } from 'mobx';
import { Recorder } from './Recorder';
import Microphone, { MicrophoneOption } from './Microphone';
import Visualizer from './Visualizer';
import AudioSource from './AudioSource';
import Player from './Player';
import Speech, { GrammarItemType } from './Speech';
import { audioLog } from './AudioLog';

/** Visualisations that `showAudioMeter` can draw on the visualizer canvas. */
export enum AudioMeterType {
  frequency = 'frequency',
  wave = 'wave',
  bar = 'bar'
}

/** A recorded segment that was stored without a matching id. */
interface NonMatchedMemos {
  text: string;
  url: string;
}

/**
 * Speech recognizer that runs in continuous mode while recording audio.
 *
 * Every recognition result stops the recorder; the finished audio segment is
 * turned into an object URL, filed under the id that matched the spoken text
 * (or under `otherMemos` when nothing matched) and recording is restarted for
 * the next segment.
 */
export default class ContinuousSpeechRecognitionRecorder extends Speech {
  audioSource: AudioSource;
  /** Set by `stop()`; makes `onRecordingComplete` discard the segment. */
  endRecording: boolean;
  language: string;
  /** Id passed to the most recent `stopRecording` call. */
  matchingId?: string;
  microphone: Microphone;
  onAudioSegmentAvailable?: (blob: Blob, matchingId?: string) => void;
  /** Segments that had spoken text but no matching id. */
  otherMemos: NonMatchedMemos[];
  player: Player;
  /**
   * For continuous recording, all recordings will be in the result array; we
   * need to know where we are to be able to retrieve the correct result from
   * the result array.
   */
  recIndex: number;
  recordedAudioBlob: Blob | undefined;
  recordedAudioUrl: string;
  recorder: Recorder;
  shouldAutoRestartOnEndedPrematurely: boolean;
  /** Cleaned transcript of the most recent recognition result. */
  spokenText?: string;
  visualizer: Visualizer;
  /** DOM id of the canvas element the visualizer draws on. */
  visualizerId: string;
  /** Object URLs of recorded segments, keyed by matching id. */
  voiceMemos: Record<string, string[]>;

  /**
   * @param grammar Grammar items forwarded to the `Speech` base class.
   * @param visualizerId DOM id of the canvas used for the audio visualisation.
   * @param onAudioSegmentAvailable Called with every completed audio segment
   *   and the id it was matched to (if any).
   * @param shouldAutoRestartOnEndedPrematurely When true, `onSpeechEnd`
   *   starts recording again. Defaults to false.
   * @param lang Value assigned to `recognition.lang` by `start()`. Defaults
   *   to 'en'.
   */
  constructor(
    grammar: GrammarItemType[],
    visualizerId: string,
    onAudioSegmentAvailable?: (blob: Blob, matchingId?: string) => void,
    shouldAutoRestartOnEndedPrematurely?: boolean,
    lang?: string
  ) {
    super(grammar);
    this.audioSource = new AudioSource();
    this.endRecording = false;
    this.language = lang || 'en';
    this.matchingId = undefined;
    this.microphone = new Microphone();
    this.onAudioSegmentAvailable = onAudioSegmentAvailable;
    this.otherMemos = [];
    this.player = new Player();
    this.recIndex = 0;
    this.recorder = new Recorder(this.onRecordingComplete);
    this.shouldAutoRestartOnEndedPrematurely =
      shouldAutoRestartOnEndedPrematurely || false;
    this.spokenText = undefined;
    this.visualizer = new Visualizer();
    this.visualizerId = visualizerId;
    this.voiceMemos = {};
    // Only create a Blob when a `window` global exists.
    this.recordedAudioBlob =
      typeof window !== 'undefined' ? new Blob() : undefined;
    this.recordedAudioUrl = '';

    makeObservable(this, {
      shouldAutoRestartOnEndedPrematurely: observable,
      onAudioSegmentAvailable: observable,
      visualizerId: observable,
      language: observable,
      audioSource: observable,
      currentVoiceMemos: computed,
      matchingId: observable,
      onRecordingComplete: action,
      onResult: action,
      otherMemos: observable,
      recIndex: observable,
      recordedAudioBlob: observable,
      recordedAudioUrl: observable,
      recorder: observable,
      setRecordedAudioUrl: action,
      spokenText: observable,
      startRecording: action,
      stopRecording: action,
      visualizer: observable,
      voiceMemos: observable,
      // A method like its siblings, so it is annotated as an action.
      showAudioMeter: action,
      onInputChange: action,
      stopStreaming: action
    });
  }

  /** Recorded segment URLs keyed by matching id. */
  get currentVoiceMemos(): Record<string, string[]> {
    return this.voiceMemos;
  }

  /** Object URL of the most recently completed segment ('' if none yet). */
  get audioUrl(): string {
    return this.recordedAudioUrl;
  }

  /** Initialises the recorder with the microphone's current stream. */
  init = () => {
    this.recorder.init(this.microphone.stream);
  };

  setRecordedAudioUrl = (url: string) => {
    this.recordedAudioUrl = url;
  };

  /**
   * Recorder callback for a finished audio segment.
   *
   * Ignored once `stop()` has been called. Otherwise the segment is stored,
   * announced through `onAudioSegmentAvailable`, and recording is restarted.
   *
   * @param audio The recorded audio segment.
   */
  onRecordingComplete = (audio: Blob): void => {
    if (this.endRecording) {
      return;
    }

    this.recordedAudioBlob = audio;
    const url = URL.createObjectURL(audio);
    this.setRecordedAudioUrl(url);

    const { matchingId, spokenText } = this;
    if (matchingId) {
      // Append to the memos already stored for this id, if any.
      this.voiceMemos[matchingId] = [
        ...(this.voiceMemos[matchingId] ?? []),
        url
      ];
    } else if (spokenText) {
      this.otherMemos = [...this.otherMemos, { text: spokenText, url }];
    }

    this.onAudioSegmentAvailable?.(audio, matchingId);
    this.startRecording();
  };

  /** Starts continuous recognition and the first audio recording. */
  start = () => {
    this.endRecording = false;
    this.spokenText = undefined;
    this.recognition.continuous = true;
    this.recognition.lang = this.language;
    this.recognition.onresult = this.onResult;
    this.recognition.start();
    this.startRecording();
  };

  /**
   * Starts the microphone stream, initialises the audio context and connects
   * the stream to the audio source.
   *
   * @returns The stream and analyzer once both are available, or `undefined`
   *   when any step of the setup did not produce a usable result.
   */
  private prepareAudioInput = async () => {
    // Called synchronously so the stream request stays in the caller's tick.
    const pendingStream = this.microphone.startStream();
    if (!pendingStream) {
      return undefined;
    }
    await pendingStream;

    this.audioSource.initContext();
    if (!this.microphone.isReady) {
      return undefined;
    }

    const stream = this.microphone.stream;
    if (!stream) {
      return undefined;
    }
    await this.audioSource.initWithStream(stream);

    const analyzer = this.audioSource.analyzer;
    if (!analyzer || !this.audioSource.hasSource) {
      return undefined;
    }
    return { stream, analyzer };
  };

  /**
   * Looks up the visualizer canvas by `visualizerId`.
   *
   * @returns The canvas, or `undefined` when the element is missing or is
   *   not a `<canvas>`.
   */
  private findCanvas = (): HTMLCanvasElement | undefined => {
    const element = document.getElementById(this.visualizerId);
    return element instanceof HTMLCanvasElement ? element : undefined;
  };

  /**
   * Starts recording from the microphone and, when the visualizer canvas is
   * present, draws the frequency-bar visualisation. Setup failures are logged
   * instead of surfacing as unhandled promise rejections.
   */
  startRecording = (): void => {
    this.prepareAudioInput()
      .then((input) => {
        if (!input) {
          return;
        }
        this.recorder.init(input.stream);
        this.recorder.start();

        const canvas = this.findCanvas();
        if (canvas) {
          this.visualizer.setCanvas(canvas);
          this.visualizer.visualizeFreqBar(input.analyzer);
        }
      })
      .catch((error: unknown) => {
        console.error('Unable to start recording:', error);
      });
  };

  /**
   * Draws a live audio meter on the visualizer canvas without recording.
   * Setup failures are logged instead of surfacing as unhandled rejections.
   *
   * @param meterType Visualisation to draw; anything other than `wave` or
   *   `bar` (including omitted) draws the frequency bars.
   */
  showAudioMeter = (meterType?: AudioMeterType): void => {
    this.prepareAudioInput()
      .then((input) => {
        if (!input) {
          return;
        }
        const canvas = this.findCanvas();
        if (!canvas) {
          return;
        }
        this.visualizer.setCanvas(canvas);

        switch (meterType) {
          case AudioMeterType.wave:
            this.visualizer.visualizeOsciloscope(input.analyzer);
            break;
          case AudioMeterType.bar:
            this.visualizer.visualizeAudioMeter(input.analyzer);
            break;
          default:
            this.visualizer.visualizeFreqBar(input.analyzer);
        }
      })
      .catch((error: unknown) => {
        console.error('Unable to show audio meter:', error);
      });
  };

  /**
   * Switches to another microphone input.
   *
   * @param selectedInput The microphone option to switch to.
   * @param record When true recording is started on the new input; otherwise
   *   only the bar audio meter is shown.
   */
  onInputChange = async (selectedInput: MicrophoneOption, record: boolean) => {
    await this.microphone.onMicrophoneChange(selectedInput);
    if (record) {
      this.startRecording();
    } else {
      this.showAudioMeter(AudioMeterType.bar);
    }
  };

  stopStreaming = () => {
    this.microphone.stopStreaming();
  };

  /**
   * Recognition result handler (assigned to `recognition.onresult` by
   * `start()`).
   *
   * `event.results` is indexed with `recIndex`, and the entry at position 0
   * of that result supplies the transcript and confidence. The cleaned
   * transcript is looked up in the grammar, falling back to the closest
   * match; then the index is advanced and the recorder is stopped with the
   * resulting id.
   *
   * @param event Recognition event. Kept as `any` because the typings of
   *   `recognition` live in the `Speech` base class, outside this file.
   */
  onResult = (event: any) => {
    const current = event.results[this.recIndex];
    if (!current) {
      console.warn(
        'ERROR:',
        this.recIndex,
        event.results,
        'event.results[this.recIndex] is undefined'
      );
      return;
    }

    const best = current[0];
    const result = Speech.cleanKey(best.transcript);
    audioLog.onResult(result);
    this.spokenText = result;

    // `any` is retained: the id types come from `Speech` and are not visible.
    let matchingId: any;
    if (this.gramar[result]) {
      // perfect match found
      matchingId = this.gramar[result].id;
      audioLog.onSpeechGrammarMatch(true, matchingId, result);
    } else {
      // try to find closest match
      matchingId = Speech.findClosestMatch(this.dictionaryLookupTable, result);
      audioLog.onSpeechGrammarMatch(false, matchingId, result);
    }

    if (result) {
      console.log('Confidence: ' + best.confidence);
    }
    this.incrementIndex();
    this.stopRecording(matchingId);
  };

  /**
   * Logs the end of speech, stops the recorder without a matching id and,
   * when `shouldAutoRestartOnEndedPrematurely` is set, starts recording again.
   *
   * NOTE(review): this class never attaches the handler to the recognizer;
   * presumably `Speech` or the caller does — confirm.
   */
  onSpeechEnd = () => {
    audioLog.onSpeechEnd();
    this.stopRecording();
    if (this.shouldAutoRestartOnEndedPrematurely) {
      this.startRecording();
    }
  };

  /**
   * Stops the recorder and remembers which id the segment belongs to.
   *
   * @param matchedItemId Id to file the segment under; omit for "no match".
   */
  stopRecording = (matchedItemId?: string) => {
    this.matchingId = matchedItemId;
    this.recorder.stop();
  };

  incrementIndex = () => {
    this.recIndex = this.recIndex + 1;
  };

  /**
   * Stops the recorder, recognition and the microphone stream, and resets
   * the result index. `endRecording` is set first so that
   * `onRecordingComplete` ignores the segment and does not restart recording.
   */
  stop = () => {
    this.endRecording = true;
    this.recorder.stop();
    this.recognition.stop();
    this.microphone.stopStreaming();
    this.recIndex = 0;
  };
}