/*
Copyright 2024 New Vector Ltd.
Copyright 2021 The Matrix.org Foundation C.I.C.
SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only
Please see LICENSE files in the repository root for full details.
*/
import Recorder from "opus-recorder/dist/recorder.min.js";
import encoderPath from "opus-recorder/dist/encoderWorker.min.js";
import { SimpleObservable } from "matrix-widget-api";
import EventEmitter from "events";
import { logger } from "matrix-js-sdk/src/logger";
import MediaDeviceHandler from "../MediaDeviceHandler";
import { IDestroyable } from "../utils/IDestroyable";
import { Singleflight } from "../utils/Singleflight";
import { PayloadEvent, WORKLET_NAME } from "./consts";
import { UPDATE_EVENT } from "../stores/AsyncStore";
import { createAudioContext } from "./compat";
import { FixedRollingArray } from "../utils/FixedRollingArray";
import { clamp } from "../utils/numbers";
import recorderWorkletFactory from "./recorderWorkletFactory";
const CHANNELS = 1; // stereo isn't important
export const SAMPLE_RATE = 48000; // 48kHz is what WebRTC uses. 12kHz is where we lose quality.
const TARGET_MAX_LENGTH = 900; // 15 minutes in seconds. Somewhat arbitrary, though longer == larger files.
const TARGET_WARN_TIME_LEFT = 10; // 10 seconds, also somewhat arbitrary.
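// Number of amplitude samples kept in the rolling waveform exposed via live updates.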
export const RECORDING_PLAYBACK_SAMPLES = 44;
interface RecorderOptions {
bitrate: number;
encoderApplication: number;
}
export const voiceRecorderOptions: RecorderOptions = {
bitrate: 24000, // recommended Opus bitrate for high-quality VoIP
encoderApplication: 2048, // voice
};
export const highQualityRecorderOptions: RecorderOptions = {
bitrate: 96000, // recommended Opus bitrate for high-quality music/audio streaming
encoderApplication: 2049, // full band audio
};
export interface IRecordingUpdate {
    waveform: number[]; // floating-point values between 0 (low) and 1 (high).
timeSeconds: number; // float
}
export enum RecordingState {
Started = "started",
EndingSoon = "ending_soon", // emits an object with a single numerical value: secondsLeft
Ended = "ended",
Uploading = "uploading",
Uploaded = "uploaded",
}
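/**
 * Handles the recording of a single voice message: captures microphone audio,
 * encodes it to Opus-in-Ogg via opus-recorder, and exposes live waveform/time
 * updates alongside lifecycle events (see RecordingState).
 */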
export class VoiceRecording extends EventEmitter implements IDestroyable {
private recorder?: Recorder;
private recorderContext?: AudioContext;
private recorderSource?: MediaStreamAudioSourceNode;
private recorderStream?: MediaStream;
private recorderWorklet?: AudioWorkletNode;
private recorderProcessor?: ScriptProcessorNode;
private recording = false;
private observable?: SimpleObservable<IRecordingUpdate>;
private targetMaxLength: number | null = TARGET_MAX_LENGTH;
    public amplitudes: number[] = []; // amplitude samples, generated at each second mark
private liveWaveform = new FixedRollingArray(RECORDING_PLAYBACK_SAMPLES, 0);
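    // Invoked with each encoded page as it is produced (streamPages is enabled
    // below), rather than once with a single buffer at the end of the recording.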
public onDataAvailable?: (data: ArrayBuffer) => void;
public get contentType(): string {
return "audio/ogg";
}
public get durationSeconds(): number {
if (!this.recorder || !this.recorderContext) throw new Error("Duration not available without a recording");
return this.recorderContext.currentTime;
}
public get isRecording(): boolean {
return this.recording;
}
public emit(event: string, ...args: any[]): boolean {
super.emit(event, ...args);
super.emit(UPDATE_EVENT, event, ...args);
return true; // we don't ever care if the event had listeners, so just return "yes"
}
public disableMaxLength(): void {
this.targetMaxLength = null;
}
private shouldRecordInHighQuality(): boolean {
// Non-voice use case is suspected when noise suppression is disabled by the user.
// When recording complex audio, higher quality is required to avoid audio artifacts.
// This is a really arbitrary decision, but it can be refined/replaced at any time.
return !MediaDeviceHandler.getAudioNoiseSuppression();
}
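    /**
     * Acquires the microphone stream, wires up the audio graph (an audio worklet
     * where supported, with a ScriptProcessorNode fallback for Safari), and
     * constructs the Opus recorder. On failure, cleans up any partially-created
     * resources and rethrows.
     */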
private async makeRecorder(): Promise<void> {
try {
this.recorderStream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: CHANNELS,
deviceId: MediaDeviceHandler.getAudioInput(),
autoGainControl: { ideal: MediaDeviceHandler.getAudioAutoGainControl() },
echoCancellation: { ideal: MediaDeviceHandler.getAudioEchoCancellation() },
noiseSuppression: { ideal: MediaDeviceHandler.getAudioNoiseSuppression() },
},
});
this.recorderContext = createAudioContext({
// latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
});
this.recorderSource = this.recorderContext.createMediaStreamSource(this.recorderStream);
// Connect our inputs and outputs
if (this.recorderContext.audioWorklet) {
// Set up our worklet. We use this for timing information and waveform analysis: the
// web audio API prefers this be done async to avoid holding the main thread with math.
await recorderWorkletFactory(this.recorderContext);
this.recorderWorklet = new AudioWorkletNode(this.recorderContext, WORKLET_NAME);
this.recorderSource.connect(this.recorderWorklet);
this.recorderWorklet.connect(this.recorderContext.destination);
                // Dev note: `addEventListener` doesn't work here because the worklet's
                // MessagePort only delivers messages once start() is called; assigning
                // onmessage starts the port implicitly.
this.recorderWorklet.port.onmessage = (ev) => {
switch (ev.data["ev"]) {
case PayloadEvent.Timekeep:
this.processAudioUpdate(ev.data["timeSeconds"]);
break;
case PayloadEvent.AmplitudeMark:
// Sanity check to make sure we're adding about one sample per second
if (ev.data["forIndex"] === this.amplitudes.length) {
this.amplitudes.push(ev.data["amplitude"]);
this.liveWaveform.pushValue(ev.data["amplitude"]);
}
break;
}
};
} else {
                // Safari fallback: use a ScriptProcessorNode instead, buffered to
                // 1024 samples of data, like the worklet is.
this.recorderProcessor = this.recorderContext.createScriptProcessor(1024, CHANNELS, CHANNELS);
this.recorderSource.connect(this.recorderProcessor);
this.recorderProcessor.connect(this.recorderContext.destination);
this.recorderProcessor.addEventListener("audioprocess", this.onAudioProcess);
}
const recorderOptions = this.shouldRecordInHighQuality()
? highQualityRecorderOptions
: voiceRecorderOptions;
const { encoderApplication, bitrate } = recorderOptions;
this.recorder = new Recorder({
encoderPath, // magic from webpack
encoderSampleRate: SAMPLE_RATE,
encoderApplication: encoderApplication,
            streamPages: true, // stream encoded pages as they are produced, spreading encoder CPU cost over the recording
encoderFrameSize: 20, // ms, arbitrary frame size we send to the encoder
numberOfChannels: CHANNELS,
sourceNode: this.recorderSource,
encoderBitRate: bitrate,
// We use low values for the following to ease CPU usage - the resulting waveform
// is indistinguishable for a voice message. Note that the underlying library will
// pick defaults which prefer the highest possible quality, CPU be damned.
encoderComplexity: 3, // 0-10, 10 is slow and high quality.
            resampleQuality: 3, // 0-10, 10 is slow and high quality.
});
        // not using EventEmitter here because it leads to detached buffers
this.recorder.ondataavailable = (data: ArrayBuffer) => this.onDataAvailable?.(data);
} catch (e) {
logger.error("Error starting recording: ", e);
if (e instanceof DOMException) {
// Unhelpful DOMExceptions are common - parse them sanely
logger.error(`${e.name} (${e.code}): ${e.message}`);
}
// Clean up as best as possible
if (this.recorderStream) this.recorderStream.getTracks().forEach((t) => t.stop());
if (this.recorderSource) this.recorderSource.disconnect();
if (this.recorder) this.recorder.close();
if (this.recorderContext) {
// noinspection ES6MissingAwait - not important that we wait
this.recorderContext.close();
}
throw e; // rethrow so upstream can handle it
}
}
public get liveData(): SimpleObservable<IRecordingUpdate> {
if (!this.recording || !this.observable) throw new Error("No observable when not recording");
return this.observable;
}
public get isSupported(): boolean {
return !!Recorder.isRecordingSupported();
}
private onAudioProcess = (ev: AudioProcessingEvent): void => {
this.processAudioUpdate(ev.playbackTime);
        // We skip the worklet's waveform calculations here: we get that information
        // quickly enough from the playback info updates.
};
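    /**
     * Pushes a waveform/time update to observers, then enforces the maximum
     * recording length: emits an EndingSoon warning and eventually stops the
     * recording once the limit is exceeded.
     */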
private processAudioUpdate = (timeSeconds: number): void => {
if (!this.recording) return;
this.observable!.update({
waveform: this.liveWaveform.value.map((v) => clamp(v, 0, 1)),
timeSeconds: timeSeconds,
});
// Now that we've updated the data/waveform, let's do a time check. We don't want to
// go horribly over the limit. We also emit a warning state if needed.
//
// We use the recorder's perspective of time to make sure we don't cut off the last
// frame of audio, otherwise we end up with a 14:59 clip (899.68 seconds). This extra
// safety can allow us to overshoot the target a bit, but at least when we say 15min
// maximum we actually mean it.
//
// In testing, recorder time and worker time lag by about 400ms, which is roughly the
// time needed to encode a sample/frame.
//
if (!this.targetMaxLength) {
// skip time checks if max length has been disabled
return;
}
        const secondsLeft = this.targetMaxLength - this.recorderSeconds!;
if (secondsLeft < 0) {
// go over to make sure we definitely capture that last frame
// noinspection JSIgnoredPromiseFromCall - we aren't concerned with it overlapping
this.stop();
} else if (secondsLeft <= TARGET_WARN_TIME_LEFT) {
Singleflight.for(this, "ending_soon").do(() => {
this.emit(RecordingState.EndingSoon, { secondsLeft });
return Singleflight.Void;
});
}
};
/**
* {@link https://github.com/chris-rudmin/opus-recorder#instance-fields ref for recorderSeconds}
*/
public get recorderSeconds(): number | undefined {
if (!this.recorder) return undefined;
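        // Per the opus-recorder docs linked above, encodedSamplePosition is always
        // measured in 48kHz samples, independent of SAMPLE_RATE (they happen to match).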
return this.recorder.encodedSamplePosition / 48000;
}
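    /**
     * Begins a new recording, creating a fresh observable for live updates.
     * Rejects if a recording is already in progress.
     */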
public async start(): Promise<void> {
if (this.recording) {
throw new Error("Recording already in progress");
}
if (this.observable) {
this.observable.close();
}
this.observable = new SimpleObservable<IRecordingUpdate>();
await this.makeRecorder();
await this.recorder?.start();
this.recording = true;
this.emit(RecordingState.Started);
}
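    /**
     * Stops the recording, flushing the final frame before tearing down the
     * audio graph and releasing the media tracks back to the user/OS. Repeated
     * calls are deduplicated via Singleflight.
     */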
public async stop(): Promise<void> {
return Singleflight.for(this, "stop").do(async (): Promise<void> => {
if (!this.recording) {
throw new Error("No recording to stop");
}
// Disconnect the source early to start shutting down resources
await this.recorder!.stop(); // stop first to flush the last frame
this.recorderSource!.disconnect();
if (this.recorderWorklet) this.recorderWorklet.disconnect();
if (this.recorderProcessor) {
this.recorderProcessor.disconnect();
this.recorderProcessor.removeEventListener("audioprocess", this.onAudioProcess);
}
// close the context after the recorder so the recorder doesn't try to
// connect anything to the context (this would generate a warning)
await this.recorderContext!.close();
// Now stop all the media tracks so we can release them back to the user/OS
this.recorderStream!.getTracks().forEach((t) => t.stop());
// Finally do our post-processing and clean up
this.recording = false;
await this.recorder!.close();
this.emit(RecordingState.Ended);
});
}
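    /**
     * Stops any in-progress recording and releases listeners and observables.
     */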
public destroy(): void {
// noinspection JSIgnoredPromiseFromCall - not concerned about stop() being called async here
this.stop();
this.removeAllListeners();
this.onDataAvailable = undefined;
Singleflight.forgetAllFor(this);
// noinspection JSIgnoredPromiseFromCall - not concerned about being called async here
this.observable?.close();
}
}
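/*
 * Illustrative usage sketch (not part of this module; assumes an async context
 * and that `isSupported` has been checked by the caller):
 *
 *   const recording = new VoiceRecording();
 *   const chunks: ArrayBuffer[] = [];
 *   recording.onDataAvailable = (data) => chunks.push(data);
 *   recording.on(RecordingState.EndingSoon, ({ secondsLeft }) => {
 *       console.log(`Recording ends in ${secondsLeft}s`);
 *   });
 *   await recording.start();
 *   // ...user finishes speaking...
 *   await recording.stop(); // the final pages arrive via onDataAvailable
 *   recording.destroy();
 */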