matrix-react-sdk

/*
Copyright 2024 New Vector Ltd.
Copyright 2021 The Matrix.org Foundation C.I.C.

SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only
Please see LICENSE files in the repository root for full details.
*/

import EventEmitter from "events";
import { SimpleObservable } from "matrix-widget-api";
import { logger } from "matrix-js-sdk/src/logger";
import { defer } from "matrix-js-sdk/src/utils";

import { UPDATE_EVENT } from "../stores/AsyncStore";
import { arrayFastResample } from "../utils/arrays";
import { IDestroyable } from "../utils/IDestroyable";
import { PlaybackClock } from "./PlaybackClock";
import { createAudioContext, decodeOgg } from "./compat";
import { clamp } from "../utils/numbers";
import { DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES } from "./consts";
import { PlaybackEncoder } from "../PlaybackEncoder";

export enum PlaybackState {
    Decoding = "decoding",
    Stopped = "stopped", // no progress on timeline
    Paused = "paused", // some progress on timeline
    Playing = "playing", // active progress through timeline
}

const THUMBNAIL_WAVEFORM_SAMPLES = 100; // arbitrary: [30,120]

export interface PlaybackInterface {
    readonly currentState: PlaybackState;
    readonly liveData: SimpleObservable<number[]>;
    readonly timeSeconds: number;
    readonly durationSeconds: number;
    skipTo(timeSeconds: number): Promise<void>;
}

export class Playback extends EventEmitter implements IDestroyable, PlaybackInterface {
    /**
     * Stable waveform for representing a thumbnail of the media. Values are
     * guaranteed to be between zero and one, inclusive.
     */
    public readonly thumbnailWaveform: number[];

    private readonly context: AudioContext;
    private source?: AudioBufferSourceNode | MediaElementAudioSourceNode;
    private state = PlaybackState.Decoding;
    private audioBuf?: AudioBuffer;
    private element?: HTMLAudioElement;
    private resampledWaveform: number[];
    private waveformObservable = new SimpleObservable<number[]>();
    private readonly clock: PlaybackClock;
    private readonly fileSize: number;

    /**
     * Creates a new playback instance from a buffer.
     * @param {ArrayBuffer} buf The buffer containing the sound sample.
     * @param {number[]} seedWaveform Optional seed waveform to present until the proper waveform
     * can be calculated. Contains values between zero and one, inclusive.
     */
    public constructor(
        private buf: ArrayBuffer,
        seedWaveform = DEFAULT_WAVEFORM,
    ) {
        super();
        // Capture the file size early as reading the buffer will result in a 0-length buffer left behind
        this.fileSize = this.buf.byteLength;
        this.context = createAudioContext();
        this.resampledWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, PLAYBACK_WAVEFORM_SAMPLES);
        this.thumbnailWaveform = arrayFastResample(seedWaveform ?? DEFAULT_WAVEFORM, THUMBNAIL_WAVEFORM_SAMPLES);
        this.waveformObservable.update(this.resampledWaveform);
        this.clock = new PlaybackClock(this.context);
    }

    /**
     * Size of the audio clip in bytes. May be zero if unknown. This is updated
     * when the playback goes through phase changes.
     */
    public get sizeBytes(): number {
        return this.fileSize;
    }

    /**
     * Stable waveform for the playback. Values are guaranteed to be between
     * zero and one, inclusive.
     */
    public get waveform(): number[] {
        return this.resampledWaveform;
    }

    public get waveformData(): SimpleObservable<number[]> {
        return this.waveformObservable;
    }

    public get clockInfo(): PlaybackClock {
        return this.clock;
    }

    public get liveData(): SimpleObservable<number[]> {
        return this.clock.liveData;
    }

    public get timeSeconds(): number {
        return this.clock.timeSeconds;
    }

    public get durationSeconds(): number {
        return this.clock.durationSeconds;
    }

    public get currentState(): PlaybackState {
        return this.state;
    }

    public get isPlaying(): boolean {
        return this.currentState === PlaybackState.Playing;
    }

    public emit(event: PlaybackState, ...args: any[]): boolean {
        this.state = event;
        super.emit(event, ...args);
        super.emit(UPDATE_EVENT, event, ...args);
        return true; // we don't ever care if the event had listeners, so just return "yes"
    }

    public destroy(): void {
        // Dev note: It's critical that we call stop() during cleanup to ensure that downstream callers
        // are aware of the final clock position before the user triggered an unload.
        // noinspection JSIgnoredPromiseFromCall - not concerned about being called async here
        this.stop();
        this.removeAllListeners();
        this.clock.destroy();
        this.waveformObservable.close();
        if (this.element) {
            URL.revokeObjectURL(this.element.src);
            this.element.remove();
        }
    }

    /**
     * Decodes the audio and gets the playback ready to use. Only the first call
     * does any work; later calls return immediately. Emits PlaybackState.Stopped
     * once preparation is complete.
     */
    public async prepare(): Promise<void> {
        // don't attempt to decode the media again
        // AudioContext.decodeAudioData detaches the array buffer `this.buf`
        // meaning it cannot be re-read
        if (this.state !== PlaybackState.Decoding) {
            return;
        }

        // The point where we use an audio element is fairly arbitrary, though we don't want
        // it to be too low. As of writing, voice messages want to show a waveform but audio
        // messages do not. Using an audio element means we can't show a waveform preview, so
        // we try to target the difference between a voice message file and a large audio file.
        // Overall, the point of this is to avoid memory-related issues due to storing a massive
        // audio buffer in memory, as that can balloon to far greater than the input buffer's
        // byte length.
        if (this.buf.byteLength > 5 * 1024 * 1024) {
            // 5mb
            logger.log("Audio file too large: processing through <audio /> element");
            this.element = document.createElement("AUDIO") as HTMLAudioElement;
            const deferred = defer<unknown>();
            this.element.onloadeddata = deferred.resolve;
            this.element.onerror = deferred.reject;
            this.element.src = URL.createObjectURL(new Blob([this.buf]));
            await deferred.promise; // make sure the audio element is ready for us
        } else {
            // Safari compat: promise API not supported on this function
            this.audioBuf = await new Promise((resolve, reject) => {
                this.context.decodeAudioData(
                    this.buf,
                    (b) => resolve(b),
                    async (e): Promise<void> => {
                        try {
                            // This error handler is largely for Safari as well, which doesn't
                            // support Opus/Ogg very well.
                            logger.error("Error decoding recording: ", e);
                            logger.warn("Trying to re-encode to WAV instead...");
                            const wav = await decodeOgg(this.buf);

                            // noinspection ES6MissingAwait - not needed when using callbacks
                            this.context.decodeAudioData(
                                wav,
                                (b) => resolve(b),
                                (e) => {
                                    logger.error("Still failed to decode recording: ", e);
                                    reject(e);
                                },
                            );
                        } catch (e) {
                            logger.error("Caught decoding error:", e);
                            reject(e);
                        }
                    },
                );
            });

            // Update the waveform to the real waveform once we have channel data to use. We don't
            // exactly trust the user-provided waveform to be accurate...
            this.resampledWaveform = await PlaybackEncoder.instance.getPlaybackWaveform(
                this.audioBuf.getChannelData(0),
            );
        }

        this.waveformObservable.update(this.resampledWaveform);

        this.clock.flagLoadTime(); // must happen first because setting the duration fires a clock update
        this.clock.durationSeconds = this.element?.duration ?? this.audioBuf!.duration;

        // Signal that we're not decoding anymore. This is done last to ensure the clock is updated for
        // when the downstream callers try to use it.
        this.emit(PlaybackState.Stopped);
    }

    private onPlaybackEnd = async (): Promise<void> => {
        await this.context.suspend();
        this.emit(PlaybackState.Stopped);
    };

    public async play(): Promise<void> {
        // We can't restart a buffer source, so we need to create a new one if we hit the end
        if (this.state === PlaybackState.Stopped) {
            this.disconnectSource();
            this.makeNewSourceBuffer();
            if (this.element) {
                await this.element.play();
            } else {
                (this.source as AudioBufferSourceNode).start();
            }
        }

        // We use the context suspend/resume functions because it allows us to pause a source
        // node, but that still doesn't help us when the source node runs out (see above).
        await this.context.resume();
        this.clock.flagStart();
        this.emit(PlaybackState.Playing);
    }

    private disconnectSource(): void {
        if (this.element) return; // leave connected, we can (and must) re-use it

        this.source?.disconnect();
        this.source?.removeEventListener("ended", this.onPlaybackEnd);
    }

    private makeNewSourceBuffer(): void {
        if (this.element && this.source) return; // leave connected, we can (and must) re-use it

        if (this.element) {
            this.source = this.context.createMediaElementSource(this.element);
        } else {
            this.source = this.context.createBufferSource();
            this.source.buffer = this.audioBuf ?? null;
        }

        this.source.addEventListener("ended", this.onPlaybackEnd);
        this.source.connect(this.context.destination);
    }

    public async pause(): Promise<void> {
        await this.context.suspend();
        this.emit(PlaybackState.Paused);
    }

    public async stop(): Promise<void> {
        await this.onPlaybackEnd();
        this.clock.flagStop();
    }

    public async toggle(): Promise<void> {
        if (this.isPlaying) await this.pause();
        else await this.play();
    }

    public async skipTo(timeSeconds: number): Promise<void> {
        // Dev note: this function talks a lot about clock desyncs. There is a clock running
        // independently of the audio context and buffer so that accurate human-perceptible
        // time can be exposed. The PlaybackClock class has more information, but the short
        // version is that we need to line up the useful time (clip position) with the context
        // time, and avoid as many deviations as possible, as otherwise the user could see the
        // wrong time and we'd stop playback at the wrong time, etc.

        timeSeconds = clamp(timeSeconds, 0, this.clock.durationSeconds);

        // Track playing state so we don't cause seeking to start playing the track.
        const isPlaying = this.isPlaying;

        if (isPlaying) {
            // Pause first so we can get an accurate measurement of time
            await this.context.suspend();
        }

        // We can't simply tell the context/buffer to jump to a time, so we have to
        // start a whole new buffer and start it from the new time offset.
        const now = this.context.currentTime;
        this.disconnectSource();
        this.makeNewSourceBuffer();

        // We have to resync the clock because it can get confused about where we're
        // at in the audio clip.
        this.clock.syncTo(now, timeSeconds);

        // Always start the source to queue it up.
        // We have to do this now (and pause quickly if we're not supposed to be playing),
        // as otherwise the clock can desync when it comes time to the user hitting play.
        // After a couple of jumps, the user will have desynced the clock enough to be about
        // 10-15 seconds off, while this keeps it as close to perfect as humans can perceive.
        if (this.element) {
            this.element.currentTime = timeSeconds;
        } else {
            (this.source as AudioBufferSourceNode).start(now, timeSeconds);
        }

        // Dev note: it's critical that the code gap between `this.source.start()` and
        // `this.pause()` is as small as possible: we do not want to delay *anything*,
        // as that could cause a clock desync, or a buggy feeling as a single note plays
        // during seeking.
        if (isPlaying) {
            // If we were playing before, continue the context so the clock doesn't desync.
            await this.context.resume();
        } else {
            // As mentioned above, we'll have to pause the clip if we weren't supposed to
            // be playing it just yet. If we didn't have this, the audio clip would play but
            // all the states would be wrong: the clock wouldn't advance, the pause state
            // wouldn't match the blaring noise leaving the user's speakers, etc.
            //
            // Also as mentioned, if the code gap is small enough then this should be
            // executed immediately after the start time, leaving no feasible time for the
            // user's speakers to play any sound.
            await this.pause();
        }
    }
}
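
For reference, below is a minimal sketch of how a caller might drive this class using only the public API above. It is not part of Playback.ts: the playClip helper, the fetch URL, and the console logging are illustrative assumptions (matrix-react-sdk itself wires playback up through its own stores and UI components).

// usage-sketch.ts (hypothetical caller, not part of the file above)
import { Playback, PlaybackState } from "./Playback";

async function playClip(url: string): Promise<Playback> {
    // Fetch the clip into an ArrayBuffer. Playback effectively takes ownership of
    // the buffer: decodeAudioData detaches it, so it cannot be reused afterwards.
    const buf = await (await fetch(url)).arrayBuffer();
    const playback = new Playback(buf); // optional seed waveform defaults to DEFAULT_WAVEFORM

    // Playback sets its state through emit(), so the state names double as event names.
    playback.on(PlaybackState.Playing, () => console.log("playing"));
    playback.on(PlaybackState.Stopped, () => console.log("ready, finished, or stopped"));

    await playback.prepare(); // decode once; emits Stopped when ready
    await playback.play();
    await playback.skipTo(5); // seek to 5s; playback continues because we were playing

    return playback; // callers are responsible for eventually calling destroy()
}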