UNPKG

mediabunny

Version:

Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.

1,463 lines (1,262 loc) 57.1 kB
/*! * Copyright (c) 2025-present, Vanilagy and contributors * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { AUDIO_CODECS, AudioCodec, buildAudioCodecString, buildVideoCodecString, getAudioEncoderConfigExtension, getVideoEncoderConfigExtension, inferCodecFromCodecString, parsePcmCodec, PCM_AUDIO_CODECS, PcmAudioCodec, Quality, SUBTITLE_CODECS, SubtitleCodec, VIDEO_CODECS, VideoCodec, } from './codec'; import { OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from './output'; import { assert, assertNever, CallSerializer, clamp, promiseWithResolvers, setInt24, setUint24 } from './misc'; import { Muxer } from './muxer'; import { SubtitleParser } from './subtitles'; import { toAlaw, toUlaw } from './pcm'; import { CustomVideoEncoder, CustomAudioEncoder, customVideoEncoders, customAudioEncoders, } from './custom-coder'; import { EncodedPacket } from './packet'; import { AudioSample, VideoSample } from './sample'; /** * Base class for media sources. Media sources are used to add media samples to an output file. * @public */ export abstract class MediaSource { /** @internal */ _connectedTrack: OutputTrack | null = null; /** @internal */ _closingPromise: Promise<void> | null = null; /** @internal */ _closed = false; /** * @internal * A time offset in seconds that is added to all timestamps generated by this source. */ _timestampOffset = 0; /** @internal */ _ensureValidAdd() { if (!this._connectedTrack) { throw new Error('Source is not connected to an output track.'); } if (this._connectedTrack.output.state === 'canceled') { throw new Error('Output has been canceled.'); } if (this._connectedTrack.output.state === 'finalizing' || this._connectedTrack.output.state === 'finalized') { throw new Error('Output has been finalized.'); } if (this._connectedTrack.output.state === 'pending') { throw new Error('Output has not started.'); } if (this._closed) { throw new Error('Source is closed.'); } } /** @internal */ async _start() {} /** @internal */ // eslint-disable-next-line @typescript-eslint/no-unused-vars async _flushAndClose(forceClose: boolean) {} /** * Closes this source. This prevents future samples from being added and signals to the output file that no further * samples will come in for this track. Calling `.close()` is optional but recommended after adding the * last sample - for improved performance and reduced memory usage. */ close() { if (this._closingPromise) { return; } const connectedTrack = this._connectedTrack; if (!connectedTrack) { throw new Error('Cannot call close without connecting the source to an output track.'); } if (connectedTrack.output.state === 'pending') { throw new Error('Cannot call close before output has been started.'); } this._closingPromise = (async () => { await this._flushAndClose(false); this._closed = true; if (connectedTrack.output.state === 'finalizing' || connectedTrack.output.state === 'finalized') { return; } connectedTrack.output._muxer.onTrackClose(connectedTrack); })(); } /** @internal */ async _flushOrWaitForOngoingClose(forceClose: boolean) { if (this._closingPromise) { // Since closing also flushes, we don't want to do it twice return this._closingPromise; } else { return this._flushAndClose(forceClose); } } } /** * Base class for video sources - sources for video tracks. * @public */ export abstract class VideoSource extends MediaSource { /** @internal */ override _connectedTrack: OutputVideoTrack | null = null; /** @internal */ _codec: VideoCodec; constructor(codec: VideoCodec) { super(); if (!VIDEO_CODECS.includes(codec)) { throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`); } this._codec = codec; } } /** * The most basic video source; can be used to directly pipe encoded packets into the output file. * @public */ export class EncodedVideoPacketSource extends VideoSource { constructor(codec: VideoCodec) { super(codec); } /** * Adds an encoded packet to the output video track. Packets must be added in *decode order*, while a packet's * timestamp must be its *presentation timestamp*. B-frames are handled automatically. * * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid * decoder config. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) { if (!(packet instanceof EncodedPacket)) { throw new TypeError('packet must be an EncodedPacket.'); } if (packet.isMetadataOnly) { throw new TypeError('Metadata-only packets cannot be added.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('meta, when provided, must be an object.'); } this._ensureValidAdd(); return this._connectedTrack!.output._muxer.addEncodedVideoPacket(this._connectedTrack!, packet, meta); } } /** * Configuration object that controls video encoding. Can be used to set codec, quality, and more. * @public */ export type VideoEncodingConfig = { /** The video codec that should be used for encoding the video samples (frames). */ codec: VideoCodec; /** * The target bitrate for the encoded video, in bits per second. Alternatively, a subjective Quality can * be provided. */ bitrate: number | Quality; /** The latency mode used by the encoder; controls the performance-quality tradeoff. */ latencyMode?: VideoEncoderConfig['latencyMode']; /** * The interval, in seconds, of how often frames are encoded as a key frame. The default is 5 seconds. Frequent key * frames improve seeking behavior but increase file size. When using multiple video tracks, you should give them * all the same key frame interval. */ keyFrameInterval?: number; /** * The full codec string as specified in the WebCodecs Codec Registry. This string must match the codec * specified in `codec`. When not set, a fitting codec string will be constructed automatically by the library. */ fullCodecString?: string; /** Called for each successfully encoded packet. Both the packet and the encoding metadata are passed. */ onEncodedPacket?: (packet: EncodedPacket, meta: EncodedVideoChunkMetadata | undefined) => unknown; /** Called when the internal encoder config, as used by the WebCodecs API, is created. */ onEncoderConfig?: (config: VideoEncoderConfig) => unknown; }; const validateVideoEncodingConfig = (config: VideoEncodingConfig) => { if (!config || typeof config !== 'object') { throw new TypeError('Encoding config must be an object.'); } if (!VIDEO_CODECS.includes(config.codec)) { throw new TypeError(`Invalid video codec '${config.codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`); } if (!(config.bitrate instanceof Quality) && (!Number.isInteger(config.bitrate) || config.bitrate <= 0)) { throw new TypeError('config.bitrate must be a positive integer or a quality.'); } if (config.latencyMode !== undefined && !['quality', 'realtime'].includes(config.latencyMode)) { throw new TypeError('config.latencyMode, when provided, must be \'quality\' or \'realtime\'.'); } if ( config.keyFrameInterval !== undefined && (!Number.isFinite(config.keyFrameInterval) || config.keyFrameInterval < 0) ) { throw new TypeError('config.keyFrameInterval, when provided, must be a non-negative number.'); } if (config.fullCodecString !== undefined && typeof config.fullCodecString !== 'string') { throw new TypeError('config.fullCodecString, when provided, must be a string.'); } if (config.fullCodecString !== undefined && inferCodecFromCodecString(config.fullCodecString) !== config.codec) { throw new TypeError( `config.fullCodecString, when provided, must be a string that matches the specified codec` + ` (${config.codec}).`, ); } if (config.onEncodedPacket !== undefined && typeof config.onEncodedPacket !== 'function') { throw new TypeError('config.onEncodedChunk, when provided, must be a function.'); } if (config.onEncoderConfig !== undefined && typeof config.onEncoderConfig !== 'function') { throw new TypeError('config.onEncoderConfig, when provided, must be a function.'); } }; class VideoEncoderWrapper { private ensureEncoderPromise: Promise<void> | null = null; private encoderInitialized = false; private encoder: VideoEncoder | null = null; private muxer: Muxer | null = null; private lastMultipleOfKeyFrameInterval = -1; private lastWidth: number | null = null; private lastHeight: number | null = null; private customEncoder: CustomVideoEncoder | null = null; private customEncoderCallSerializer = new CallSerializer(); private customEncoderQueueSize = 0; /** * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context. * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught. * So, we keep track of the encoder error and throw it as soon as we get the chance. */ private encoderError: Error | null = null; constructor(private source: VideoSource, private encodingConfig: VideoEncodingConfig) {} async add(videoSample: VideoSample, shouldClose: boolean, encodeOptions?: VideoEncoderEncodeOptions) { try { this.checkForEncoderError(); this.source._ensureValidAdd(); // Ensure video sample size remains constant if (this.lastWidth !== null && this.lastHeight !== null) { if (videoSample.codedWidth !== this.lastWidth || videoSample.codedHeight !== this.lastHeight) { throw new Error( `Video sample size must remain constant. Expected ${this.lastWidth}x${this.lastHeight},` + ` got ${videoSample.codedWidth}x${videoSample.codedHeight}.`, ); } } else { this.lastWidth = videoSample.codedWidth; this.lastHeight = videoSample.codedHeight; } if (!this.encoderInitialized) { if (!this.ensureEncoderPromise) { void this.ensureEncoder(videoSample); } // No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have // synchronously completed and the encoder is already initialized. In this case, we don't need to await // the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are // calling this method: It's important that the call that initialized the encoder go through this // code first. if (!this.encoderInitialized) { await this.ensureEncoderPromise; } } assert(this.encoderInitialized); const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 5; const multipleOfKeyFrameInterval = Math.floor(videoSample.timestamp / keyFrameInterval); // Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks follow the same // "key frame" rhythm, because aligned key frames are required to start new fragments in ISOBMFF or clusters // in Matroska (or at least desirable). const finalEncodeOptions = { ...encodeOptions, keyFrame: encodeOptions?.keyFrame || keyFrameInterval === 0 || multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval, }; this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval; if (this.customEncoder) { this.customEncoderQueueSize++; // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder const clonedSample = videoSample.clone(); const promise = this.customEncoderCallSerializer .call(() => this.customEncoder!.encode(clonedSample, finalEncodeOptions)) .then(() => this.customEncoderQueueSize--) .catch((error: Error) => this.encoderError ??= error) .finally(() => { clonedSample.close(); // `videoSample` gets closed in the finally block at the end of the method }); if (this.customEncoderQueueSize >= 4) { await promise; } } else { assert(this.encoder); const videoFrame = videoSample.toVideoFrame(); this.encoder.encode(videoFrame, finalEncodeOptions); videoFrame.close(); if (shouldClose) { videoSample.close(); } // We need to do this after sending the frame to the encoder as the frame otherwise might be closed if (this.encoder.encodeQueueSize >= 4) { await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true })); } } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } finally { if (shouldClose) { // Make sure it's always closed, even if there was an error videoSample.close(); } } } private async ensureEncoder(videoSample: VideoSample) { if (this.encoder) { return; } return this.ensureEncoderPromise = (async () => { const width = videoSample.codedWidth; const height = videoSample.codedHeight; const bitrate = this.encodingConfig.bitrate instanceof Quality ? this.encodingConfig.bitrate._toVideoBitrate(this.encodingConfig.codec, width, height) : this.encodingConfig.bitrate; const encoderConfig: VideoEncoderConfig = { codec: this.encodingConfig.fullCodecString ?? buildVideoCodecString( this.encodingConfig.codec, width, height, bitrate, ), width, height, bitrate, framerate: this.source._connectedTrack?.metadata.frameRate, latencyMode: this.encodingConfig.latencyMode, ...getVideoEncoderConfigExtension(this.encodingConfig.codec), }; this.encodingConfig.onEncoderConfig?.(encoderConfig); const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports( this.encodingConfig.codec, encoderConfig, )); if (MatchingCustomEncoder) { // @ts-expect-error "Can't create instance of abstract class 🤓" this.customEncoder = new MatchingCustomEncoder() as CustomVideoEncoder; // @ts-expect-error It's technically readonly this.customEncoder.codec = this.encodingConfig.codec; // @ts-expect-error It's technically readonly this.customEncoder.config = encoderConfig; // @ts-expect-error It's technically readonly this.customEncoder.onPacket = (packet, meta) => { if (!(packet instanceof EncodedPacket)) { throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('The second argument passed to onPacket must be an object or undefined.'); } this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta); }; await this.customEncoder.init(); } else { if (typeof VideoEncoder === 'undefined') { throw new Error('VideoEncoder is not supported by this browser.'); } const support = await VideoEncoder.isConfigSupported(encoderConfig); if (!support.supported) { throw new Error( `This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,` + ` ${encoderConfig.width}x${encoderConfig.height}) is not supported by this browser. Consider` + ` using another codec or changing your video parameters.`, ); } this.encoder = new VideoEncoder({ output: (chunk, meta) => { const packet = EncodedPacket.fromEncodedChunk(chunk); this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta); }, error: (error) => { error.stack = new Error().stack; // Provide a more useful stack trace this.encoderError ??= error; }, }); this.encoder.configure(encoderConfig); } assert(this.source._connectedTrack); this.muxer = this.source._connectedTrack.output._muxer; this.encoderInitialized = true; })(); } async flushAndClose(forceClose: boolean) { this.checkForEncoderError(); if (this.customEncoder) { if (!forceClose) { void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush()); } await this.customEncoderCallSerializer.call(() => this.customEncoder!.close()); } else if (this.encoder) { if (!forceClose) { await this.encoder.flush(); } this.encoder.close(); } this.checkForEncoderError(); } getQueueSize() { if (this.customEncoder) { return this.customEncoderQueueSize; } else { return this.encoder?.encodeQueueSize ?? 0; } } checkForEncoderError() { if (this.encoderError) { throw this.encoderError; } } } /** * This source can be used to add raw, unencoded video samples (frames) to an output video track. These frames will * automatically be encoded and then piped into the output. * @public */ export class VideoSampleSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; constructor(encodingConfig: VideoEncodingConfig) { validateVideoEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); } /** * Encodes a video sample (frame) and then adds it to the output. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(videoSample: VideoSample, encodeOptions?: VideoEncoderEncodeOptions) { if (!(videoSample instanceof VideoSample)) { throw new TypeError('videoSample must be a VideoSample.'); } return this._encoder.add(videoSample, false, encodeOptions); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * This source can be used to add video frames to the output track from a fixed canvas element. Since canvases are often * used for rendering, this source provides a convenient wrapper around VideoSampleSource. * @public */ export class CanvasSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; /** @internal */ private _canvas: HTMLCanvasElement | OffscreenCanvas; constructor(canvas: HTMLCanvasElement | OffscreenCanvas, encodingConfig: VideoEncodingConfig) { if ( !(typeof HTMLCanvasElement !== 'undefined' && canvas instanceof HTMLCanvasElement) && !(typeof OffscreenCanvas !== 'undefined' && canvas instanceof OffscreenCanvas) ) { throw new TypeError('canvas must be an HTMLCanvasElement or OffscreenCanvas.'); } validateVideoEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); this._canvas = canvas; } /** * Captures the current canvas state as a video sample (frame), encodes it and adds it to the output. * * @param timestamp - The timestamp of the sample, in seconds. * @param duration - The duration of the sample, in seconds. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(timestamp: number, duration = 0, encodeOptions?: VideoEncoderEncodeOptions) { if (!Number.isFinite(timestamp) || timestamp < 0) { throw new TypeError('timestamp must be a non-negative number.'); } if (!Number.isFinite(duration) || duration < 0) { throw new TypeError('duration must be a non-negative number.'); } const sample = new VideoSample(this._canvas, { timestamp, duration }); return this._encoder.add(sample, true, encodeOptions); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * Video source that encodes the frames of a MediaStreamVideoTrack and pipes them into the output. This is useful for * capturing live or real-time data such as webcams or screen captures. Frames will automatically start being captured * once the connected Output is started, and will keep being captured until the Output is finalized or this source * is closed. * @public */ export class MediaStreamVideoTrackSource extends VideoSource { /** @internal */ private _encoder: VideoEncoderWrapper; /** @internal */ private _abortController: AbortController | null = null; /** @internal */ private _track: MediaStreamVideoTrack; /** @internal */ private _workerTrackId: number | null = null; /** @internal */ private _workerListener: ((event: MessageEvent) => void) | null = null; /** @internal */ private _promiseWithResolvers = promiseWithResolvers(); /** @internal */ private _errorPromiseAccessed = false; /** A promise that rejects upon any error within this source. This promise never resolves. */ get errorPromise() { this._errorPromiseAccessed = true; return this._promiseWithResolvers.promise; } constructor(track: MediaStreamVideoTrack, encodingConfig: VideoEncodingConfig) { if (!(track instanceof MediaStreamTrack) || track.kind !== 'video') { throw new TypeError('track must be a video MediaStreamTrack.'); } validateVideoEncodingConfig(encodingConfig); encodingConfig = { ...encodingConfig, latencyMode: 'realtime', }; super(encodingConfig.codec); this._encoder = new VideoEncoderWrapper(this, encodingConfig); this._track = track; } /** @internal */ override async _start() { if (!this._errorPromiseAccessed) { console.warn( 'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal' + ' errors get bubbled up properly.', ); } this._abortController = new AbortController(); let firstVideoFrameTimestamp: number | null = null; let errored = false; const onVideoFrame = (videoFrame: VideoFrame) => { if (errored) { videoFrame.close(); return; } if (firstVideoFrameTimestamp === null) { firstVideoFrameTimestamp = videoFrame.timestamp / 1e6; const muxer = this._connectedTrack!.output._muxer; if (muxer.firstMediaStreamTimestamp === null) { muxer.firstMediaStreamTimestamp = performance.now() / 1000; this._timestampOffset = -firstVideoFrameTimestamp; } else { this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp) - firstVideoFrameTimestamp; } } if (this._encoder.getQueueSize() >= 4) { // Drop frames if the encoder is overloaded videoFrame.close(); return; } void this._encoder.add(new VideoSample(videoFrame), true) .catch((error) => { errored = true; this._abortController?.abort(); this._promiseWithResolvers.reject(error); if (this._workerTrackId !== null) { // Tell the worker to stop the track sendMessageToMediaStreamTrackProcessorWorker({ type: 'stopTrack', trackId: this._workerTrackId, }); } }); }; if (typeof MediaStreamTrackProcessor !== 'undefined') { // We can do it here directly, perfect const processor = new MediaStreamTrackProcessor({ track: this._track }); const consumer = new WritableStream<VideoFrame>({ write: onVideoFrame }); processor.readable.pipeTo(consumer, { signal: this._abortController.signal, }).catch((error) => { // Handle AbortError silently if (error instanceof DOMException && error.name === 'AbortError') return; this._promiseWithResolvers.reject(error); }); } else { // It might still be supported in a worker, so let's check that const supportedInWorker = await mediaStreamTrackProcessorIsSupportedInWorker(); if (supportedInWorker) { this._workerTrackId = nextMediaStreamTrackProcessorWorkerId++; sendMessageToMediaStreamTrackProcessorWorker({ type: 'videoTrack', trackId: this._workerTrackId, track: this._track, }, [this._track]); this._workerListener = (event: MessageEvent) => { const message = event.data as MediaStreamTrackProcessorWorkerMessage; if (message.type === 'videoFrame' && message.trackId === this._workerTrackId) { onVideoFrame(message.videoFrame); } else if (message.type === 'error' && message.trackId === this._workerTrackId) { this._promiseWithResolvers.reject(message.error); } }; mediaStreamTrackProcessorWorker!.addEventListener('message', this._workerListener); } else { throw new Error('MediaStreamTrackProcessor is required but not supported by this browser.'); } } } /** @internal */ override async _flushAndClose(forceClose: boolean) { if (this._abortController) { this._abortController.abort(); this._abortController = null; } if (this._workerTrackId !== null) { assert(this._workerListener); sendMessageToMediaStreamTrackProcessorWorker({ type: 'stopTrack', trackId: this._workerTrackId, }); // Wait for the worker to stop the track await new Promise<void>((resolve) => { const listener = (event: MessageEvent) => { const message = event.data as MediaStreamTrackProcessorWorkerMessage; if (message.type === 'trackStopped' && message.trackId === this._workerTrackId) { assert(this._workerListener); mediaStreamTrackProcessorWorker!.removeEventListener('message', this._workerListener); mediaStreamTrackProcessorWorker!.removeEventListener('message', listener); resolve(); } }; mediaStreamTrackProcessorWorker!.addEventListener('message', listener); }); } await this._encoder.flushAndClose(forceClose); } } /** * Base class for audio sources - sources for audio tracks. * @public */ export abstract class AudioSource extends MediaSource { /** @internal */ override _connectedTrack: OutputAudioTrack | null = null; /** @internal */ _codec: AudioCodec; constructor(codec: AudioCodec) { super(); if (!AUDIO_CODECS.includes(codec)) { throw new TypeError(`Invalid audio codec '${codec}'. Must be one of: ${AUDIO_CODECS.join(', ')}.`); } this._codec = codec; } } /** * The most basic audio source; can be used to directly pipe encoded packets into the output file. * @public */ export class EncodedAudioPacketSource extends AudioSource { constructor(codec: AudioCodec) { super(codec); } /** * Adds an encoded packet to the output audio track. Packets must be added in *decode order*. * * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid * decoder config. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(packet: EncodedPacket, meta?: EncodedAudioChunkMetadata) { if (!(packet instanceof EncodedPacket)) { throw new TypeError('packet must be an EncodedPacket.'); } if (packet.isMetadataOnly) { throw new TypeError('Metadata-only packets cannot be added.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('meta, when provided, must be an object.'); } this._ensureValidAdd(); return this._connectedTrack!.output._muxer.addEncodedAudioPacket(this._connectedTrack!, packet, meta); } } /** * Configuration object that controls audio encoding. Can be used to set codec, quality, and more. * @public */ export type AudioEncodingConfig = { /** The audio codec that should be used for encoding the audio samples. */ codec: AudioCodec; /** * The target bitrate for the encoded audio, in bits per second. Alternatively, a subjective Quality can * be provided. Required for compressed audio codecs, unused for PCM codecs. */ bitrate?: number | Quality; /** * The full codec string as specified in the WebCodecs Codec Registry. This string must match the codec * specified in `codec`. When not set, a fitting codec string will be constructed automatically by the library. */ fullCodecString?: string; /** Called for each successfully encoded packet. Both the packet and the encoding metadata are passed. */ onEncodedPacket?: (packet: EncodedPacket, meta: EncodedAudioChunkMetadata | undefined) => unknown; /** Called when the internal encoder config, as used by the WebCodecs API, is created. */ onEncoderConfig?: (config: AudioEncoderConfig) => unknown; }; const validateAudioEncodingConfig = (config: AudioEncodingConfig) => { if (!config || typeof config !== 'object') { throw new TypeError('Encoding config must be an object.'); } if (!AUDIO_CODECS.includes(config.codec)) { throw new TypeError(`Invalid audio codec '${config.codec}'. Must be one of: ${AUDIO_CODECS.join(', ')}.`); } if ( config.bitrate === undefined && (!(PCM_AUDIO_CODECS as readonly string[]).includes(config.codec) || config.codec === 'flac') ) { throw new TypeError('config.bitrate must be provided for compressed audio codecs.'); } if ( config.bitrate !== undefined && !(config.bitrate instanceof Quality) && (!Number.isInteger(config.bitrate) || config.bitrate <= 0) ) { throw new TypeError('config.bitrate, when provided, must be a positive integer or a quality.'); } if (config.fullCodecString !== undefined && typeof config.fullCodecString !== 'string') { throw new TypeError('config.fullCodecString, when provided, must be a string.'); } if (config.fullCodecString !== undefined && inferCodecFromCodecString(config.fullCodecString) !== config.codec) { throw new TypeError( `config.fullCodecString, when provided, must be a string that matches the specified codec` + ` (${config.codec}).`, ); } if (config.onEncodedPacket !== undefined && typeof config.onEncodedPacket !== 'function') { throw new TypeError('config.onEncodedChunk, when provided, must be a function.'); } if (config.onEncoderConfig !== undefined && typeof config.onEncoderConfig !== 'function') { throw new TypeError('config.onEncoderConfig, when provided, must be a function.'); } }; class AudioEncoderWrapper { private ensureEncoderPromise: Promise<void> | null = null; private encoderInitialized = false; private encoder: AudioEncoder | null = null; private muxer: Muxer | null = null; private lastNumberOfChannels: number | null = null; private lastSampleRate: number | null = null; private isPcmEncoder = false; private outputSampleSize: number | null = null; private writeOutputValue: ((view: DataView, byteOffset: number, value: number) => void) | null = null; private customEncoder: CustomAudioEncoder | null = null; private customEncoderCallSerializer = new CallSerializer(); private customEncoderQueueSize = 0; /** * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context. * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught. * So, we keep track of the encoder error and throw it as soon as we get the chance. */ private encoderError: Error | null = null; constructor(private source: AudioSource, private encodingConfig: AudioEncodingConfig) {} async add(audioSample: AudioSample, shouldClose: boolean) { try { this.checkForEncoderError(); this.source._ensureValidAdd(); // Ensure audio parameters remain constant if (this.lastNumberOfChannels !== null && this.lastSampleRate !== null) { if ( audioSample.numberOfChannels !== this.lastNumberOfChannels || audioSample.sampleRate !== this.lastSampleRate ) { throw new Error( `Audio parameters must remain constant. Expected ${this.lastNumberOfChannels} channels at` + ` ${this.lastSampleRate} Hz, got ${audioSample.numberOfChannels} channels at` + ` ${audioSample.sampleRate} Hz.`, ); } } else { this.lastNumberOfChannels = audioSample.numberOfChannels; this.lastSampleRate = audioSample.sampleRate; } if (!this.encoderInitialized) { if (!this.ensureEncoderPromise) { void this.ensureEncoder(audioSample); } // No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have // synchronously completed and the encoder is already initialized. In this case, we don't need to await // the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are // calling this method: It's important that the call that initialized the encoder go through this // code first. if (!this.encoderInitialized) { await this.ensureEncoderPromise; } } assert(this.encoderInitialized); if (this.customEncoder) { this.customEncoderQueueSize++; // We clone the sample so it cannot be closed on us from the outside before it reaches the encoder const clonedSample = audioSample.clone(); const promise = this.customEncoderCallSerializer .call(() => this.customEncoder!.encode(clonedSample)) .then(() => this.customEncoderQueueSize--) .catch((error: Error) => this.encoderError ??= error) .finally(() => { clonedSample.close(); // `audioSample` gets closed in the finally block at the end of the method }); if (this.customEncoderQueueSize >= 4) { await promise; } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } else if (this.isPcmEncoder) { await this.doPcmEncoding(audioSample, shouldClose); } else { assert(this.encoder); const audioData = audioSample.toAudioData(); this.encoder.encode(audioData); audioData.close(); if (shouldClose) { audioSample.close(); } if (this.encoder.encodeQueueSize >= 4) { await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true })); } await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure } } finally { if (shouldClose) { // Make sure it's always closed, even if there was an error audioSample.close(); } } } private async doPcmEncoding(audioSample: AudioSample, shouldClose: boolean) { assert(this.outputSampleSize); assert(this.writeOutputValue); // Need to extract data from the audio data before we close it const { numberOfChannels, numberOfFrames, sampleRate, timestamp } = audioSample; const CHUNK_SIZE = 2048; const outputs: { frameCount: number; view: DataView; }[] = []; // Prepare all of the output buffers, each being bounded by CHUNK_SIZE so we don't generate huge packets for (let frame = 0; frame < numberOfFrames; frame += CHUNK_SIZE) { const frameCount = Math.min(CHUNK_SIZE, audioSample.numberOfFrames - frame); const outputSize = frameCount * numberOfChannels * this.outputSampleSize; const outputBuffer = new ArrayBuffer(outputSize); const outputView = new DataView(outputBuffer); outputs.push({ frameCount, view: outputView }); } const allocationSize = audioSample.allocationSize(({ planeIndex: 0, format: 'f32-planar' })); const floats = new Float32Array(allocationSize / Float32Array.BYTES_PER_ELEMENT); for (let i = 0; i < numberOfChannels; i++) { audioSample.copyTo(floats, { planeIndex: i, format: 'f32-planar' }); for (let j = 0; j < outputs.length; j++) { const { frameCount, view } = outputs[j]!; for (let k = 0; k < frameCount; k++) { this.writeOutputValue( view, (k * numberOfChannels + i) * this.outputSampleSize, floats[j * CHUNK_SIZE + k]!, ); } } } if (shouldClose) { audioSample.close(); } const meta: EncodedAudioChunkMetadata = { decoderConfig: { codec: this.encodingConfig.codec, numberOfChannels, sampleRate, }, }; for (let i = 0; i < outputs.length; i++) { const { frameCount, view } = outputs[i]!; const outputBuffer = view.buffer; const startFrame = i * CHUNK_SIZE; const packet = new EncodedPacket( new Uint8Array(outputBuffer), 'key', timestamp + startFrame / sampleRate, frameCount / sampleRate, ); this.encodingConfig.onEncodedPacket?.(packet, meta); await this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta); // With backpressure } } private ensureEncoder(audioSample: AudioSample) { if (this.encoderInitialized) { return; } return this.ensureEncoderPromise = (async () => { const { numberOfChannels, sampleRate } = audioSample; const bitrate = this.encodingConfig.bitrate instanceof Quality ? this.encodingConfig.bitrate._toAudioBitrate(this.encodingConfig.codec) : this.encodingConfig.bitrate; const encoderConfig: AudioEncoderConfig = { codec: this.encodingConfig.fullCodecString ?? buildAudioCodecString( this.encodingConfig.codec, numberOfChannels, sampleRate, ), numberOfChannels, sampleRate, bitrate, ...getAudioEncoderConfigExtension(this.encodingConfig.codec), }; this.encodingConfig.onEncoderConfig?.(encoderConfig); const MatchingCustomEncoder = customAudioEncoders.find(x => x.supports( this.encodingConfig.codec, encoderConfig, )); if (MatchingCustomEncoder) { // @ts-expect-error "Can't create instance of abstract class 🤓" this.customEncoder = new MatchingCustomEncoder() as CustomAudioEncoder; // @ts-expect-error It's technically readonly this.customEncoder.codec = this.encodingConfig.codec; // @ts-expect-error It's technically readonly this.customEncoder.config = encoderConfig; // @ts-expect-error It's technically readonly this.customEncoder.onPacket = (packet, meta) => { if (!(packet instanceof EncodedPacket)) { throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.'); } if (meta !== undefined && (!meta || typeof meta !== 'object')) { throw new TypeError('The second argument passed to onPacket must be an object or undefined.'); } this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta); }; await this.customEncoder.init(); } else if ((PCM_AUDIO_CODECS as readonly string[]).includes(this.encodingConfig.codec)) { this.initPcmEncoder(); } else { if (typeof AudioEncoder === 'undefined') { throw new Error('AudioEncoder is not supported by this browser.'); } const support = await AudioEncoder.isConfigSupported(encoderConfig); if (!support.supported) { throw new Error( `This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,` + ` ${encoderConfig.numberOfChannels} channels, ${encoderConfig.sampleRate} Hz) is not` + ` supported by this browser. Consider using another codec or changing your audio parameters.`, ); } this.encoder = new AudioEncoder({ output: (chunk, meta) => { const packet = EncodedPacket.fromEncodedChunk(chunk); this.encodingConfig.onEncodedPacket?.(packet, meta); void this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta); }, error: (error) => { error.stack = new Error().stack; // Provide a more useful stack trace this.encoderError ??= error; }, }); this.encoder.configure(encoderConfig); } assert(this.source._connectedTrack); this.muxer = this.source._connectedTrack.output._muxer; this.encoderInitialized = true; })(); } private initPcmEncoder() { this.isPcmEncoder = true; const codec = this.encodingConfig.codec as PcmAudioCodec; const { dataType, sampleSize, littleEndian } = parsePcmCodec(codec); this.outputSampleSize = sampleSize; // All these functions receive a float sample as input and map it into the desired format switch (sampleSize) { case 1: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint8(byteOffset, clamp((value + 1) * 127.5, 0, 255)); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => { view.setInt8(byteOffset, clamp(Math.round(value * 128), -128, 127)); }; } else if (dataType === 'ulaw') { this.writeOutputValue = (view, byteOffset, value) => { const int16 = clamp(Math.floor(value * 32767), -32768, 32767); view.setUint8(byteOffset, toUlaw(int16)); }; } else if (dataType === 'alaw') { this.writeOutputValue = (view, byteOffset, value) => { const int16 = clamp(Math.floor(value * 32767), -32768, 32767); view.setUint8(byteOffset, toAlaw(int16)); }; } else { assert(false); } }; break; case 2: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint16(byteOffset, clamp((value + 1) * 32767.5, 0, 65535), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => view.setInt16(byteOffset, clamp(Math.round(value * 32767), -32768, 32767), littleEndian); } else { assert(false); } }; break; case 3: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => setUint24(view, byteOffset, clamp((value + 1) * 8388607.5, 0, 16777215), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => setInt24( view, byteOffset, clamp(Math.round(value * 8388607), -8388608, 8388607), littleEndian, ); } else { assert(false); } }; break; case 4: { if (dataType === 'unsigned') { this.writeOutputValue = (view, byteOffset, value) => view.setUint32(byteOffset, clamp((value + 1) * 2147483647.5, 0, 4294967295), littleEndian); } else if (dataType === 'signed') { this.writeOutputValue = (view, byteOffset, value) => view.setInt32( byteOffset, clamp(Math.round(value * 2147483647), -2147483648, 2147483647), littleEndian, ); } else if (dataType === 'float') { this.writeOutputValue = (view, byteOffset, value) => view.setFloat32(byteOffset, value, littleEndian); } else { assert(false); } }; break; case 8: { if (dataType === 'float') { this.writeOutputValue = (view, byteOffset, value) => view.setFloat64(byteOffset, value, littleEndian); } else { assert(false); } }; break; default: { assertNever(sampleSize); assert(false); }; } } async flushAndClose(forceClose: boolean) { this.checkForEncoderError(); if (this.customEncoder) { if (!forceClose) { void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush()); } await this.customEncoderCallSerializer.call(() => this.customEncoder!.close()); } else if (this.encoder) { if (!forceClose) { await this.encoder.flush(); } this.encoder.close(); } this.checkForEncoderError(); } getQueueSize() { if (this.customEncoder) { return this.customEncoderQueueSize; } else if (this.isPcmEncoder) { return 0; } else { return this.encoder?.encodeQueueSize ?? 0; } } checkForEncoderError() { if (this.encoderError) { throw this.encoderError; } } } /** * This source can be used to add raw, unencoded audio samples to an output audio track. These samples will * automatically be encoded and then piped into the output. * @public */ export class AudioSampleSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; constructor(encodingConfig: AudioEncodingConfig) { validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); } /** * Encodes an audio sample and then adds it to the output. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(audioSample: AudioSample) { if (!(audioSample instanceof AudioSample)) { throw new TypeError('audioSample must be an AudioSample.'); } return this._encoder.add(audioSample, false); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * This source can be used to add audio data from an AudioBuffer to the output track. This is useful when working with * the Web Audio API. * @public */ export class AudioBufferSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; /** @internal */ private _accumulatedTime = 0; constructor(encodingConfig: AudioEncodingConfig) { validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); } /** * Converts an AudioBuffer to audio samples, encodes them and adds them to the output. The first AudioBuffer will * be played at timestamp 0, and any subsequent AudioBuffer will have a timestamp equal to the total duration of * all previous AudioBuffers. * * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise * to respect writer and encoder backpressure. */ add(audioBuffer: AudioBuffer) { if (!(audioBuffer instanceof AudioBuffer)) { throw new TypeError('audioBuffer must be an AudioBuffer.'); } const audioSamples = AudioSample.fromAudioBuffer(audioBuffer, this._accumulatedTime); const promises = audioSamples.map(sample => this._encoder.add(sample, true)); this._accumulatedTime += audioBuffer.duration; return Promise.all(promises); } /** @internal */ override _flushAndClose(forceClose: boolean) { return this._encoder.flushAndClose(forceClose); } } /** * Audio source that encodes the data of a MediaStreamAudioTrack and pipes it into the output. This is useful for * capturing live or real-time audio such as microphones or audio from other media elements. Audio will automatically * start being captured once the connected Output is started, and will keep being captured until the Output is * finalized or this source is closed. * @public */ export class MediaStreamAudioTrackSource extends AudioSource { /** @internal */ private _encoder: AudioEncoderWrapper; /** @internal */ private _abortController: AbortController | null = null; /** @internal */ private _track: MediaStreamAudioTrack; /** @internal */ private _audioContext: AudioContext | null = null; /** @internal */ private _scriptProcessorNode: ScriptProcessorNode | null = null; // Deprecated but goated /** @internal */ private _promiseWithResolvers = promiseWithResolvers(); /** @internal */ private _errorPromiseAccessed = false; /** A promise that rejects upon any error within this source. This promise never resolves. */ get errorPromise() { this._errorPromiseAccessed = true; return this._promiseWithResolvers.promise; } constructor(track: MediaStreamAudioTrack, encodingConfig: AudioEncodingConfig) { if (!(track instanceof MediaStreamTrack) || track.kind !== 'audio') { throw new TypeError('track must be an audio MediaStreamTrack.'); } validateAudioEncodingConfig(encodingConfig); super(encodingConfig.codec); this._encoder = new AudioEncoderWrapper(this, encodingConfig); this._track = track; } /** @internal */ override async _start() { if (!this._errorPromiseAccessed) { console.warn( 'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal' + ' errors get bubbled up properly.', ); } this._abortController = new AbortController(); if (typeof MediaStreamTrackProcessor !== 'undefined') { // Great, MediaStreamTrackProcessor is supported, this is the preferred way of doing things let firstAudioDataTimestamp: number | null = null; const processor = new MediaStreamTrackProcessor({ track: this._track }); const consumer = new WritableStream<AudioData>({ write: (audioData) => { if (firstAudioDataTimestamp === null) { firstAudioDataTimestamp = audioData.timestamp / 1e6; const muxer = this._connectedTrack!.output._muxer; if (muxer.firstMediaStreamTimestamp === null) { muxer.firstMediaStreamTimestamp = performance.now() / 1000; this._timestampOffset = -firstAudioDataTimestamp; } else { this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp) - firstAudioDataTimestamp; } } if (this._encoder.getQueueSize() >= 4) { // Drop data if the encoder is overloaded audioData.close(); return; } void this._encoder.add(new AudioSample(audioData), true) .catch((error) => { this._abortController?.abort(); this._promiseWithResolvers.reject(error); }); }, }); processor.readable.pipeTo(consumer, { signal: this._abortController.signal, }).catch((error) => { // Handle AbortError silently if (error instanceof DOMException && error.name === 'AbortError') return; this._promiseWithResolvers.reject(error); }); } else { // Let's fall back to an AudioContext approach // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access const AudioContext = window.AudioContext || (window as any).webkitAudioContext; this._audioContext = new AudioContext({ sampleRate: this._track.getSettings().sampleRate }); const sourceNode = this._audioContext.createMediaStreamSource(new MediaStream([this._track])); this._scriptProcessorNode = this._audioContext.createScriptProcessor(4096); if (this._audioContext.state === 'suspended') { await this._audioContext.resume(); } sourceNode.connect(this._scriptProcessorNode); this._scriptProcessorNode.connect(thi