// mediabunny
// Version:
// Pure TypeScript media toolkit for reading, writing, and converting media files, directly in the browser.
// 1,463 lines (1,262 loc) • 57.1 kB
// text/typescript
/*!
* Copyright (c) 2025-present, Vanilagy and contributors
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
import {
AUDIO_CODECS,
AudioCodec,
buildAudioCodecString,
buildVideoCodecString,
getAudioEncoderConfigExtension,
getVideoEncoderConfigExtension,
inferCodecFromCodecString,
parsePcmCodec,
PCM_AUDIO_CODECS,
PcmAudioCodec,
Quality,
SUBTITLE_CODECS,
SubtitleCodec,
VIDEO_CODECS,
VideoCodec,
} from './codec';
import { OutputAudioTrack, OutputSubtitleTrack, OutputTrack, OutputVideoTrack } from './output';
import { assert, assertNever, CallSerializer, clamp, promiseWithResolvers, setInt24, setUint24 } from './misc';
import { Muxer } from './muxer';
import { SubtitleParser } from './subtitles';
import { toAlaw, toUlaw } from './pcm';
import {
CustomVideoEncoder,
CustomAudioEncoder,
customVideoEncoders,
customAudioEncoders,
} from './custom-coder';
import { EncodedPacket } from './packet';
import { AudioSample, VideoSample } from './sample';
/**
 * Abstract foundation shared by every media source. A media source is the object through which media samples are
 * added to an output file.
 * @public
 */
export abstract class MediaSource {
	/** @internal */
	_connectedTrack: OutputTrack | null = null;
	/** @internal */
	_closingPromise: Promise<void> | null = null;
	/** @internal */
	_closed = false;
	/**
	 * @internal
	 * Offset, in seconds, that gets added to every timestamp this source generates.
	 */
	_timestampOffset = 0;

	/**
	 * @internal
	 * Throws unless this source is connected to a track whose output has been started and is still accepting data,
	 * and the source itself has not finished closing.
	 */
	_ensureValidAdd() {
		const track = this._connectedTrack;
		if (!track) {
			throw new Error('Source is not connected to an output track.');
		}

		switch (track.output.state) {
			case 'canceled':
				throw new Error('Output has been canceled.');
			case 'finalizing':
			case 'finalized':
				throw new Error('Output has been finalized.');
			case 'pending':
				throw new Error('Output has not started.');
		}

		if (this._closed) {
			throw new Error('Source is closed.');
		}
	}

	/** @internal */
	async _start() {}

	/** @internal */
	// eslint-disable-next-line @typescript-eslint/no-unused-vars
	async _flushAndClose(forceClose: boolean) {}

	/**
	 * Closes this source: no more samples can be added afterwards, and the output file is told that this track will
	 * not receive any further samples. Calling `.close()` is optional, but doing so after the last sample is
	 * recommended for improved performance and reduced memory usage.
	 */
	close() {
		if (this._closingPromise) {
			// close() has already been called; nothing more to do
			return;
		}

		const track = this._connectedTrack;
		if (!track) {
			throw new Error('Cannot call close without connecting the source to an output track.');
		}
		if (track.output.state === 'pending') {
			throw new Error('Cannot call close before output has been started.');
		}

		const performClose = async () => {
			await this._flushAndClose(false);
			this._closed = true;

			// Only notify the muxer if the output has not begun finalizing in the meantime
			const { state } = track.output;
			if (state !== 'finalizing' && state !== 'finalized') {
				track.output._muxer.onTrackClose(track);
			}
		};
		this._closingPromise = performClose();
	}

	/** @internal */
	async _flushOrWaitForOngoingClose(forceClose: boolean) {
		// An ongoing close already includes a flush, so reuse it rather than flushing a second time
		return this._closingPromise ?? this._flushAndClose(forceClose);
	}
}
/**
 * Common ancestor of all video sources, i.e. the sources that feed video tracks.
 * @public
 */
export abstract class VideoSource extends MediaSource {
	/** @internal */
	override _connectedTrack: OutputVideoTrack | null = null;
	/** @internal */
	_codec: VideoCodec;

	/**
	 * @param codec - The codec of the video data this source provides; must be one of `VIDEO_CODECS`.
	 * @throws TypeError if `codec` is not a known video codec.
	 */
	constructor(codec: VideoCodec) {
		super();

		const codecIsKnown = VIDEO_CODECS.includes(codec);
		if (!codecIsKnown) {
			const validCodecs = VIDEO_CODECS.join(', ');
			throw new TypeError(`Invalid video codec '${codec}'. Must be one of: ${validCodecs}.`);
		}

		this._codec = codec;
	}
}
/**
 * The simplest video source there is: it takes already-encoded packets and pipes them straight into the output file.
 * @public
 */
export class EncodedVideoPacketSource extends VideoSource {
	constructor(codec: VideoCodec) {
		super(codec);
	}

	/**
	 * Adds an encoded packet to the output video track. Packets must be added in *decode order*, while a packet's
	 * timestamp must be its *presentation timestamp*. B-frames are handled automatically.
	 *
	 * @param packet - The encoded packet to add; metadata-only packets are rejected.
	 * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid
	 * decoder config.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(packet: EncodedPacket, meta?: EncodedVideoChunkMetadata) {
		const packetIsValid = packet instanceof EncodedPacket;
		if (!packetIsValid) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		if (packet.isMetadataOnly) {
			throw new TypeError('Metadata-only packets cannot be added.');
		}
		if (meta !== undefined) {
			const metaIsObject = !!meta && typeof meta === 'object';
			if (!metaIsObject) {
				throw new TypeError('meta, when provided, must be an object.');
			}
		}

		this._ensureValidAdd();

		const track = this._connectedTrack!;
		return track.output._muxer.addEncodedVideoPacket(track, packet, meta);
	}
}
/**
* Configuration object that controls video encoding. Can be used to set codec, quality, and more.
* @public
*/
export type VideoEncodingConfig = {
/** The video codec that should be used for encoding the video samples (frames). */
codec: VideoCodec;
/**
* The target bitrate for the encoded video, in bits per second; a numeric value must be a positive integer.
* Alternatively, a subjective Quality can be provided, which is converted to a concrete bitrate using the codec and
* the frame dimensions.
*/
bitrate: number | Quality;
/**
* The latency mode used by the encoder; controls the performance-quality tradeoff. When provided, must be
* `'quality'` or `'realtime'`.
*/
latencyMode?: VideoEncoderConfig['latencyMode'];
/**
* The interval, in seconds, of how often frames are encoded as a key frame. The default is 5 seconds. Must be a
* non-negative number; a value of 0 causes every frame to be encoded as a key frame. Frequent key frames improve
* seeking behavior but increase file size. When using multiple video tracks, you should give them all the same key
* frame interval.
*/
keyFrameInterval?: number;
/**
* The full codec string as specified in the WebCodecs Codec Registry. This string must match the codec
* specified in `codec`. When not set, a fitting codec string will be constructed automatically by the library.
*/
fullCodecString?: string;
/**
* Called for each successfully encoded packet, before the packet is handed to the muxer. Both the packet and the
* encoding metadata are passed.
*/
onEncodedPacket?: (packet: EncodedPacket, meta: EncodedVideoChunkMetadata | undefined) => unknown;
/**
* Called when the internal encoder config, as used by the WebCodecs API, is created. This happens before the
* encoder itself is created and configured.
*/
onEncoderConfig?: (config: VideoEncoderConfig) => unknown;
};
/**
 * Validates a video encoding config and throws a TypeError describing the first invalid field it encounters.
 *
 * Checked, in order: the config is an object, `codec` is a known video codec, `bitrate` is a positive integer or a
 * Quality, `latencyMode` is `'quality'` or `'realtime'`, `keyFrameInterval` is a finite non-negative number,
 * `fullCodecString` is a string that maps back to `codec`, and both callbacks are functions.
 *
 * @param config - The config to validate.
 * @throws TypeError if any field is invalid.
 */
const validateVideoEncodingConfig = (config: VideoEncodingConfig) => {
	if (!config || typeof config !== 'object') {
		throw new TypeError('Encoding config must be an object.');
	}
	if (!VIDEO_CODECS.includes(config.codec)) {
		throw new TypeError(`Invalid video codec '${config.codec}'. Must be one of: ${VIDEO_CODECS.join(', ')}.`);
	}
	if (!(config.bitrate instanceof Quality) && (!Number.isInteger(config.bitrate) || config.bitrate <= 0)) {
		throw new TypeError('config.bitrate must be a positive integer or a quality.');
	}
	if (config.latencyMode !== undefined && !['quality', 'realtime'].includes(config.latencyMode)) {
		throw new TypeError('config.latencyMode, when provided, must be \'quality\' or \'realtime\'.');
	}
	if (
		config.keyFrameInterval !== undefined
		&& (!Number.isFinite(config.keyFrameInterval) || config.keyFrameInterval < 0)
	) {
		throw new TypeError('config.keyFrameInterval, when provided, must be a non-negative number.');
	}
	if (config.fullCodecString !== undefined && typeof config.fullCodecString !== 'string') {
		throw new TypeError('config.fullCodecString, when provided, must be a string.');
	}
	if (config.fullCodecString !== undefined && inferCodecFromCodecString(config.fullCodecString) !== config.codec) {
		throw new TypeError(
			`config.fullCodecString, when provided, must be a string that matches the specified codec`
			+ ` (${config.codec}).`,
		);
	}
	if (config.onEncodedPacket !== undefined && typeof config.onEncodedPacket !== 'function') {
		// The message must name the actual field (`onEncodedPacket`) so the user can find the offending option
		throw new TypeError('config.onEncodedPacket, when provided, must be a function.');
	}
	if (config.onEncoderConfig !== undefined && typeof config.onEncoderConfig !== 'function') {
		throw new TypeError('config.onEncoderConfig, when provided, must be a function.');
	}
};
/**
 * Internal helper that owns the encoder behind the raw-sample video sources. The encoder is created lazily when the
 * first sample is added: a registered custom encoder is used if one supports the config, otherwise the WebCodecs
 * `VideoEncoder`. Encoded packets are forwarded to the muxer of the source's connected track.
 */
class VideoEncoderWrapper {
	private ensureEncoderPromise: Promise<void> | null = null;
	private encoderInitialized = false;
	private encoder: VideoEncoder | null = null;
	private muxer: Muxer | null = null;
	private lastMultipleOfKeyFrameInterval = -1;
	private lastWidth: number | null = null;
	private lastHeight: number | null = null;
	private customEncoder: CustomVideoEncoder | null = null;
	private customEncoderCallSerializer = new CallSerializer();
	/** Number of custom-encoder `encode` calls that have been queued but have not settled yet. */
	private customEncoderQueueSize = 0;
	/**
	 * Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
	 * However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught.
	 * So, we keep track of the encoder error and throw it as soon as we get the chance.
	 */
	private encoderError: Error | null = null;

	constructor(private source: VideoSource, private encodingConfig: VideoEncodingConfig) {}

	/**
	 * Encodes a video sample; the resulting packets are forwarded to the muxer.
	 *
	 * @param videoSample - The sample to encode. Its coded size must match that of the first sample ever added.
	 * @param shouldClose - Whether this method is responsible for closing `videoSample` (also done on error).
	 * @param encodeOptions - Optional per-frame encode options. `keyFrame` is additionally forced whenever the
	 * sample's timestamp enters a new key frame interval.
	 * @returns A Promise that resolves once encoder and writer backpressure allow more samples to be added.
	 * @throws Any previously recorded encoder error, any error from `_ensureValidAdd`, an Error when the sample size
	 * changes, or the error that made encoder initialization fail.
	 */
	async add(videoSample: VideoSample, shouldClose: boolean, encodeOptions?: VideoEncoderEncodeOptions) {
		try {
			this.checkForEncoderError();
			this.source._ensureValidAdd();

			// Ensure video sample size remains constant
			if (this.lastWidth !== null && this.lastHeight !== null) {
				if (videoSample.codedWidth !== this.lastWidth || videoSample.codedHeight !== this.lastHeight) {
					throw new Error(
						`Video sample size must remain constant. Expected ${this.lastWidth}x${this.lastHeight},`
						+ ` got ${videoSample.codedWidth}x${videoSample.codedHeight}.`,
					);
				}
			} else {
				this.lastWidth = videoSample.codedWidth;
				this.lastHeight = videoSample.codedHeight;
			}

			if (!this.encoderInitialized) {
				if (!this.ensureEncoderPromise) {
					// The returned promise is the one stored in `ensureEncoderPromise`, which is awaited below
					void this.ensureEncoder(videoSample);
				}

				// No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have
				// synchronously completed and the encoder is already initialized. In this case, we don't need to await
				// the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are
				// calling this method: It's important that the call that initialized the encoder go through this
				// code first.
				if (!this.encoderInitialized) {
					await this.ensureEncoderPromise;
				}
			}
			assert(this.encoderInitialized);

			const keyFrameInterval = this.encodingConfig.keyFrameInterval ?? 5;
			const multipleOfKeyFrameInterval = Math.floor(videoSample.timestamp / keyFrameInterval);

			// Ensure a key frame every keyFrameInterval seconds. It is important that all video tracks follow the same
			// "key frame" rhythm, because aligned key frames are required to start new fragments in ISOBMFF or clusters
			// in Matroska (or at least desirable).
			const finalEncodeOptions = {
				...encodeOptions,
				keyFrame: encodeOptions?.keyFrame
					|| keyFrameInterval === 0
					|| multipleOfKeyFrameInterval !== this.lastMultipleOfKeyFrameInterval,
			};
			this.lastMultipleOfKeyFrameInterval = multipleOfKeyFrameInterval;

			if (this.customEncoder) {
				this.customEncoderQueueSize++;

				// We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
				const clonedSample = videoSample.clone();

				const promise = this.customEncoderCallSerializer
					.call(() => this.customEncoder!.encode(clonedSample, finalEncodeOptions))
					.catch((error: Error) => this.encoderError ??= error)
					.finally(() => {
						// The call has settled and is no longer queued, whether it succeeded or failed. Decrementing
						// only on success would leave the counter inflated after a failure, making the encoder look
						// permanently overloaded to callers that consult `getQueueSize`.
						this.customEncoderQueueSize--;
						clonedSample.close();
						// `videoSample` gets closed in the finally block at the end of the method
					});

				if (this.customEncoderQueueSize >= 4) {
					await promise;
				}
			} else {
				assert(this.encoder);

				const videoFrame = videoSample.toVideoFrame();
				this.encoder.encode(videoFrame, finalEncodeOptions);
				videoFrame.close();

				if (shouldClose) {
					videoSample.close();
				}

				// We need to do this after sending the frame to the encoder as the frame otherwise might be closed
				if (this.encoder.encodeQueueSize >= 4) {
					await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true }));
				}
			}

			await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure
		} finally {
			if (shouldClose) {
				// Make sure it's always closed, even if there was an error
				videoSample.close();
			}
		}
	}

	/**
	 * Starts encoder initialization (at most once) and returns the promise tracking it, which is also stored in
	 * `ensureEncoderPromise`.
	 *
	 * This method is deliberately NOT `async`: an async wrapper would create a second promise that rejects together
	 * with the stored one. Since callers discard the return value and await `ensureEncoderPromise` instead, that
	 * second promise would reject without a handler and raise an unhandled rejection on top of the error that is
	 * already being propagated properly.
	 *
	 * @param videoSample - The first sample; its coded size determines the encoder's width and height.
	 */
	private ensureEncoder(videoSample: VideoSample): Promise<void> {
		if (this.ensureEncoderPromise) {
			return this.ensureEncoderPromise;
		}

		return this.ensureEncoderPromise = (async () => {
			const width = videoSample.codedWidth;
			const height = videoSample.codedHeight;
			const bitrate = this.encodingConfig.bitrate instanceof Quality
				? this.encodingConfig.bitrate._toVideoBitrate(this.encodingConfig.codec, width, height)
				: this.encodingConfig.bitrate;

			const encoderConfig: VideoEncoderConfig = {
				codec: this.encodingConfig.fullCodecString ?? buildVideoCodecString(
					this.encodingConfig.codec,
					width,
					height,
					bitrate,
				),
				width,
				height,
				bitrate,
				framerate: this.source._connectedTrack?.metadata.frameRate,
				latencyMode: this.encodingConfig.latencyMode,
				...getVideoEncoderConfigExtension(this.encodingConfig.codec),
			};
			this.encodingConfig.onEncoderConfig?.(encoderConfig);

			// Custom encoders take precedence over the WebCodecs encoder
			const MatchingCustomEncoder = customVideoEncoders.find(x => x.supports(
				this.encodingConfig.codec,
				encoderConfig,
			));

			if (MatchingCustomEncoder) {
				// @ts-expect-error "Can't create instance of abstract class 🤓"
				this.customEncoder = new MatchingCustomEncoder() as CustomVideoEncoder;
				// @ts-expect-error It's technically readonly
				this.customEncoder.codec = this.encodingConfig.codec;
				// @ts-expect-error It's technically readonly
				this.customEncoder.config = encoderConfig;
				// @ts-expect-error It's technically readonly
				this.customEncoder.onPacket = (packet, meta) => {
					if (!(packet instanceof EncodedPacket)) {
						throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
					}
					if (meta !== undefined && (!meta || typeof meta !== 'object')) {
						throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
					}

					this.encodingConfig.onEncodedPacket?.(packet, meta);
					void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta);
				};

				await this.customEncoder.init();
			} else {
				if (typeof VideoEncoder === 'undefined') {
					throw new Error('VideoEncoder is not supported by this browser.');
				}

				const support = await VideoEncoder.isConfigSupported(encoderConfig);
				if (!support.supported) {
					throw new Error(
						`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
						+ ` ${encoderConfig.width}x${encoderConfig.height}) is not supported by this browser. Consider`
						+ ` using another codec or changing your video parameters.`,
					);
				}

				this.encoder = new VideoEncoder({
					output: (chunk, meta) => {
						const packet = EncodedPacket.fromEncodedChunk(chunk);

						this.encodingConfig.onEncodedPacket?.(packet, meta);
						void this.muxer!.addEncodedVideoPacket(this.source._connectedTrack!, packet, meta);
					},
					error: (error) => {
						error.stack = new Error().stack; // Provide a more useful stack trace
						this.encoderError ??= error;
					},
				});
				this.encoder.configure(encoderConfig);
			}

			assert(this.source._connectedTrack);
			this.muxer = this.source._connectedTrack.output._muxer;

			this.encoderInitialized = true;
		})();
	}

	/**
	 * Closes the active encoder, if any, and rethrows any recorded encoder error.
	 *
	 * @param forceClose - When true, the encoder is closed without being flushed first.
	 */
	async flushAndClose(forceClose: boolean) {
		this.checkForEncoderError();

		if (this.customEncoder) {
			if (!forceClose) {
				void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush());
			}

			await this.customEncoderCallSerializer.call(() => this.customEncoder!.close());
		} else if (this.encoder) {
			if (!forceClose) {
				await this.encoder.flush();
			}

			this.encoder.close();
		}

		this.checkForEncoderError();
	}

	/** Returns how many encode requests are currently queued in the active encoder (0 if there is none yet). */
	getQueueSize() {
		if (this.customEncoder) {
			return this.customEncoderQueueSize;
		} else {
			return this.encoder?.encodeQueueSize ?? 0;
		}
	}

	/** Throws the recorded out-of-band encoder error, if there is one. */
	checkForEncoderError() {
		if (this.encoderError) {
			throw this.encoderError;
		}
	}
}
/**
 * Source for adding raw, unencoded video samples (frames) to an output video track. Every frame is encoded
 * automatically before being piped into the output.
 * @public
 */
export class VideoSampleSource extends VideoSource {
	/** @internal */
	private _encoder: VideoEncoderWrapper;

	/**
	 * @param encodingConfig - Controls how the added samples are encoded.
	 * @throws TypeError if `encodingConfig` is invalid.
	 */
	constructor(encodingConfig: VideoEncodingConfig) {
		validateVideoEncodingConfig(encodingConfig);

		const { codec } = encodingConfig;
		super(codec);

		this._encoder = new VideoEncoderWrapper(this, encodingConfig);
	}

	/**
	 * Encodes a video sample (frame) and then adds it to the output.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(videoSample: VideoSample, encodeOptions?: VideoEncoderEncodeOptions) {
		const isVideoSample = videoSample instanceof VideoSample;
		if (!isVideoSample) {
			throw new TypeError('videoSample must be a VideoSample.');
		}

		// The sample was supplied by the caller, so the encoder wrapper is told not to close it
		const closeSampleWhenDone = false;
		return this._encoder.add(videoSample, closeSampleWhenDone, encodeOptions);
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		const encoder = this._encoder;
		return encoder.flushAndClose(forceClose);
	}
}
/**
 * Source that adds video frames to the output track by capturing a fixed canvas element. Canvases are commonly used
 * for rendering, so this source offers a convenient wrapper around VideoSampleSource.
 * @public
 */
export class CanvasSource extends VideoSource {
	/** @internal */
	private _encoder: VideoEncoderWrapper;
	/** @internal */
	private _canvas: HTMLCanvasElement | OffscreenCanvas;

	/**
	 * @param canvas - The canvas whose contents get captured on every call to `add`.
	 * @param encodingConfig - Controls how the captured frames are encoded.
	 * @throws TypeError if `canvas` is not a canvas or `encodingConfig` is invalid.
	 */
	constructor(canvas: HTMLCanvasElement | OffscreenCanvas, encodingConfig: VideoEncodingConfig) {
		// Either canvas class may be missing from the current environment, hence the `typeof` guards
		const isSupportedCanvas
			= (typeof HTMLCanvasElement !== 'undefined' && canvas instanceof HTMLCanvasElement)
				|| (typeof OffscreenCanvas !== 'undefined' && canvas instanceof OffscreenCanvas);
		if (!isSupportedCanvas) {
			throw new TypeError('canvas must be an HTMLCanvasElement or OffscreenCanvas.');
		}
		validateVideoEncodingConfig(encodingConfig);

		super(encodingConfig.codec);

		this._encoder = new VideoEncoderWrapper(this, encodingConfig);
		this._canvas = canvas;
	}

	/**
	 * Captures the current canvas state as a video sample (frame), encodes it and adds it to the output.
	 *
	 * @param timestamp - The timestamp of the sample, in seconds.
	 * @param duration - The duration of the sample, in seconds.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(timestamp: number, duration = 0, encodeOptions?: VideoEncoderEncodeOptions) {
		const isNonNegativeNumber = (value: number) => Number.isFinite(value) && value >= 0;

		if (!isNonNegativeNumber(timestamp)) {
			throw new TypeError('timestamp must be a non-negative number.');
		}
		if (!isNonNegativeNumber(duration)) {
			throw new TypeError('duration must be a non-negative number.');
		}

		// The sample is created here, so the encoder wrapper is asked to close it when done
		const capturedSample = new VideoSample(this._canvas, { timestamp, duration });
		return this._encoder.add(capturedSample, true, encodeOptions);
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		const encoder = this._encoder;
		return encoder.flushAndClose(forceClose);
	}
}
/**
* Video source that encodes the frames of a MediaStreamVideoTrack and pipes them into the output. This is useful for
* capturing live or real-time data such as webcams or screen captures. Frames will automatically start being captured
* once the connected Output is started, and will keep being captured until the Output is finalized or this source
* is closed.
*
* Incoming frames are dropped while the encoder's queue holds four or more pending frames. Errors that occur while
* capturing or encoding are reported by rejecting `errorPromise`.
* @public
*/
export class MediaStreamVideoTrackSource extends VideoSource {
/** @internal */
private _encoder: VideoEncoderWrapper;
/**
* @internal
* Used to cancel the main-thread frame pipe. Created in `_start` and cleared in `_flushAndClose`.
*/
private _abortController: AbortController | null = null;
/** @internal */
private _track: MediaStreamVideoTrack;
/**
* @internal
* ID identifying this track in messages exchanged with the processor worker; stays null when the worker is not used.
*/
private _workerTrackId: number | null = null;
/**
* @internal
* The 'message' listener registered on the processor worker; stays null when the worker is not used.
*/
private _workerListener: ((event: MessageEvent) => void) | null = null;
/** @internal */
private _promiseWithResolvers = promiseWithResolvers();
/**
* @internal
* Whether `errorPromise` has been read at least once; `_start` logs a warning if it has not.
*/
private _errorPromiseAccessed = false;
/** A promise that rejects upon any error within this source. This promise never resolves. */
get errorPromise() {
this._errorPromiseAccessed = true;
return this._promiseWithResolvers.promise;
}
/**
* @param track - The video track to capture frames from.
* @param encodingConfig - Controls how the captured frames are encoded. Its `latencyMode` is always overridden
* with `'realtime'`.
* @throws TypeError if `track` is not a video MediaStreamTrack or `encodingConfig` is invalid.
*/
constructor(track: MediaStreamVideoTrack, encodingConfig: VideoEncodingConfig) {
if (!(track instanceof MediaStreamTrack) || track.kind !== 'video') {
throw new TypeError('track must be a video MediaStreamTrack.');
}
validateVideoEncodingConfig(encodingConfig);
// Copy the config rather than mutating the caller's object
encodingConfig = {
...encodingConfig,
latencyMode: 'realtime',
};
super(encodingConfig.codec);
this._encoder = new VideoEncoderWrapper(this, encodingConfig);
this._track = track;
}
/**
* @internal
* Begins capturing frames from the track, either directly via MediaStreamTrackProcessor or, where that is only
* available in a worker, through the processor worker. Throws if neither is supported.
*/
override async _start() {
if (!this._errorPromiseAccessed) {
console.warn(
'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal'
+ ' errors get bubbled up properly.',
);
}
this._abortController = new AbortController();
let firstVideoFrameTimestamp: number | null = null;
// Set once adding a frame has failed; all frames arriving afterwards are discarded
let errored = false;
const onVideoFrame = (videoFrame: VideoFrame) => {
if (errored) {
videoFrame.close();
return;
}
if (firstVideoFrameTimestamp === null) {
// VideoFrame timestamps are in microseconds; convert to seconds
firstVideoFrameTimestamp = videoFrame.timestamp / 1e6;
const muxer = this._connectedTrack!.output._muxer;
if (muxer.firstMediaStreamTimestamp === null) {
// No media stream source has recorded a start time on this muxer yet: record the current time and
// offset this track so that its first frame lands at timestamp zero
muxer.firstMediaStreamTimestamp = performance.now() / 1000;
this._timestampOffset = -firstVideoFrameTimestamp;
} else {
// A start time is already recorded: place the first frame at the wall-clock time elapsed since then
this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp)
- firstVideoFrameTimestamp;
}
}
if (this._encoder.getQueueSize() >= 4) {
// Drop frames if the encoder is overloaded
videoFrame.close();
return;
}
// Passing `true` makes the encoder wrapper close the sample once it is done with it
void this._encoder.add(new VideoSample(videoFrame), true)
.catch((error) => {
errored = true;
this._abortController?.abort();
this._promiseWithResolvers.reject(error);
if (this._workerTrackId !== null) {
// Tell the worker to stop the track
sendMessageToMediaStreamTrackProcessorWorker({
type: 'stopTrack',
trackId: this._workerTrackId,
});
}
});
};
if (typeof MediaStreamTrackProcessor !== 'undefined') {
// We can do it here directly, perfect
const processor = new MediaStreamTrackProcessor({ track: this._track });
const consumer = new WritableStream<VideoFrame>({ write: onVideoFrame });
processor.readable.pipeTo(consumer, {
signal: this._abortController.signal,
}).catch((error) => {
// Handle AbortError silently
if (error instanceof DOMException && error.name === 'AbortError') return;
this._promiseWithResolvers.reject(error);
});
} else {
// It might still be supported in a worker, so let's check that
const supportedInWorker = await mediaStreamTrackProcessorIsSupportedInWorker();
if (supportedInWorker) {
this._workerTrackId = nextMediaStreamTrackProcessorWorkerId++;
// NOTE(review): the second argument is presumably the transfer list handing the track over to the
// worker - confirm against sendMessageToMediaStreamTrackProcessorWorker
sendMessageToMediaStreamTrackProcessorWorker({
type: 'videoTrack',
trackId: this._workerTrackId,
track: this._track,
}, [this._track]);
// Only react to messages addressed to this track's ID
this._workerListener = (event: MessageEvent) => {
const message = event.data as MediaStreamTrackProcessorWorkerMessage;
if (message.type === 'videoFrame' && message.trackId === this._workerTrackId) {
onVideoFrame(message.videoFrame);
} else if (message.type === 'error' && message.trackId === this._workerTrackId) {
this._promiseWithResolvers.reject(message.error);
}
};
mediaStreamTrackProcessorWorker!.addEventListener('message', this._workerListener);
} else {
throw new Error('MediaStreamTrackProcessor is required but not supported by this browser.');
}
}
}
/**
* @internal
* Stops frame capture (aborting the main-thread pipe and/or asking the worker to stop the track and waiting for its
* confirmation), then flushes and closes the encoder.
*/
override async _flushAndClose(forceClose: boolean) {
if (this._abortController) {
this._abortController.abort();
this._abortController = null;
}
if (this._workerTrackId !== null) {
assert(this._workerListener);
sendMessageToMediaStreamTrackProcessorWorker({
type: 'stopTrack',
trackId: this._workerTrackId,
});
// Wait for the worker to stop the track
await new Promise<void>((resolve) => {
const listener = (event: MessageEvent) => {
const message = event.data as MediaStreamTrackProcessorWorkerMessage;
if (message.type === 'trackStopped' && message.trackId === this._workerTrackId) {
assert(this._workerListener);
// Detach both the frame listener and this temporary listener before resolving
mediaStreamTrackProcessorWorker!.removeEventListener('message', this._workerListener);
mediaStreamTrackProcessorWorker!.removeEventListener('message', listener);
resolve();
}
};
mediaStreamTrackProcessorWorker!.addEventListener('message', listener);
});
}
await this._encoder.flushAndClose(forceClose);
}
}
/**
 * Common ancestor of all audio sources, i.e. the sources that feed audio tracks.
 * @public
 */
export abstract class AudioSource extends MediaSource {
	/** @internal */
	override _connectedTrack: OutputAudioTrack | null = null;
	/** @internal */
	_codec: AudioCodec;

	/**
	 * @param codec - The codec of the audio data this source provides; must be one of `AUDIO_CODECS`.
	 * @throws TypeError if `codec` is not a known audio codec.
	 */
	constructor(codec: AudioCodec) {
		super();

		const codecIsKnown = AUDIO_CODECS.includes(codec);
		if (!codecIsKnown) {
			const validCodecs = AUDIO_CODECS.join(', ');
			throw new TypeError(`Invalid audio codec '${codec}'. Must be one of: ${validCodecs}.`);
		}

		this._codec = codec;
	}
}
/**
 * The simplest audio source there is: it takes already-encoded packets and pipes them straight into the output file.
 * @public
 */
export class EncodedAudioPacketSource extends AudioSource {
	constructor(codec: AudioCodec) {
		super(codec);
	}

	/**
	 * Adds an encoded packet to the output audio track. Packets must be added in *decode order*.
	 *
	 * @param packet - The encoded packet to add; metadata-only packets are rejected.
	 * @param meta - Additional metadata from the encoder. You should pass this for the first call, including a valid
	 * decoder config.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. You should await this Promise
	 * to respect writer and encoder backpressure.
	 */
	add(packet: EncodedPacket, meta?: EncodedAudioChunkMetadata) {
		const packetIsValid = packet instanceof EncodedPacket;
		if (!packetIsValid) {
			throw new TypeError('packet must be an EncodedPacket.');
		}
		if (packet.isMetadataOnly) {
			throw new TypeError('Metadata-only packets cannot be added.');
		}
		if (meta !== undefined) {
			const metaIsObject = !!meta && typeof meta === 'object';
			if (!metaIsObject) {
				throw new TypeError('meta, when provided, must be an object.');
			}
		}

		this._ensureValidAdd();

		const track = this._connectedTrack!;
		return track.output._muxer.addEncodedAudioPacket(track, packet, meta);
	}
}
/**
* Configuration object that controls audio encoding. Can be used to set codec, quality, and more.
* @public
*/
export type AudioEncodingConfig = {
/** The audio codec that should be used for encoding the audio samples. */
codec: AudioCodec;
/**
* The target bitrate for the encoded audio, in bits per second; a numeric value must be a positive integer.
* Alternatively, a subjective Quality can be provided. Required for compressed audio codecs, unused for PCM codecs.
*/
bitrate?: number | Quality;
/**
* The full codec string as specified in the WebCodecs Codec Registry. This string must match the codec
* specified in `codec`. When not set, a fitting codec string will be constructed automatically by the library.
*/
fullCodecString?: string;
/** Called for each successfully encoded packet. Both the packet and the encoding metadata are passed. */
onEncodedPacket?: (packet: EncodedPacket, meta: EncodedAudioChunkMetadata | undefined) => unknown;
/** Called when the internal encoder config, as used by the WebCodecs API, is created. */
onEncoderConfig?: (config: AudioEncoderConfig) => unknown;
};
/**
 * Validates an audio encoding config and throws a TypeError describing the first invalid field it encounters.
 *
 * Checked, in order: the config is an object, `codec` is a known audio codec, `bitrate` is present unless the codec
 * is a PCM codec, `bitrate` (when present) is a positive integer or a Quality, `fullCodecString` is a string that
 * maps back to `codec`, and both callbacks are functions.
 *
 * @param config - The config to validate.
 * @throws TypeError if any field is invalid.
 */
const validateAudioEncodingConfig = (config: AudioEncodingConfig) => {
	if (!config || typeof config !== 'object') {
		throw new TypeError('Encoding config must be an object.');
	}
	if (!AUDIO_CODECS.includes(config.codec)) {
		throw new TypeError(`Invalid audio codec '${config.codec}'. Must be one of: ${AUDIO_CODECS.join(', ')}.`);
	}
	if (
		config.bitrate === undefined
		// A bitrate is mandatory for every codec outside the PCM list; 'flac' is additionally named explicitly
		&& (!(PCM_AUDIO_CODECS as readonly string[]).includes(config.codec) || config.codec === 'flac')
	) {
		throw new TypeError('config.bitrate must be provided for compressed audio codecs.');
	}
	if (
		config.bitrate !== undefined
		&& !(config.bitrate instanceof Quality)
		&& (!Number.isInteger(config.bitrate) || config.bitrate <= 0)
	) {
		throw new TypeError('config.bitrate, when provided, must be a positive integer or a quality.');
	}
	if (config.fullCodecString !== undefined && typeof config.fullCodecString !== 'string') {
		throw new TypeError('config.fullCodecString, when provided, must be a string.');
	}
	if (config.fullCodecString !== undefined && inferCodecFromCodecString(config.fullCodecString) !== config.codec) {
		throw new TypeError(
			`config.fullCodecString, when provided, must be a string that matches the specified codec`
			+ ` (${config.codec}).`,
		);
	}
	if (config.onEncodedPacket !== undefined && typeof config.onEncodedPacket !== 'function') {
		// The message must name the actual field (`onEncodedPacket`) so the user can find the offending option
		throw new TypeError('config.onEncodedPacket, when provided, must be a function.');
	}
	if (config.onEncoderConfig !== undefined && typeof config.onEncoderConfig !== 'function') {
		throw new TypeError('config.onEncoderConfig, when provided, must be a function.');
	}
};
class AudioEncoderWrapper {
private ensureEncoderPromise: Promise<void> | null = null;
private encoderInitialized = false;
private encoder: AudioEncoder | null = null;
private muxer: Muxer | null = null;
private lastNumberOfChannels: number | null = null;
private lastSampleRate: number | null = null;
private isPcmEncoder = false;
private outputSampleSize: number | null = null;
private writeOutputValue: ((view: DataView, byteOffset: number, value: number) => void) | null = null;
private customEncoder: CustomAudioEncoder | null = null;
private customEncoderCallSerializer = new CallSerializer();
private customEncoderQueueSize = 0;
/**
* Encoders typically throw their errors "out of band", meaning asynchronously in some other execution context.
* However, we want to surface these errors to the user within the normal control flow, so they don't go uncaught.
* So, we keep track of the encoder error and throw it as soon as we get the chance.
*/
private encoderError: Error | null = null;
constructor(private source: AudioSource, private encodingConfig: AudioEncodingConfig) {}
async add(audioSample: AudioSample, shouldClose: boolean) {
try {
this.checkForEncoderError();
this.source._ensureValidAdd();
// Ensure audio parameters remain constant
if (this.lastNumberOfChannels !== null && this.lastSampleRate !== null) {
if (
audioSample.numberOfChannels !== this.lastNumberOfChannels
|| audioSample.sampleRate !== this.lastSampleRate
) {
throw new Error(
`Audio parameters must remain constant. Expected ${this.lastNumberOfChannels} channels at`
+ ` ${this.lastSampleRate} Hz, got ${audioSample.numberOfChannels} channels at`
+ ` ${audioSample.sampleRate} Hz.`,
);
}
} else {
this.lastNumberOfChannels = audioSample.numberOfChannels;
this.lastSampleRate = audioSample.sampleRate;
}
if (!this.encoderInitialized) {
if (!this.ensureEncoderPromise) {
void this.ensureEncoder(audioSample);
}
// No, this "if" statement is not useless. Sometimes, the above call to `ensureEncoder` might have
// synchronously completed and the encoder is already initialized. In this case, we don't need to await
// the promise anymore. This also fixes nasty async race condition bugs when multiple code paths are
// calling this method: It's important that the call that initialized the encoder go through this
// code first.
if (!this.encoderInitialized) {
await this.ensureEncoderPromise;
}
}
assert(this.encoderInitialized);
if (this.customEncoder) {
this.customEncoderQueueSize++;
// We clone the sample so it cannot be closed on us from the outside before it reaches the encoder
const clonedSample = audioSample.clone();
const promise = this.customEncoderCallSerializer
.call(() => this.customEncoder!.encode(clonedSample))
.then(() => this.customEncoderQueueSize--)
.catch((error: Error) => this.encoderError ??= error)
.finally(() => {
clonedSample.close();
// `audioSample` gets closed in the finally block at the end of the method
});
if (this.customEncoderQueueSize >= 4) {
await promise;
}
await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure
} else if (this.isPcmEncoder) {
await this.doPcmEncoding(audioSample, shouldClose);
} else {
assert(this.encoder);
const audioData = audioSample.toAudioData();
this.encoder.encode(audioData);
audioData.close();
if (shouldClose) {
audioSample.close();
}
if (this.encoder.encodeQueueSize >= 4) {
await new Promise(resolve => this.encoder!.addEventListener('dequeue', resolve, { once: true }));
}
await this.muxer!.mutex.currentPromise; // Allow the writer to apply backpressure
}
} finally {
if (shouldClose) {
// Make sure it's always closed, even if there was an error
audioSample.close();
}
}
}
/**
 * Encodes an audio sample as raw PCM: its float data is converted into the configured output sample format and
 * then handed to the muxer as one or more packets, each holding at most 2048 frames.
 *
 * @param audioSample - The sample to encode. Its data is read plane by plane in the 'f32-planar' format.
 * @param shouldClose - If true, the sample is closed as soon as its data has been converted.
 */
private async doPcmEncoding(audioSample: AudioSample, shouldClose: boolean) {
	assert(this.outputSampleSize);
	assert(this.writeOutputValue);

	// Read everything we need from the sample up front, since it may get closed before the packets are emitted
	const { numberOfChannels, numberOfFrames, sampleRate, timestamp } = audioSample;
	const bytesPerSample = this.outputSampleSize;
	const MAX_FRAMES_PER_PACKET = 2048;

	// One output buffer per packet; capping the frame count keeps individual packets from getting huge
	const chunks: {
		frameCount: number;
		view: DataView;
	}[] = [];
	for (let chunkIndex = 0; chunkIndex * MAX_FRAMES_PER_PACKET < numberOfFrames; chunkIndex++) {
		const remainingFrames = numberOfFrames - chunkIndex * MAX_FRAMES_PER_PACKET;
		const frameCount = Math.min(MAX_FRAMES_PER_PACKET, remainingFrames);
		const byteLength = frameCount * numberOfChannels * bytesPerSample;
		chunks.push({ frameCount, view: new DataView(new ArrayBuffer(byteLength)) });
	}

	// A single scratch buffer, sized for one plane, is reused for every channel
	const planeByteLength = audioSample.allocationSize({ planeIndex: 0, format: 'f32-planar' });
	const planeData = new Float32Array(planeByteLength / Float32Array.BYTES_PER_ELEMENT);

	for (let channel = 0; channel < numberOfChannels; channel++) {
		audioSample.copyTo(planeData, { planeIndex: channel, format: 'f32-planar' });

		// Index into the plane at which the current chunk begins
		let planeOffset = 0;
		for (const { frameCount, view } of chunks) {
			for (let frame = 0; frame < frameCount; frame++) {
				// The output is interleaved: all channels of one frame sit next to each other
				const byteOffset = (frame * numberOfChannels + channel) * bytesPerSample;
				this.writeOutputValue(view, byteOffset, planeData[planeOffset + frame]!);
			}
			planeOffset += MAX_FRAMES_PER_PACKET;
		}
	}

	if (shouldClose) {
		audioSample.close();
	}

	const meta: EncodedAudioChunkMetadata = {
		decoderConfig: {
			codec: this.encodingConfig.codec,
			numberOfChannels,
			sampleRate,
		},
	};

	let startFrame = 0;
	for (const { frameCount, view } of chunks) {
		const packet = new EncodedPacket(
			new Uint8Array(view.buffer),
			'key',
			timestamp + startFrame / sampleRate,
			frameCount / sampleRate,
		);

		this.encodingConfig.onEncodedPacket?.(packet, meta);
		// Awaiting the muxer here is what applies backpressure
		await this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta);

		startFrame += MAX_FRAMES_PER_PACKET;
	}
}
/**
 * Lazily sets up the encoder, taking the channel count and sample rate from the given sample. Depending on the
 * configured codec, this creates a custom encoder from `customAudioEncoders` that reports support for the
 * configuration, initializes the built-in PCM encoder, or configures a WebCodecs `AudioEncoder`.
 *
 * @param audioSample - The sample to derive the encoder configuration from; only `numberOfChannels` and
 * `sampleRate` are read.
 * @returns `undefined` if the encoder is already initialized. Otherwise a promise, also stored in
 * `ensureEncoderPromise`, that resolves once the encoder is ready and rejects if initialization fails.
 */
private ensureEncoder(audioSample: AudioSample) {
	if (this.encoderInitialized) {
		return;
	}

	return this.ensureEncoderPromise = (async () => {
		const { numberOfChannels, sampleRate } = audioSample;
		const bitrate = this.encodingConfig.bitrate instanceof Quality
			? this.encodingConfig.bitrate._toAudioBitrate(this.encodingConfig.codec)
			: this.encodingConfig.bitrate;

		const encoderConfig: AudioEncoderConfig = {
			codec: this.encodingConfig.fullCodecString ?? buildAudioCodecString(
				this.encodingConfig.codec,
				numberOfChannels,
				sampleRate,
			),
			numberOfChannels,
			sampleRate,
			bitrate,
			...getAudioEncoderConfigExtension(this.encodingConfig.codec),
		};
		this.encodingConfig.onEncoderConfig?.(encoderConfig);

		// Shared sink for encoded packets: notifies the user callback, then hands the packet to the muxer.
		const forwardPacket = (packet: EncodedPacket, meta?: EncodedAudioChunkMetadata) => {
			this.encodingConfig.onEncodedPacket?.(packet, meta);

			// The muxer call is deliberately not awaited, since encoder callbacks are synchronous. Its rejection
			// must still be observed though: without the catch, a muxer failure would surface only as an
			// unhandled promise rejection. Storing it makes the next `checkForEncoderError` call throw it.
			void this.muxer!.addEncodedAudioPacket(this.source._connectedTrack!, packet, meta)
				.catch((error: Error) => {
					this.encoderError ??= error;
				});
		};

		const MatchingCustomEncoder = customAudioEncoders.find(x => x.supports(
			this.encodingConfig.codec,
			encoderConfig,
		));

		if (MatchingCustomEncoder) {
			// @ts-expect-error "Can't create instance of abstract class 🤓"
			this.customEncoder = new MatchingCustomEncoder() as CustomAudioEncoder;
			// @ts-expect-error It's technically readonly
			this.customEncoder.codec = this.encodingConfig.codec;
			// @ts-expect-error It's technically readonly
			this.customEncoder.config = encoderConfig;
			// @ts-expect-error It's technically readonly
			this.customEncoder.onPacket = (packet, meta) => {
				// Custom encoders are user code, so validate what they hand us
				if (!(packet instanceof EncodedPacket)) {
					throw new TypeError('The first argument passed to onPacket must be an EncodedPacket.');
				}
				if (meta !== undefined && (!meta || typeof meta !== 'object')) {
					throw new TypeError('The second argument passed to onPacket must be an object or undefined.');
				}

				forwardPacket(packet, meta);
			};

			await this.customEncoder.init();
		} else if ((PCM_AUDIO_CODECS as readonly string[]).includes(this.encodingConfig.codec)) {
			this.initPcmEncoder();
		} else {
			if (typeof AudioEncoder === 'undefined') {
				throw new Error('AudioEncoder is not supported by this browser.');
			}

			const support = await AudioEncoder.isConfigSupported(encoderConfig);
			if (!support.supported) {
				throw new Error(
					`This specific encoder configuration (${encoderConfig.codec}, ${encoderConfig.bitrate} bps,`
					+ ` ${encoderConfig.numberOfChannels} channels, ${encoderConfig.sampleRate} Hz) is not`
					+ ` supported by this browser. Consider using another codec or changing your audio parameters.`,
				);
			}

			this.encoder = new AudioEncoder({
				output: (chunk, meta) => {
					forwardPacket(EncodedPacket.fromEncodedChunk(chunk), meta);
				},
				error: (error) => {
					error.stack = new Error().stack; // Provide a more useful stack trace
					this.encoderError ??= error;
				},
			});
			this.encoder.configure(encoderConfig);
		}

		assert(this.source._connectedTrack);
		this.muxer = this.source._connectedTrack.output._muxer;

		// Set last, so that nobody treats a half-constructed encoder as ready
		this.encoderInitialized = true;
	})();
}
/**
 * Turns this wrapper into a PCM encoder: stores the output sample size of the configured codec and installs the
 * `writeOutputValue` function matching the codec's sample size and data type. Combinations of sample size and
 * data type that have no writer below fail an assertion.
 */
private initPcmEncoder() {
	this.isPcmEncoder = true;

	const { dataType, sampleSize, littleEndian } = parsePcmCodec(this.encodingConfig.codec as PcmAudioCodec);
	this.outputSampleSize = sampleSize;

	// Each writer takes a float sample and stores it at the given byte offset in the target format
	switch (sampleSize) {
		case 1:
			switch (dataType) {
				case 'unsigned':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setUint8(byteOffset, clamp((value + 1) * 127.5, 0, 255));
					};
					break;
				case 'signed':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setInt8(byteOffset, clamp(Math.round(value * 128), -128, 127));
					};
					break;
				case 'ulaw':
					this.writeOutputValue = (view, byteOffset, value) => {
						// Companding operates on 16-bit integer samples
						const pcm16 = clamp(Math.floor(value * 32767), -32768, 32767);
						view.setUint8(byteOffset, toUlaw(pcm16));
					};
					break;
				case 'alaw':
					this.writeOutputValue = (view, byteOffset, value) => {
						// Companding operates on 16-bit integer samples
						const pcm16 = clamp(Math.floor(value * 32767), -32768, 32767);
						view.setUint8(byteOffset, toAlaw(pcm16));
					};
					break;
				default:
					assert(false);
			}
			break;

		case 2:
			switch (dataType) {
				case 'unsigned':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setUint16(byteOffset, clamp((value + 1) * 32767.5, 0, 65535), littleEndian);
					};
					break;
				case 'signed':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setInt16(byteOffset, clamp(Math.round(value * 32767), -32768, 32767), littleEndian);
					};
					break;
				default:
					assert(false);
			}
			break;

		case 3:
			// DataView has no 24-bit accessors, hence the helper functions
			switch (dataType) {
				case 'unsigned':
					this.writeOutputValue = (view, byteOffset, value) => setUint24(
						view,
						byteOffset,
						clamp((value + 1) * 8388607.5, 0, 16777215),
						littleEndian,
					);
					break;
				case 'signed':
					this.writeOutputValue = (view, byteOffset, value) => setInt24(
						view,
						byteOffset,
						clamp(Math.round(value * 8388607), -8388608, 8388607),
						littleEndian,
					);
					break;
				default:
					assert(false);
			}
			break;

		case 4:
			switch (dataType) {
				case 'unsigned':
					this.writeOutputValue = (view, byteOffset, value) => {
						const scaled = clamp((value + 1) * 2147483647.5, 0, 4294967295);
						view.setUint32(byteOffset, scaled, littleEndian);
					};
					break;
				case 'signed':
					this.writeOutputValue = (view, byteOffset, value) => {
						const scaled = clamp(Math.round(value * 2147483647), -2147483648, 2147483647);
						view.setInt32(byteOffset, scaled, littleEndian);
					};
					break;
				case 'float':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setFloat32(byteOffset, value, littleEndian);
					};
					break;
				default:
					assert(false);
			}
			break;

		case 8:
			switch (dataType) {
				case 'float':
					this.writeOutputValue = (view, byteOffset, value) => {
						view.setFloat64(byteOffset, value, littleEndian);
					};
					break;
				default:
					assert(false);
			}
			break;

		default:
			assertNever(sampleSize);
			assert(false);
	}
}
/**
 * Shuts down whichever encoder is in use, rethrowing any stored encoder error both before and after doing so.
 *
 * @param forceClose - If true, the encoder is closed without being flushed first.
 */
async flushAndClose(forceClose: boolean) {
	this.checkForEncoderError();

	const shouldFlush = !forceClose;

	if (this.customEncoder) {
		// Both calls go through the call serializer; only the close call is awaited
		if (shouldFlush) {
			void this.customEncoderCallSerializer.call(() => this.customEncoder!.flush());
		}

		const closePromise = this.customEncoderCallSerializer.call(() => this.customEncoder!.close());
		await closePromise;
	} else if (this.encoder) {
		if (shouldFlush) {
			await this.encoder.flush();
		}

		this.encoder.close();
	}

	// Flushing may have surfaced a new error
	this.checkForEncoderError();
}
/**
 * Returns the number of queued encode operations: the custom encoder's own counter, 0 for the PCM encoder, or the
 * `encodeQueueSize` of the AudioEncoder (0 if none exists yet).
 */
getQueueSize() {
	if (this.customEncoder) {
		return this.customEncoderQueueSize;
	}

	if (this.isPcmEncoder) {
		return 0;
	}

	return this.encoder?.encodeQueueSize ?? 0;
}
/** Throws the stored encoder error, if there is one. Does nothing otherwise. */
checkForEncoderError() {
	const storedError = this.encoderError;
	if (storedError) {
		throw storedError;
	}
}
}
/**
 * A source for raw, unencoded audio samples. Every sample added to it is encoded automatically and the result is
 * piped into the output audio track.
 * @public
 */
export class AudioSampleSource extends AudioSource {
	/** @internal */
	private _encoder: AudioEncoderWrapper;

	constructor(encodingConfig: AudioEncodingConfig) {
		// Reject bad configs before anything gets constructed
		validateAudioEncodingConfig(encodingConfig);

		super(encodingConfig.codec);

		this._encoder = new AudioEncoderWrapper(this, encodingConfig);
	}

	/**
	 * Encodes the given audio sample and adds the result to the output.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. Await it to respect writer
	 * and encoder backpressure.
	 */
	add(audioSample: AudioSample) {
		if (audioSample instanceof AudioSample) {
			// `false`: the sample stays open, closing it is left to the caller
			return this._encoder.add(audioSample, false);
		}

		throw new TypeError('audioSample must be an AudioSample.');
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		const encoder = this._encoder;
		return encoder.flushAndClose(forceClose);
	}
}
/**
 * A source that feeds the audio data of AudioBuffers into the output track. Handy when working with the Web Audio
 * API.
 * @public
 */
export class AudioBufferSource extends AudioSource {
	/** @internal */
	private _encoder: AudioEncoderWrapper;
	/** @internal */
	private _accumulatedTime = 0;

	constructor(encodingConfig: AudioEncodingConfig) {
		// Reject bad configs before anything gets constructed
		validateAudioEncodingConfig(encodingConfig);

		super(encodingConfig.codec);

		this._encoder = new AudioEncoderWrapper(this, encodingConfig);
	}

	/**
	 * Turns an AudioBuffer into audio samples, encodes those and adds them to the output. The first AudioBuffer
	 * starts at timestamp 0; every later one starts at the summed duration of all AudioBuffers added before it.
	 *
	 * @returns A Promise that resolves once the output is ready to receive more samples. Await it to respect writer
	 * and encoder backpressure.
	 */
	add(audioBuffer: AudioBuffer) {
		if (audioBuffer instanceof AudioBuffer) {
			const startTimestamp = this._accumulatedTime;
			const samples = AudioSample.fromAudioBuffer(audioBuffer, startTimestamp);

			// The samples were created here, so the encoder is told to close them (`true`)
			const pendingAdds = samples.map(sample => this._encoder.add(sample, true));

			// The next buffer begins where this one ends
			this._accumulatedTime += audioBuffer.duration;

			return Promise.all(pendingAdds);
		}

		throw new TypeError('audioBuffer must be an AudioBuffer.');
	}

	/** @internal */
	override _flushAndClose(forceClose: boolean) {
		const encoder = this._encoder;
		return encoder.flushAndClose(forceClose);
	}
}
/**
* Audio source that encodes the data of a MediaStreamAudioTrack and pipes it into the output. This is useful for
* capturing live or real-time audio such as microphones or audio from other media elements. Audio will automatically
* start being captured once the connected Output is started, and will keep being captured until the Output is
* finalized or this source is closed.
* @public
*/
export class MediaStreamAudioTrackSource extends AudioSource {
/** @internal */
private _encoder: AudioEncoderWrapper;
/** @internal */
private _abortController: AbortController | null = null;
/** @internal */
private _track: MediaStreamAudioTrack;
/** @internal */
private _audioContext: AudioContext | null = null;
/** @internal */
private _scriptProcessorNode: ScriptProcessorNode | null = null; // Deprecated but goated
/** @internal */
private _promiseWithResolvers = promiseWithResolvers();
/** @internal */
private _errorPromiseAccessed = false;
/** A promise that rejects upon any error within this source. This promise never resolves. */
get errorPromise() {
this._errorPromiseAccessed = true;
return this._promiseWithResolvers.promise;
}
constructor(track: MediaStreamAudioTrack, encodingConfig: AudioEncodingConfig) {
if (!(track instanceof MediaStreamTrack) || track.kind !== 'audio') {
throw new TypeError('track must be an audio MediaStreamTrack.');
}
validateAudioEncodingConfig(encodingConfig);
super(encodingConfig.codec);
this._encoder = new AudioEncoderWrapper(this, encodingConfig);
this._track = track;
}
/** @internal */
override async _start() {
if (!this._errorPromiseAccessed) {
console.warn(
'Make sure not to ignore the `errorPromise` field on MediaStreamVideoTrackSource, so that any internal'
+ ' errors get bubbled up properly.',
);
}
this._abortController = new AbortController();
if (typeof MediaStreamTrackProcessor !== 'undefined') {
// Great, MediaStreamTrackProcessor is supported, this is the preferred way of doing things
let firstAudioDataTimestamp: number | null = null;
const processor = new MediaStreamTrackProcessor({ track: this._track });
const consumer = new WritableStream<AudioData>({
write: (audioData) => {
if (firstAudioDataTimestamp === null) {
firstAudioDataTimestamp = audioData.timestamp / 1e6;
const muxer = this._connectedTrack!.output._muxer;
if (muxer.firstMediaStreamTimestamp === null) {
muxer.firstMediaStreamTimestamp = performance.now() / 1000;
this._timestampOffset = -firstAudioDataTimestamp;
} else {
this._timestampOffset = (performance.now() / 1000 - muxer.firstMediaStreamTimestamp)
- firstAudioDataTimestamp;
}
}
if (this._encoder.getQueueSize() >= 4) {
// Drop data if the encoder is overloaded
audioData.close();
return;
}
void this._encoder.add(new AudioSample(audioData), true)
.catch((error) => {
this._abortController?.abort();
this._promiseWithResolvers.reject(error);
});
},
});
processor.readable.pipeTo(consumer, {
signal: this._abortController.signal,
}).catch((error) => {
// Handle AbortError silently
if (error instanceof DOMException && error.name === 'AbortError') return;
this._promiseWithResolvers.reject(error);
});
} else {
// Let's fall back to an AudioContext approach
// eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-member-access
const AudioContext = window.AudioContext || (window as any).webkitAudioContext;
this._audioContext = new AudioContext({ sampleRate: this._track.getSettings().sampleRate });
const sourceNode = this._audioContext.createMediaStreamSource(new MediaStream([this._track]));
this._scriptProcessorNode = this._audioContext.createScriptProcessor(4096);
if (this._audioContext.state === 'suspended') {
await this._audioContext.resume();
}
sourceNode.connect(this._scriptProcessorNode);
this._scriptProcessorNode.connect(thi