libavjs-webcodecs-polyfill
A WebCodecs polyfill (ponyfill, really), using libav.js
/*
* This file is part of the libav.js WebCodecs Polyfill implementation. The
* interface implemented is derived from the W3C standard. No attribution is
* required when using this library.
*
* Copyright (c) 2021-2024 Yahweasel
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
import * as ad from "./audio-data";
import * as adec from "./audio-decoder";
import * as eac from "./encoded-audio-chunk";
import * as et from "./event-target";
import * as libavs from "./avloader";
import * as misc from "./misc";
import type * as LibAVJS from "@libav.js/types";
export class AudioEncoder extends et.DequeueEventTarget {
constructor(init: AudioEncoderInit) {
super();
// 1. Let e be a new AudioEncoder object.
// 2. Assign a new queue to [[control message queue]].
this._p = Promise.all([]);
// 3. Assign false to [[message queue blocked]].
// (unused in polyfill)
// 4. Assign null to [[codec implementation]].
this._libav = null;
this._codec = this._c = this._frame = this._pkt = 0;
this._filter_in_ctx = this._filter_out_ctx = null;
this._filter_graph = this._buffersrc_ctx = this._buffersink_ctx = 0;
/* 5. Assign the result of starting a new parallel queue to
* [[codec work queue]]. */
// (shared queue)
// 6. Assign false to [[codec saturated]].
// (saturation unneeded in the polyfill)
// 7. Assign init.output to [[output callback]].
this._output = init.output;
// 8. Assign init.error to [[error callback]].
this._error = init.error;
// 9. Assign null to [[active encoder config]].
// 10. Assign null to [[active output config]].
// (both part of the codec)
// 11. Assign "unconfigured" to [[state]].
this.state = "unconfigured";
// 12. Assign 0 to [[encodeQueueSize]].
this.encodeQueueSize = 0;
// 13. Assign a new list to [[pending flush promises]].
// 14. Assign false to [[dequeue event scheduled]].
// (shared queue)
// 15. Return e.
}
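    /* A minimal construction sketch (illustrative, not part of this file;
     * `chunks` is a hypothetical sink). The init object mirrors the
     * AudioEncoderInit dictionary defined at the bottom of this file:
     *
     *     const chunks: eac.EncodedAudioChunk[] = [];
     *     const encoder = new AudioEncoder({
     *         output: (chunk, metadata) => { chunks.push(chunk); },
     *         error: (err) => console.error(err)
     *     });
     */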
/* NOTE: These should technically be readonly, but I'm implementing them as
* plain fields, so they're writable */
state: misc.CodecState;
encodeQueueSize: number;
private _output: EncodedAudioChunkOutputCallback;
private _error: misc.WebCodecsErrorCallback;
// Metadata argument for output
private _outputMetadata: EncodedAudioChunkMetadata | null = null;
private _outputMetadataFilled: boolean = false;
// Event queue
private _p: Promise<unknown>;
// LibAV state
private _libav: LibAVJS.LibAV | null;
private _codec: number;
private _c: number;
private _frame: number;
private _pkt: number;
private _pts: number | null = null;
private _filter_in_ctx: LibAVJS.FilterIOSettings | null;
private _filter_out_ctx: LibAVJS.FilterIOSettings | null;
private _filter_graph: number;
private _buffersrc_ctx: number;
private _buffersink_ctx: number;
configure(config: AudioEncoderConfig): void {
const self = this;
// 1. If config is not a valid AudioEncoderConfig, throw a TypeError.
// NOTE: We don't support sophisticated codec string parsing (yet)
// 2. If [[state]] is "closed", throw an InvalidStateError.
if (this.state === "closed")
throw new DOMException("Encoder is closed", "InvalidStateError");
// Free any internal state
if (this._libav)
this._p = this._p.then(() => this._free());
// 3. Set [[state]] to "configured".
this.state = "configured";
// 4. Queue a control message to configure the encoder using config.
this._p = this._p.then(async function() {
/* 1. Let supported be the result of running the Check
* Configuration Support algorithm with config. */
const supported = libavs.encoder(config.codec, config);
// Get the output metadata now
self._outputMetadata = { decoderConfig: {
codec: config.codec,
// Rest will be filled in when we get data
sampleRate: 0,
numberOfChannels: 0
}};
self._outputMetadataFilled = false;
/* 2. If supported is false, queue a task to run the Close
* AudioEncoder algorithm with NotSupportedError and abort these
* steps. */
if (!supported) {
self._closeAudioEncoder(new DOMException("Unsupported codec", "NotSupportedError"));
return;
}
/* 3. If needed, assign [[codec implementation]] with an
* implementation supporting config. */
// 4. Configure [[codec implementation]] with config.
const libav = self._libav = await libavs.get();
// And initialize
let frame_size: number;
[self._codec, self._c, self._frame, self._pkt, frame_size] =
await libav.ff_init_encoder(supported.codec, supported);
self._pts = null;
await libav.AVCodecContext_time_base_s(
self._c, 1, supported.ctx!.sample_rate!
);
// Be ready to set up the filter
self._filter_out_ctx = {
sample_rate: supported.ctx!.sample_rate,
sample_fmt: supported.ctx!.sample_fmt,
channel_layout: supported.ctx!.channel_layout,
frame_size
};
// 5. Queue a task to run the following steps:
// 1. Assign false to [[message queue blocked]].
// 2. Queue a task to Process the control message queue.
// (shared queue)
}).catch(this._error);
}
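    /* Sketch of a typical configure() call, assuming an Opus-capable
     * libav.js build; the `opus` block is optional and frameDuration is in
     * microseconds (see AudioEncoderConfig below):
     *
     *     encoder.configure({
     *         codec: "opus",
     *         sampleRate: 48000,
     *         numberOfChannels: 2,
     *         bitrate: 128000,
     *         opus: { frameDuration: 20000 }
     *     });
     */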
// Our own algorithm, close libav
private async _free() {
if (this._filter_graph) {
await this._libav!.avfilter_graph_free_js(this._filter_graph);
this._filter_in_ctx = this._filter_out_ctx = null;
this._filter_graph = this._buffersrc_ctx = this._buffersink_ctx =
0;
}
if (this._c) {
await this._libav!.ff_free_encoder(this._c, this._frame, this._pkt);
this._codec = this._c = this._frame = this._pkt = 0;
}
if (this._libav) {
libavs.free(this._libav);
this._libav = null;
}
}
private _closeAudioEncoder(exception: DOMException) {
// 1. Run the Reset AudioEncoder algorithm with exception.
this._resetAudioEncoder(exception);
// 2. Set [[state]] to "closed".
this.state = "closed";
/* 3. Clear [[codec implementation]] and release associated system
* resources. */
this._p = this._p.then(() => this._free());
/* 4. If exception is not an AbortError DOMException, invoke the
* [[error callback]] with exception. */
if (exception.name !== "AbortError")
this._p = this._p.then(() => { this._error(exception); });
}
private _resetAudioEncoder(exception: DOMException) {
// 1. If [[state]] is "closed", throw an InvalidStateError.
if (this.state === "closed")
throw new DOMException("Encoder closed", "InvalidStateError");
// 2. Set [[state]] to "unconfigured".
this.state = "unconfigured";
// ... really, we're just going to free it now
this._p = this._p.then(() => this._free());
}
encode(data: ad.AudioData): void {
/* 1. If the value of data’s [[Detached]] internal slot is true, throw
* a TypeError. */
if (data._libavGetData() === null)
throw new TypeError("Detached");
// 2. If [[state]] is not "configured", throw an InvalidStateError.
if (this.state !== "configured")
throw new DOMException("Unconfigured", "InvalidStateError");
/* 3. Let dataClone hold the result of running the Clone AudioData
* algorithm with data. */
const dataClone = data.clone();
// 4. Increment [[encodeQueueSize]].
this.encodeQueueSize++;
// 5. Queue a control message to encode dataClone.
this._p = this._p.then(async () => {
const libav = this._libav!;
const c = this._c;
const pkt = this._pkt;
const framePtr = this._frame;
let encodedOutputs: LibAVJS.Packet[] | null = null;
/* 3. Decrement [[encodeQueueSize]] and run the Schedule Dequeue
* Event algorithm. */
this.encodeQueueSize--;
this.dispatchEvent(new CustomEvent("dequeue"));
/* 1. Attempt to use [[codec implementation]] to encode the media
* resource described by dataClone. */
try {
// Arrange the data
let raw: any = dataClone._libavGetData();
const nb_samples = dataClone.numberOfFrames;
if (!ad.isInterleaved(dataClone.format)) {
let split = [];
for (let i = 0; i < dataClone.numberOfChannels; i++)
split.push(raw.subarray(i * nb_samples, (i + 1) * nb_samples));
raw = split;
}
// Convert the format
let format: number;
switch (dataClone.format) {
case "u8":
format = libav.AV_SAMPLE_FMT_U8;
break;
case "s16":
format = libav.AV_SAMPLE_FMT_S16;
break;
case "s32":
format = libav.AV_SAMPLE_FMT_S32;
break;
case "f32":
format = libav.AV_SAMPLE_FMT_FLT;
break;
case "u8-planar":
format = libav.AV_SAMPLE_FMT_U8P;
break;
case "s16-planar":
format = libav.AV_SAMPLE_FMT_S16P;
break;
case "s32-planar":
format = libav.AV_SAMPLE_FMT_S32P;
break;
case "f32-planar":
format = libav.AV_SAMPLE_FMT_FLTP;
break;
default:
throw new TypeError("Invalid AudioSampleFormat");
}
// Convert the timestamp
const ptsFull = Math.floor(dataClone.timestamp / 1000);
const [pts, ptshi] = libav.f64toi64(ptsFull);
// Convert the channel layout
const cc = dataClone.numberOfChannels;
const channel_layout = (cc === 1) ? 4 : ((1<<cc)-1);
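                /* This reproduces FFmpeg's default layouts for small
                 * channel counts: mono maps to AV_CH_LAYOUT_MONO (0x4,
                 * front center); otherwise the low cc bits are set, e.g.
                 * cc = 2 gives 0b11 = AV_CH_LAYOUT_STEREO. */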
// Make the frame
const sample_rate = dataClone.sampleRate;
const frame: LibAVJS.Frame = {
data: raw,
format, pts, ptshi, channel_layout, sample_rate
};
// Check if the filter needs to be reconfigured
let preOutputs: LibAVJS.Packet[] | null = null;
if (this._filter_in_ctx) {
const filter_ctx = this._filter_in_ctx;
if (filter_ctx.sample_fmt !== frame.format ||
filter_ctx.channel_layout !== frame.channel_layout ||
filter_ctx.sample_rate !== frame.sample_rate) {
// Need a new filter! First, get anything left in the filter
let fframes = await this._filter([], true);
// Can't send partial frames through the encoder
fframes = fframes.filter(x => {
let frame_size: number;
                        if (x.data[0].length) {
                            // Planar: one typed array per channel
                            frame_size = x.data[0].length;
                        } else {
                            // Interleaved: total length over all channels
                            frame_size = x.data.length / x.channels!;
                        }
return frame_size === this._filter_out_ctx!.frame_size;
});
if (fframes.length) {
preOutputs =
await libav.ff_encode_multi(c, framePtr, pkt, fframes);
}
await libav.avfilter_graph_free_js(this._filter_graph);
this._filter_in_ctx = null;
this._filter_graph = this._buffersrc_ctx =
this._buffersink_ctx = 0;
}
}
// Set up the filter
if (!this._filter_graph) {
const filter_ctx = this._filter_in_ctx = {
sample_rate: frame.sample_rate!,
sample_fmt: frame.format,
channel_layout: frame.channel_layout!
};
[this._filter_graph, this._buffersrc_ctx, this._buffersink_ctx] =
await libav.ff_init_filter_graph("aresample", filter_ctx,
this._filter_out_ctx!);
}
// Filter
const fframes = await this._filter([frame]);
// And encode
encodedOutputs =
await libav.ff_encode_multi(c, framePtr, pkt, fframes);
if (preOutputs)
encodedOutputs = preOutputs.concat(encodedOutputs);
if (encodedOutputs.length && !this._outputMetadataFilled &&
fframes && fframes.length)
await this._getOutputMetadata(fframes[0]);
/* 2. If encoding results in an error, queue a task on the control
* thread event loop to run the Close AudioEncoder algorithm with
* EncodingError. */
} catch (ex) {
this._p = this._p.then(() => {
this._closeAudioEncoder(<DOMException> ex);
});
}
/* 3. If [[codec saturated]] equals true and
* [[codec implementation]] is no longer saturated, queue a task
* to perform the following steps: */
// 1. Assign false to [[codec saturated]].
// 2. Process the control message queue.
// (no saturation)
/* 4. Let encoded outputs be a list of encoded audio data outputs
* emitted by [[codec implementation]]. */
/* 5. If encoded outputs is not empty, queue a task to run the
* Output EncodedAudioChunks algorithm with encoded outputs. */
if (encodedOutputs)
this._outputEncodedAudioChunks(encodedOutputs);
}).catch(this._error);
}
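    /* Sketch of feeding the encoder (illustrative; assumes the polyfill's
     * AudioData mirrors the spec's AudioDataInit, with timestamps in
     * microseconds):
     *
     *     const samples = new Float32Array(48000); // 1 s of mono silence
     *     encoder.encode(new ad.AudioData({
     *         format: "f32-planar",
     *         sampleRate: 48000,
     *         numberOfFrames: 48000,
     *         numberOfChannels: 1,
     *         timestamp: 0,
     *         data: samples
     *     }));
     */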
// Internal: Filter the given audio
private async _filter(frames: LibAVJS.Frame[], fin: boolean = false) {
/* The specification does not state how timestamps should be related
* between input and output. It's obvious that the timestamps should
* increase at the appropriate rate based on the number of samples seen,
* but where they should start is not stated. Google Chrome starts with
* the timestamp of the first input frame, and ignores all other input
* frame timestamps. We follow that convention as well. */
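        /* Concretely: the first frame's pts seeds the running counter, and
         * each filtered frame then advances it by its own nb_samples, so
         * the timestamps of later inputs have no effect on output timing. */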
if (frames.length && this._pts === null)
this._pts = (frames[0].pts || 0);
const fframes =
await this._libav!.ff_filter_multi(this._buffersrc_ctx,
this._buffersink_ctx, this._frame, frames, fin);
for (const frame of fframes) {
frame.pts = this._pts!;
frame.ptshi = 0;
this._pts! += frame.nb_samples!;
}
return fframes;
}
// Internal: Get output metadata
private async _getOutputMetadata(frame: LibAVJS.Frame) {
const libav = this._libav!;
const c = this._c;
const extradataPtr = await libav.AVCodecContext_extradata(c);
const extradata_size = await libav.AVCodecContext_extradata_size(c);
let extradata: Uint8Array | null = null;
if (extradataPtr && extradata_size)
extradata = await libav.copyout_u8(extradataPtr, extradata_size);
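        /* extradata is the codec's out-of-band configuration (e.g. the
         * AudioSpecificConfig for AAC); when present, it becomes the
         * decoderConfig.description that a matching decoder needs. */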
this._outputMetadata!.decoderConfig.sampleRate = frame.sample_rate!;
this._outputMetadata!.decoderConfig.numberOfChannels = frame.channels!;
if (extradata)
this._outputMetadata!.decoderConfig.description = extradata;
this._outputMetadataFilled = true;
}
private _outputEncodedAudioChunks(packets: LibAVJS.Packet[]) {
const libav = this._libav!;
const sampleRate = this._filter_out_ctx!.sample_rate!;
for (const packet of packets) {
// 1. data
const data = packet.data;
// 2. type
const type: eac.EncodedAudioChunkType =
(packet.flags! & 1) ? "key" : "delta";
// 3. timestamp
let timestamp = libav.i64tof64(packet.pts!, packet.ptshi!);
timestamp = Math.floor(timestamp / sampleRate * 1000000);
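            /* pts is in time_base units of 1/sampleRate (set in
             * configure()), so e.g. pts 960 at 48 kHz yields
             * floor(960 / 48000 * 1e6) = 20000 µs. */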
// 4. duration
let duration: number | undefined;
if (typeof packet.duration !== "undefined") {
duration = libav.i64tof64(packet.duration, packet.durationhi || 0);
duration = Math.floor(duration / sampleRate * 1000000);
}
const chunk = new eac.EncodedAudioChunk({
data, type, timestamp, duration
});
if (this._outputMetadataFilled)
this._output(chunk, this._outputMetadata || void 0);
else
this._output(chunk);
}
}
flush(): Promise<void> {
/* 1. If [[state]] is not "configured", return a promise rejected with
* InvalidStateError DOMException. */
        if (this.state !== "configured")
            return Promise.reject(new DOMException("Invalid state", "InvalidStateError"));
// 2. Let promise be a new Promise.
// 3. Append promise to [[pending flush promises]].
// 4. Queue a control message to flush the codec with promise.
// 5. Process the control message queue.
// 6. Return promise.
const ret = this._p.then(async () => {
if (!this._c)
return;
/* 1. Signal [[codec implementation]] to emit all internal pending
* outputs. */
// Make sure any last data is flushed
const libav = this._libav!;
const c = this._c;
const frame = this._frame;
const pkt = this._pkt;
const buffersrc_ctx = this._buffersrc_ctx;
const buffersink_ctx = this._buffersink_ctx;
let encodedOutputs: LibAVJS.Packet[] | null = null;
try {
let fframes: LibAVJS.Frame[] | null = null;
if (buffersrc_ctx)
fframes = await this._filter([], true);
encodedOutputs =
await libav.ff_encode_multi(c, frame, pkt, fframes || [],
true);
if (!this._outputMetadataFilled && fframes && fframes.length)
await this._getOutputMetadata(fframes[0]);
} catch (ex) {
this._p = this._p.then(() => {
this._closeAudioEncoder(<DOMException> ex);
});
}
/* 2. Let encoded outputs be a list of encoded audio data outputs
* emitted by [[codec implementation]]. */
// 3. Queue a task to perform these steps:
{
/* 1. If encoded outputs is not empty, run the Output
* EncodedAudioChunks algorithm with encoded outputs. */
if (encodedOutputs)
this._outputEncodedAudioChunks(encodedOutputs);
// 2. Remove promise from [[pending flush promises]].
// 3. Resolve promise.
// (shared queue)
}
});
this._p = ret;
return ret;
}
reset(): void {
this._resetAudioEncoder(new DOMException("Reset", "AbortError"));
}
close(): void {
this._closeAudioEncoder(new DOMException("Close", "AbortError"));
}
static async isConfigSupported(
config: AudioEncoderConfig
): Promise<AudioEncoderSupport> {
const enc = libavs.encoder(config.codec, config);
let supported = false;
if (enc) {
const libav = await libavs.get();
try {
const [, c, frame, pkt] =
await libav.ff_init_encoder(enc.codec, enc);
await libav.ff_free_encoder(c, frame, pkt);
supported = true;
} catch (ex) {}
await libavs.free(libav);
}
return {
supported,
config: misc.cloneConfig(
config,
["codec", "sampleRate", "numberOfChannels", "bitrate"]
)
};
}
}
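/* End-to-end sketch of the encoder lifecycle (illustrative only; the
 * variable names and the choice of Opus are assumptions, not part of this
 * file):
 *
 *     const { supported } = await AudioEncoder.isConfigSupported({
 *         codec: "opus", sampleRate: 48000, numberOfChannels: 2
 *     });
 *     if (supported) {
 *         const chunks: eac.EncodedAudioChunk[] = [];
 *         const encoder = new AudioEncoder({
 *             output: chunk => chunks.push(chunk),
 *             error: err => console.error(err)
 *         });
 *         encoder.configure({
 *             codec: "opus", sampleRate: 48000, numberOfChannels: 2
 *         });
 *         // ... encoder.encode(...) one or more AudioData frames ...
 *         await encoder.flush();
 *         encoder.close();
 *     }
 */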
export interface AudioEncoderInit {
output: EncodedAudioChunkOutputCallback;
error: misc.WebCodecsErrorCallback;
}
export interface EncodedAudioChunkMetadata {
decoderConfig: adec.AudioDecoderConfig;
}
export type EncodedAudioChunkOutputCallback =
(output: eac.EncodedAudioChunk, metadata?: EncodedAudioChunkMetadata) => void;
export interface AudioEncoderConfig {
codec: string | {libavjs: libavs.LibAVJSCodec};
sampleRate?: number;
numberOfChannels?: number;
bitrate?: number;
// Opus-specific
opus?: {
format?: "opus",
frameDuration?: number,
complexity?: number,
packetlossperc?: number,
useinbandfec?: boolean,
usedtx?: boolean
},
// FLAC-specific
flac?: {
blockSize?: number,
compressLevel?: number
}
}
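/* For instance, a FLAC configuration using the polyfill-specific `flac`
 * block (values are illustrative):
 *
 *     const flacConfig: AudioEncoderConfig = {
 *         codec: "flac",
 *         sampleRate: 44100,
 *         numberOfChannels: 2,
 *         flac: { blockSize: 4096, compressLevel: 8 }
 *     };
 */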
export interface AudioEncoderSupport {
supported: boolean;
config: AudioEncoderConfig;
}