mediabunny

/*!
 * Copyright (c) 2026-present, Vanilagy and contributors
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import type { FileHandle } from 'node:fs/promises';
import {
	assert,
	binarySearchLessOrEqual,
	clamp,
	closedIntervalsOverlap,
	FilePath,
	isNumber,
	isWebKit,
	MaybePromise,
	mergeRequestInit,
	polyfillSymbolDispose,
	promiseWithResolvers,
	retriedFetch,
	toDataView,
	toUint8Array,
	wait,
	EventEmitter,
} from './misc';
import * as nodeAlias from './node';
import { InputDisposedError } from './input';

// NOTE(review): judging by the name, this makes `Symbol.dispose` available where the runtime lacks it; SourceRef
// below defines a `[Symbol.dispose]` method. Confirm against './misc'.
polyfillSymbolDispose();

// `node` is force-typed as defined (`undefined!`), so users must feature-check its members at runtime (as
// FilePathSource does with `node.fs`) before relying on them.
const node = typeof nodeAlias !== 'undefined'
	? nodeAlias // Aliasing it prevents some bundler warnings
	: undefined!;

/**
 * The result of a successful {@link Source._read} call: a chunk of bytes plus its position in the file.
 * NOTE(review): `bytes` and `view` presumably cover the same memory (BufferSource derives both from one buffer).
 */
export type ReadResult = {
	bytes: Uint8Array;
	view: DataView;
	/** The offset of the bytes in the file. */
	offset: number;
};

/** Default value for the `minReadPosition` argument of {@link Source._read}. */
export const DEFAULT_MIN_READ_POSITION = 0;
/** Default value for the `maxReadPosition` argument of {@link Source._read}. */
export const DEFAULT_MAX_READ_POSITION = Infinity;

/**
 * The events emitted by a {@link Source}, with each key being an event name and its value being the event data.
 * @group Input sources
 * @public
 */
export type SourceEvents = {
	/** Emitted each time data is retrieved from the source. */
	read: {
		/** The start of the retrieved range, inclusive. */
		start: number;
		/** The end of the retrieved range, exclusive. */
		end: number;
	};
};

// Module-level registry whose held values are cleanup callbacks; a callback is simply invoked when its registered
// target is finalized. Stays `null` in environments without FinalizationRegistry. FilePathSource uses it to close
// file handles that were never explicitly disposed.
let sourceFinalizationRegistry: FinalizationRegistry<() => unknown> | null = null;
if (typeof FinalizationRegistry !== 'undefined') {
	sourceFinalizationRegistry = new FinalizationRegistry((cleanup) => {
		cleanup();
	});
}

/**
 * The source base class, representing a resource from which bytes can be read.
* @group Input sources
 * @public
 */
export abstract class Source extends EventEmitter<SourceEvents> {
	/**
	 * Returns the file size in bytes, `null` if the source is unsized, or `undefined` if the size is not known yet
	 * (in which case {@link Source.getSizeOrNull} performs a read and asks again).
	 * @internal
	 */
	abstract _getFileSize(): number | null | undefined;
	/**
	 * Reads the byte range [start, end) from the source.
	 * @internal
	 */
	abstract _read(
		start: number,
		end: number,
		minReadPosition: number,
		maxReadPosition: number,
	): MaybePromise<ReadResult | null>;
	/**
	 * Releases the resources held by the source. Invoked once the reference count drops to zero.
	 * @internal
	 */
	abstract _dispose(): void;

	/** @internal */
	_disposed = false;
	/** @internal */
	_refCount = 0;

	/**
	 * Used internally to mark if a source stems from an HLS reading operation. Used to suppress certain warnings.
	 * @internal
	 */
	_usedForHls = false;

	/**
	 * FinalizationRegistry for rogue refs to this source that didn't get freed. It lives on the Source itself so that
	 * in case the Source transitively points back to itself and forms a cycle (for example through a custom
	 * CustomSource callback) that we're not leaking memory.
	 * @internal
	 */
	_refFinalizationRegistry: FinalizationRegistry<Source> | null = null;

	/** @internal */
	private _sizePromise: Promise<number | null> | null = null;

	constructor() {
		super();

		if (typeof FinalizationRegistry !== 'undefined') {
			// A SourceRef that gets garbage-collected without having been freed still releases its count
			this._refFinalizationRegistry = new FinalizationRegistry((source) => {
				source._decrementRefCount();
			});
		}
	}

	/**
	 * Resolves with the total size of the file in bytes. This function is memoized, meaning only the first successful
	 * call will retrieve the size. A failed retrieval is not memoized, so a later call tries again.
	 *
	 * Returns null if the source is unsized.
	 */
	async getSizeOrNull() {
		if (this._disposed) {
			throw new InputDisposedError();
		}

		const memoized = this._sizePromise;
		if (memoized) {
			return memoized;
		}

		const pending = (async () => {
			let size = this._getFileSize();
			if (size !== undefined) {
				return size;
			}

			// The size isn't known yet; reading a single byte forces the source to determine it
			await this._read(0, 1, DEFAULT_MIN_READ_POSITION, DEFAULT_MAX_READ_POSITION);

			size = this._getFileSize();
			assert(size !== undefined);

			return size;
		})();
		this._sizePromise = pending;

		// Don't let one failure (e.g. a transient read error) poison every future size lookup. The rejection itself
		// still reaches the caller through the returned promise.
		pending.catch(() => {
			if (this._sizePromise === pending) {
				this._sizePromise = null;
			}
		});

		return pending;
	}

	/**
	 * Resolves with the total size of the file in bytes. This function is memoized, meaning only the first successful
	 * call will retrieve the size.
	 *
	 * Throws an error if the source is unsized.
	 */
	async getSize() {
		if (this._disposed) {
			throw new InputDisposedError();
		}

		const result = await this.getSizeOrNull();
		if (result === null) {
			throw new Error('Cannot determine the size of an unsized source.');
		}

		return result;
	}

	/**
	 * Returns a new {@link RangedSource} that maps data onto this source using the given offset and length. If a length
	 * is not provided, the ranged source spans until the end of this source's data.
	 *
	 * Useful for reading files that are embedded within larger files.
	 */
	slice(offset: number, length?: number) {
		if (!Number.isInteger(offset) || offset < 0) {
			throw new TypeError('offset must be a non-negative integer.');
		}
		if (length !== undefined && (!Number.isInteger(length) || length < 0)) {
			throw new TypeError('length, when provided, must be a non-negative integer.');
		}

		return new RangedSource(this, offset, length);
	}

	/**
	 * Called each time data is retrieved from the source. Will be called with the retrieved range (end exclusive).
	 *
	 * @deprecated Use `source.on('read', ({ start, end }) => ...)` instead.
	 */
	onread: ((start: number, end: number) => unknown) | null = null;

	/**
	 * Notifies both the deprecated `onread` callback and the listeners of the `read` event.
	 * @internal
	 */
	_dispatchRead(start: number, end: number) {
		// eslint-disable-next-line @typescript-eslint/no-deprecated
		this.onread?.(start, end);
		this._emit('read', { start, end });
	}

	/**
	 * Creates a new `SourceRef` pointing to this source. You are expected to call `.free()` on said `SourceRef` when
	 * you're done with it.
	 */
	ref() {
		return new SourceRef(this);
	}

	/** @internal */
	_incrementRefCount() {
		this._refCount++;
	}

	/**
	 * Drops one reference; disposes the source when the last reference is gone.
	 * @internal
	 */
	_decrementRefCount() {
		this._refCount--;

		if (this._refCount === 0) {
			try {
				this._dispose();
			} finally {
				// Even if _dispose throws, the source must count as disposed. Otherwise it could be ref'd again and
				// disposed a second time.
				this._disposed = true;
			}
		}
	}
}

/**
 * A reference to a {@link Source}, used to manage a source's lifecycle. Creating a `SourceRef` via {@link Source.ref}
 * increases that source's internal reference count. As long as a source has a non-zero reference count, it is assumed
 * to still be in use.
Once all references are freed via {@link SourceRef.free}, the source gets disposed.
 *
 * @group Input sources
 * @public
 */
export class SourceRef<S extends Source = Source> implements Disposable {
	/** @internal */
	private _source: S | null;
	/** @internal */
	private _freed = false;

	/** @internal */
	constructor(source: S) {
		if (source._disposed) {
			throw new Error('Cannot ref a disposed source.');
		}

		source._incrementRefCount();
		// Safety net: if this ref is garbage-collected without being freed, the source's registry releases the count
		source._refFinalizationRegistry?.register(this, source, this);

		this._source = source;
	}

	/** The {@link Source} this ref references. Accessing this field throws an error after having freed the ref. */
	get source() {
		if (!this._source) {
			throw new Error('Can\'t get source; ref has already been freed.');
		}

		return this._source;
	}

	/** Whether or not this reference has been freed via {@link SourceRef.free}. */
	get freed() {
		return this._freed;
	}

	/**
	 * Frees the ref, decrementing the source's internal reference count. If the source's internal reference count
	 * reaches zero, it gets disposed. To catch bugs, this method throws if the ref is already freed.
	 */
	free() {
		if (this._freed) {
			throw new Error('Illegal operation: double free on SourceRef.');
		}

		const source = this.source;
		assert(source._refCount > 0);

		try {
			source._decrementRefCount();
		} finally {
			// The count has been given back at this point, so the ref must end up freed even if disposing the
			// source threw. Otherwise a later free() would decrement a second time.
			source._refFinalizationRegistry?.unregister(this);
			this._freed = true;
			this._source = null;
		}
	}

	/**
	 * Calls {@link SourceRef.free}.
	 */
	[Symbol.dispose]() {
		if (!this.freed) {
			this.free();
		}
	}
}

/**
 * A source which can create new sources from file paths. Required for multi-file inputs such as HLS playlists.
 * @public
 * @group Input sources
 */
export abstract class PathedSource extends Source {
	constructor(
		/** The path that points to the root file; the entry file of the media. */
		public rootPath: FilePath,
		/** The callback that is called for each requested file; must return a {@link Source} or {@link SourceRef}. */
		public requestHandler: (request: SourceRequest) => MaybePromise<Source | SourceRef>,
	) {
		if (typeof rootPath !== 'string') {
			throw new TypeError('rootPath must be a string.');
		}
		if (typeof requestHandler !== 'function') {
			throw new TypeError('requestHandler must be a function.');
		}

		super();
	}

	/**
	 * Runs the request handler and normalizes its (possibly asynchronous) result into a {@link SourceRef}. A returned
	 * {@link Source} gets a fresh ref; a returned {@link SourceRef} is passed through as-is.
	 * @internal
	 */
	_resolveRequest(request: SourceRequest): MaybePromise<SourceRef> {
		const result = this.requestHandler(request);

		const handle = (result: Source | SourceRef) => {
			if (!(result instanceof Source || result instanceof SourceRef)) {
				throw new TypeError('requestHandler must return or resolve to a Source or SourceRef.');
			}

			const ref = result instanceof Source ? result.ref() : result;
			// The HLS marker is inherited by the resolved source (and never cleared once set)
			ref.source._usedForHls ||= this._usedForHls;

			return ref;
		};

		if (result instanceof Promise) {
			return result.then(handle);
		} else {
			return handle(result);
		}
	}
}

/**
 * A request for a {@link Source} at the given path.
 * @group Input sources
 * @public
 */
export type SourceRequest = {
	/** The requested file path. */
	path: FilePath;
	/** Whether the requested file is the root file. */
	isRoot: boolean;
};

/** Two requests are considered equal when they target the same path; `isRoot` is not compared. */
export const sourceRequestsAreEqual = (a: SourceRequest, b: SourceRequest) => {
	return a.path === b.path;
};

/**
 * A custom multi-file source where each file is uniquely identified by a {@link FilePath} and can be resolved to
 * an arbitrary {@link Source}.
 *
 * @public
 * @group Input sources
 */
export class CustomPathedSource extends PathedSource {
	/**
	 * Ref to the resolved root source; `null` until the root request has completed.
	 * @internal
	 */
	_root: SourceRef | null = null;
	/**
	 * The in-flight root request, if it is asynchronous and still pending.
	 * @internal
	 */
	_rootRequest: Promise<SourceRef> | null = null;

	/**
	 * Forwards the read to the root source, lazily resolving the root source on first use.
	 * @internal
	 */
	override _read(
		start: number,
		end: number,
		minReadPosition: number,
		maxReadPosition: number,
	): MaybePromise<ReadResult | null> {
		if (this._root) {
			return this._root.source._read(start, end, minReadPosition, maxReadPosition);
		}

		if (!this._rootRequest) {
			// _resolveRequest has already turned the handler's result into a SourceRef
			const resolved = this._resolveRequest({ path: this.rootPath, isRoot: true });

			if (!(resolved instanceof Promise)) {
				this._root = resolved;
				return resolved.source._read(start, end, minReadPosition, maxReadPosition);
			}

			this._rootRequest = resolved.then((ref) => {
				this._root = ref;
				this._rootRequest = null;

				return ref;
			});
		}

		// All reads issued while the root is still resolving share the same request
		return this._rootRequest.then(ref => ref.source._read(start, end, minReadPosition, maxReadPosition));
	}

	/** @internal */
	override _getFileSize(): number | null | undefined {
		if (this._root) {
			return this._root.source._getFileSize();
		}

		return undefined;
	}

	/** @internal */
	override _dispose(): void {
		if (this._root) {
			this._root.free();
		} else if (this._rootRequest) {
			// Free the root as soon as it arrives. If the request fails there is nothing to free; that failure is
			// already delivered to whoever awaited the read that started the request, so it is ignored here rather
			// than left as an unhandled rejection.
			void this._rootRequest.then(
				ref => ref.free(),
				() => {},
			);
		}
	}
}

/**
 * A source backed by an ArrayBuffer or ArrayBufferView, with the entire file held in memory.
 * @group Input sources
 * @public
 */
export class BufferSource extends Source {
	/** @internal */
	_bytes: Uint8Array;
	/** @internal */
	_view: DataView;
	/** @internal */
	_onreadCalled = false;

	/**
	 * Creates a new {@link BufferSource} backed by the specified `ArrayBuffer`, `SharedArrayBuffer`,
	 * or `ArrayBufferView`.
	 */
	constructor(buffer: AllowSharedBufferSource) {
		if (
			!(buffer instanceof ArrayBuffer)
			&& !(typeof SharedArrayBuffer !== 'undefined' && buffer instanceof SharedArrayBuffer)
			&& !ArrayBuffer.isView(buffer)
		) {
			throw new TypeError('buffer must be an ArrayBuffer, SharedArrayBuffer, or ArrayBufferView.');
		}

		super();

		this._bytes = toUint8Array(buffer);
		this._view = toDataView(buffer);
	}

	/** @internal */
	_getFileSize(): number {
		return this._bytes.byteLength;
	}

	/**
	 * Always returns the entire buffer (at offset 0), regardless of the requested range.
	 * @internal
	 */
	_read(): ReadResult {
		if (!this._onreadCalled) {
			// We just say the first read retrieves all bytes from the source (which, I mean, it does)
			this._dispatchRead(0, this._bytes.byteLength);
			this._onreadCalled = true;
		}

		return {
			bytes: this._bytes,
			view: this._view,
			offset: 0,
		};
	}

	/** @internal */
	_dispose() {}
}

/**
 * Options for {@link BlobSource}.
* @group Input sources
 * @public
 */
export type BlobSourceOptions = {
	/** The maximum number of bytes the cache is allowed to hold in memory. Defaults to 8 MiB. */
	maxCacheSize?: number;
	/**
	 * Defaults to `true`. When `true`, Mediabunny will acquire a `ReadableStream` reader internally to efficiently read
	 * data from the blob. Since this can lead to errors in some (very) rare cases due to browser bugs, you can set this
	 * field to `false` to try a slower but more stable reading method.
	 */
	useStreamReader?: boolean;
};

/**
 * A source backed by a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). Since a
 * [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File) is also a `Blob`, this is the source to use when
 * reading files off the disk.
 * @group Input sources
 * @public
 */
export class BlobSource extends Source {
	/** @internal */
	_blob: Blob;
	/** @internal */
	_options: BlobSourceOptions;
	/** @internal */
	_orchestrator: ReadOrchestrator;

	/**
	 * Creates a new {@link BlobSource} backed by the specified
	 * [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob).
	 */
	constructor(blob: Blob, options: BlobSourceOptions = {}) {
		if (!(blob instanceof Blob)) {
			throw new TypeError('blob must be a Blob.');
		}
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (
			options.maxCacheSize !== undefined
			&& (!isNumber(options.maxCacheSize) || options.maxCacheSize < 0)
		) {
			throw new TypeError('options.maxCacheSize, when provided, must be a non-negative number.');
		}
		if (options.useStreamReader !== undefined && typeof options.useStreamReader !== 'boolean') {
			throw new TypeError('options.useStreamReader, when provided, must be a boolean.');
		}

		super();

		this._blob = blob;
		this._options = options;
		this._orchestrator = new ReadOrchestrator({
			maxCacheSize: options.maxCacheSize ?? (8 * 2 ** 20 /* 8 MiB */),
			maxWorkerCount: 4,
			runWorker: this._runWorker.bind(this),
			prefetchProfile: PREFETCH_PROFILES.fileSystem,
		});

		// A blob's size is known upfront
		this._orchestrator.fileSize = blob.size;
	}

	/** @internal */
	_getFileSize(): number {
		return this._orchestrator.fileSize!; // Faster than blob.size
	}

	/** @internal */
	_read(
		start: number,
		end: number,
		minReadPosition: number,
		maxReadPosition: number,
	): MaybePromise<ReadResult | null> {
		return this._orchestrator.read(start, end, minReadPosition, maxReadPosition);
	}

	/**
	 * Per-worker stream reader. `null` means the worker reads via `arrayBuffer()` instead of a stream; a missing
	 * entry means no decision has been made for that worker yet.
	 * @internal
	 */
	_readers = new WeakMap<ReadWorker, ReadableStreamDefaultReader<Uint8Array> | null>();

	/**
	 * Feeds the orchestrator with blob data from the worker's current position up to its target position.
	 * @internal
	 */
	private async _runWorker(worker: ReadWorker) {
		assert(worker.strictTarget);

		let reader = this._readers.get(worker);
		if (reader === undefined) {
			// https://github.com/Vanilagy/mediabunny/issues/184
			// WebKit has critical bugs with blob.stream():
			// - WebKitBlobResource error 1 when streaming large files
			// - Memory buildup and reload loops on iOS (network process crashes)
			// - ReadableStream stalls under backpressure (especially video)
			// Affects Safari and all iOS browsers (Chrome, Firefox, etc.).
			// Use arrayBuffer() fallback for WebKit browsers.
			if ('stream' in this._blob && !isWebKit() && this._options.useStreamReader !== false) {
				// Get a reader of the blob starting at the required offset, and then keep it around
				const slice = this._blob.slice(worker.currentPos);
				reader = slice.stream().getReader();
			} else {
				// We'll need to use more primitive ways
				reader = null;
			}

			this._readers.set(worker, reader);
		}

		while (worker.currentPos < worker.targetPos && !worker.aborted) {
			if (reader) {
				const { done, value } = await reader.read();
				if (done) {
					this._orchestrator.onWorkerFinished(worker);
					throw new Error('Blob reader stopped unexpectedly before all requested data was read.');
				}

				if (worker.aborted) {
					break;
				}

				this._dispatchRead(worker.currentPos, worker.currentPos + value.length);
				this._orchestrator.supplyWorkerData(worker, value);
			} else {
				const data = await this._blob.slice(worker.currentPos, worker.targetPos).arrayBuffer();

				if (worker.aborted) {
					break;
				}

				this._dispatchRead(worker.currentPos, worker.currentPos + data.byteLength);
				this._orchestrator.supplyWorkerData(worker, new Uint8Array(data));
			}
		}

		this._orchestrator.signalWorkerStoppedRunning(worker);

		if (worker.aborted) {
			// MDN: "Calling this method signals a loss of interest in the stream by a consumer."
			await reader?.cancel();
		}
	}

	/** @internal */
	_dispose() {
		this._orchestrator.dispose();
	}
}

const URL_SOURCE_MIN_LOAD_AMOUNT = 0.5 * 2 ** 20; // 0.5 MiB

/**
 * The default retry policy of {@link UrlSource}: exponential backoff in seconds, capped at 16 seconds, that only gives
 * up (returns `null`) when the failure looks like a CORS error.
 */
const DEFAULT_RETRY_DELAY = ((previousAttempts, error, src) => {
	// Check if this could be a CORS error. If so, we cannot recover from it and
	// should not attempt to retry.
	// CORS errors are intentionally not opaque, so we need to rely on heuristics.
	const couldBeCorsError = error instanceof Error
		&& (
			error.message.includes('Failed to fetch') // Chrome
			|| error.message.includes('Load failed') // Safari
			|| error.message.includes('NetworkError when attempting to fetch resource') // Firefox
		)
		&& typeof window !== 'undefined'; // CORS only happens in browser environments

	if (couldBeCorsError) {
		let originOfSrc: string | null = null;
		// Checking if the origin is different, because only then a CORS error could originate
		try {
			if (typeof window !== 'undefined' && typeof window.location !== 'undefined') {
				originOfSrc = new URL(src instanceof Request ? src.url : src, window.location.href).origin;
			}
		} catch {
			// URL parse failed
		}

		// If user is offline, it is probably not a CORS error.
		const isOnline
			= typeof navigator !== 'undefined' && typeof navigator.onLine === 'boolean' ? navigator.onLine : true;

		if (isOnline && originOfSrc !== null && originOfSrc !== window.location.origin) {
			console.warn(
				`Request will not be retried because a CORS error was suspected due to different origins. You can`
				+ ` modify this behavior by providing your own function for the 'getRetryDelay' option.`,
			);

			return null;
		}
	}

	return Math.min(2 ** (previousAttempts - 2), 16);
}) satisfies UrlSourceOptions['getRetryDelay'];

/** Origins for which the "no range request support" warning has already been printed, to print it only once each. */
const warnedOrigins = new Set<string>();

/**
 * Options for {@link UrlSource}.
 * @group Input sources
 * @public
 */
export type UrlSourceOptions = {
	/**
	 * The [`RequestInit`](https://developer.mozilla.org/en-US/docs/Web/API/RequestInit) used by the Fetch API. Can be
	 * used to further control the requests, such as setting custom headers.
	 *
	 * All fields will work except for `signal` and `headers.Range`; these will be overridden by Mediabunny. If you want
	 * to cancel ongoing requests, use {@link Input.dispose}.
	 */
	requestInit?: RequestInit;

	/**
	 * A function that returns the delay (in seconds) before retrying a failed request. The function is called
	 * with the number of previous, unsuccessful attempts, as well as with the error with which the previous request
	 * failed. If the function returns `null`, no more retries will be made.
	 *
	 * By default, it uses an exponential backoff algorithm that never gives up unless
	 * a CORS error is suspected (`fetch()` did reject, `navigator.onLine` is true and origin is different).
	 */
	getRetryDelay?: (previousAttempts: number, error: unknown, url: string | URL | Request) => number | null;

	/** The maximum number of bytes the cache is allowed to hold in memory. Defaults to 64 MiB. */
	maxCacheSize?: number;

	/** The maximum number of parallel requests to use for fetching. Defaults to 2. */
	parallelism?: number;

	/**
	 * A WHATWG-compatible fetch function. You can use this field to polyfill the `fetch` function, add missing
	 * features, or use a custom implementation.
	 */
	fetchFn?: typeof fetch;
};

/**
 * A source backed by a URL. This is useful for reading data from the network. Requests will be made using an optimized
 * reading and prefetching pattern to minimize request count and latency.
 * @group Input sources
 * @public
 */
export class UrlSource extends PathedSource {
	/** @internal */
	_url: string | URL | Request;
	/** @internal */
	_getRetryDelay: (previousAttempts: number, error: unknown, url: string | URL | Request) => number | null;
	/** @internal */
	_options: UrlSourceOptions;
	/** @internal */
	_orchestrator: ReadOrchestrator;
	/**
	 * Note that this value being true does NOT mean the file size can't change anymore; it just signals that we have at
	 * least checked if we know the file size or not.
	 * @internal
	 */
	_fileSizeDetermined = false;

	/**
	 * Creates a new {@link UrlSource} backed by the resource at the specified URL.
	 *
	 * When passing a `Request` instance, note that the `signal` and `headers.Range` options will be overridden by
	 * Mediabunny. If you want to cancel ongoing requests, use {@link Input.dispose}.
	 */
	constructor(
		url: string | URL | Request,
		options: UrlSourceOptions = {},
	) {
		if (
			typeof url !== 'string'
			&& !(url instanceof URL)
			&& !(typeof Request !== 'undefined' && url instanceof Request)
		) {
			throw new TypeError('url must be a string, URL or Request.');
		}
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (options.requestInit !== undefined && (!options.requestInit || typeof options.requestInit !== 'object')) {
			throw new TypeError('options.requestInit, when provided, must be an object.');
		}
		if (options.getRetryDelay !== undefined && typeof options.getRetryDelay !== 'function') {
			throw new TypeError('options.getRetryDelay, when provided, must be a function.');
		}
		if (
			options.maxCacheSize !== undefined
			&& (!isNumber(options.maxCacheSize) || options.maxCacheSize < 0)
		) {
			throw new TypeError('options.maxCacheSize, when provided, must be a non-negative number.');
		}
		if (options.parallelism !== undefined && (!Number.isInteger(options.parallelism) || options.parallelism < 1)) {
			// The check requires an integer, so the message says so too
			throw new TypeError('options.parallelism, when provided, must be a positive integer.');
		}
		if (options.fetchFn !== undefined && typeof options.fetchFn !== 'function') {
			throw new TypeError('options.fetchFn, when provided, must be a function.');
			// Won't bother validating this function beyond this
		}

		const urlString = url instanceof Request
			? url.url
			: url instanceof URL
				? url.href
				: url;

		// Files requested relative to this source are again UrlSources, sharing this source's options
		super(urlString, request => new UrlSource(request.path, this._options));

		this._url = url;
		this._options = options;
		this._getRetryDelay = options.getRetryDelay ?? DEFAULT_RETRY_DELAY;

		// Most files in the real-world have a single sequential access pattern, but having two in parallel can
		// also happen
		const DEFAULT_PARALLELISM = 2;

		this._orchestrator = new ReadOrchestrator({
			maxCacheSize: options.maxCacheSize ?? (64 * 2 ** 20 /* 64 MiB */),
			maxWorkerCount: options.parallelism ?? DEFAULT_PARALLELISM,
			runWorker: this._runWorker.bind(this),
			prefetchProfile: PREFETCH_PROFILES.network,
		});
	}

	/** @internal */
	_getFileSize(): number | null | undefined {
		// `undefined` until the first response has been inspected; afterwards the size, or `null` if it is unknown
		return this._fileSizeDetermined ? this._orchestrator.fileSize : undefined;
	}

	/** @internal */
	_read(
		start: number,
		end: number,
		minReadPosition: number,
		maxReadPosition: number,
	): MaybePromise<ReadResult | null> {
		return this._orchestrator.read(start, end, minReadPosition, maxReadPosition);
	}

	/**
	 * Streams the resource from the worker's current position via a range request and feeds the received chunks to
	 * the orchestrator. Falls back to downloading the entire resource if the server ignores the range request.
	 * @internal
	 */
	private async _runWorker(worker: ReadWorker) {
		// The outer loop is for resuming a request if it dies mid-response
		while (true) {
			const abortController = new AbortController();
			const response = await retriedFetch(
				this._options.fetchFn ?? fetch,
				this._url,
				mergeRequestInit(this._options.requestInit ?? {}, {
					headers: {
						// Always sending a range request is a good way to probe if the server supports them
						Range: `bytes=${worker.currentPos}-`,
					},
					signal: abortController.signal,
				}),
				this._getRetryDelay,
				() => this._disposed,
			);

			if (!response.ok) {
				// The body of the failed response will never be read, so release the request
				abortController.abort();

				// eslint-disable-next-line @typescript-eslint/no-base-to-string
				throw new Error(`Error fetching ${String(this._url)}: ${response.status} ${response.statusText}`);
			}

			outer:
			if (this._orchestrator.fileSize === null) {
				// See if we can deduce the file size from the response
				const contentRange = response.headers.get('Content-Range');
				if (contentRange) {
					const match = /\/(\d+)/.exec(contentRange);
					if (match) {
						this._orchestrator.supplyFileSize(Number(match[1]));
						break outer;
					}
				}

				const contentLength = response.headers.get('Content-Length');
				if (contentLength) {
					// Note: For range requests, this is _technically_ not correct, as the range response could contain
					// less data than was requested. In practice, it seems most servers don't do this though, and the
					// Content-Length header actually contains the length until the end of the file.
					this._orchestrator.supplyFileSize(worker.currentPos + Number(contentLength));
				}
			}

			this._fileSizeDetermined = true; // Yes, this is correct even if file size is still null

			if (response.status !== 206) {
				if (!this._usedForHls) {
					const url = new URL(
						this._url instanceof Request ? this._url.url : this._url,
						typeof window !== 'undefined' ? window.location.href : undefined,
					);

					if (
						url.origin !== 'null'
						// Don't show the warning for M3U8 playlist files, it's irrelevant for those
						&& !(url.pathname.endsWith('.m3u8') || url.pathname.endsWith('.m3u'))
					) {
						if (!warnedOrigins.has(url.origin)) {
							console.warn(
								`HTTP server (origin ${url.origin}) did not respond to a range request with 206 Partial`
								+ ' Content, meaning the entire resource will now be downloaded. To enable efficient'
								+ ' media file streaming across a network, please make sure your server supports'
								+ ' range requests.',
							);
							warnedOrigins.add(url.origin);
						}
					}
				}

				// The response carries the resource from its very start, so this worker takes over everything
				worker.currentPos = 0;
				this._orchestrator.options.maxCacheSize = Infinity; // 🤷

				if (this._orchestrator.fileSize !== null) {
					worker.targetPos = this._orchestrator.fileSize;
				} else {
					// The server is dumb, doesn't even surface the content length, but we'll work with it.
					worker.targetPos = Infinity;
					worker.strictTarget = false;
				}

				this._orchestrator.consolidateEverythingIntoOneWorker(worker);
			}

			if (!response.body) {
				throw new Error(
					'Missing HTTP response body stream. The used fetch function must provide the response body as a'
					+ ' ReadableStream.',
				);
			}

			const reader = response.body.getReader();

			while (true) {
				if (worker.currentPos >= worker.targetPos || worker.aborted) {
					abortController.abort();
					this._orchestrator.signalWorkerStoppedRunning(worker);
					return;
				}

				let readResult: ReadableStreamReadResult<Uint8Array>;

				try {
					readResult = await reader.read();
				} catch (error) {
					if (this._disposed) {
						// No need to try to retry
						throw error;
					}

					const retryDelayInSeconds = this._getRetryDelay(1, error, this._url);
					if (retryDelayInSeconds !== null) {
						console.error('Error while reading response stream. Attempting to resume.', error);
						await wait(1000 * retryDelayInSeconds);

						break;
					} else {
						throw error;
					}
				}

				if (worker.aborted) {
					continue; // Cleanup happens in next iteration
				}

				const { done, value } = readResult;

				if (done) {
					if (worker.currentPos >= worker.targetPos) {
						// All data was delivered, we're good
						this._orchestrator.onWorkerFinished(worker);
						return;
					}

					if (worker.strictTarget) {
						// The response stopped early, before the target. This can happen if server decides to cap range
						// requests arbitrarily, even if the request had an uncapped end. In this case, let's fetch the
						// rest of the data using a new request.
						break;
					} else {
						// Assume we have simply reached the end of the resource
						this._orchestrator.onWorkerFinished(worker);
						return;
					}
				}

				this._dispatchRead(worker.currentPos, worker.currentPos + value.length);
				this._orchestrator.supplyWorkerData(worker, value);
			}
		}

		// The previous UrlSource had logic for circumventing https://issues.chromium.org/issues/436025873; I haven't
		// been able to observe this bug with the new UrlSource (maybe because we're using response streaming), so the
		// logic for that has vanished for now. Leaving a comment here if this becomes relevant again.
	}

	/** @internal */
	_dispose() {
		this._orchestrator.dispose();
	}
}

/**
 * Options for {@link FilePathSource}.
* @group Input sources
 * @public
 */
export type FilePathSourceOptions = {
	/** The maximum number of bytes the cache is allowed to hold in memory. Defaults to 8 MiB. */
	maxCacheSize?: number;
};

/**
 * A source backed by a path to a file. Intended for server-side usage in Node, Bun, or Deno.
 *
 * Make sure to call `.dispose()` on the corresponding {@link Input} when done to explicitly free the internal file
 * handle acquired by this source.
 * @group Input sources
 * @public
 */
export class FilePathSource extends PathedSource {
	/** @internal */
	_customSource: CustomSource;
	/**
	 * The open file handle; `null` before the size has first been requested and after disposal.
	 * @internal
	 */
	_fileHandle: FileHandle | null = null;

	/** Creates a new {@link FilePathSource} backed by the file at the specified file path. */
	constructor(filePath: string, options: FilePathSourceOptions = {}) {
		if (typeof filePath !== 'string') {
			throw new TypeError('filePath must be a string.');
		}
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (
			options.maxCacheSize !== undefined
			&& (!isNumber(options.maxCacheSize) || options.maxCacheSize < 0)
		) {
			throw new TypeError('options.maxCacheSize, when provided, must be a non-negative number.');
		}

		if (!node.fs) {
			throw new Error(
				'FilePathSource is only available in server-side environments (Node.js, Bun, Deno).',
			);
		}

		// Files requested relative to this source are again FilePathSources, sharing this source's options
		super(filePath, request => new FilePathSource(request.path, options));

		// Let's back this source with a CustomSource, makes the implementation very simple
		this._customSource = new CustomSource({
			getSize: async () => {
				// The file is opened lazily, here, since getSize is guaranteed to be called before read
				const fileHandle = await node.fs.open(filePath, 'r');
				this._fileHandle = fileHandle;

				sourceFinalizationRegistry?.register(this, () => {
					// If it's not closed, Node prints annoying warnings
					void fileHandle.close();
				}, this);

				const stats = await fileHandle.stat();
				return stats.size;
			},
			read: async (start, end) => {
				const fileHandle = this._fileHandle;
				assert(fileHandle);

				const buffer = new Uint8Array(end - start);

				// A single read() may deliver fewer bytes than requested, so keep reading until the buffer is full.
				// Without this, a short read would silently leave the rest of the buffer zeroed.
				let filled = 0;
				while (filled < buffer.length) {
					const { bytesRead } = await fileHandle.read(
						buffer,
						filled,
						buffer.length - filled,
						start + filled,
					);
					if (bytesRead === 0) {
						// End of file reached before the range was filled (the file may have shrunk)
						break;
					}

					filled += bytesRead;
				}

				return buffer;
			},
			maxCacheSize: options.maxCacheSize,
			prefetchProfile: 'fileSystem',
		});
	}

	/** @internal */
	_read(
		start: number,
		end: number,
		minReadPosition: number,
		maxReadPosition: number,
	): MaybePromise<ReadResult | null> {
		return this._customSource._read(start, end, minReadPosition, maxReadPosition);
	}

	/** @internal */
	_getFileSize(): number | null | undefined {
		return this._customSource._getFileSize();
	}

	/** @internal */
	_dispose() {
		this._customSource._dispose();

		if (this._fileHandle) {
			void this._fileHandle.close();
			this._fileHandle = null;

			// The handle was closed explicitly, so the finalization fallback is no longer needed
			sourceFinalizationRegistry?.unregister(this);
		}
	}
}

/**
 * Options for defining a {@link CustomSource}.
 * @group Input sources
 * @public
 */
export type CustomSourceOptions = {
	/**
	 * Called when the size of the entire file is requested. Must return or resolve to the size in bytes. This function
	 * is guaranteed to be called before `read`.
	 */
	getSize: () => MaybePromise<number>;

	/**
	 * Called when data is requested. Must return or resolve to the bytes from the specified byte range, or a stream
	 * that yields these bytes.
	 *
	 * You are guaranteed that `0 <= start < end < fileSize`.
	 */
	read: (start: number, end: number) => MaybePromise<Uint8Array | ReadableStream<Uint8Array>>;

	/**
	 * Called when the {@link Input} driven by this source is disposed.
	 */
	dispose?: () => unknown;

	/** The maximum number of bytes the cache is allowed to hold in memory. Defaults to 8 MiB. */
	maxCacheSize?: number;

	/**
	 * Specifies the prefetch profile that the reader should use with this source. A prefetch profile specifies the
	 * pattern with which bytes outside of the requested range are preloaded to reduce latency for future reads.
	 *
	 * - `'none'` (default): No prefetching; only the data needed in the moment is requested.
	 * - `'fileSystem'`: File system-optimized prefetching: a small amount of data is prefetched bidirectionally,
	 * aligned with page boundaries.
* - `'network'`: Network-optimized prefetching, or more generally, prefetching optimized for any high-latency * environment: tries to minimize the amount of read calls and aggressively prefetches data when sequential access * patterns are detected. */ prefetchProfile?: 'none' | 'fileSystem' | 'network'; }; /** * A general-purpose, callback-driven source that can get its data from anywhere. Use this source to implement your own * custom source if the other sources don't cover your case. * @group Input sources * @public */ export class CustomSource extends Source { /** @internal */ _options: CustomSourceOptions; /** @internal */ _orchestrator: ReadOrchestrator; /** Creates a new {@link CustomSource} whose behavior is specified by `options`. */ constructor(options: CustomSourceOptions) { if (!options || typeof options !== 'object') { throw new TypeError('options must be an object.'); } if (typeof options.getSize !== 'function') { throw new TypeError('options.getSize must be a function.'); } if (typeof options.read !== 'function') { throw new TypeError('options.read must be a function.'); } if (options.dispose !== undefined && typeof options.dispose !== 'function') { throw new TypeError('options.dispose, when provided, must be a function.'); } if ( options.maxCacheSize !== undefined && (!isNumber(options.maxCacheSize) || options.maxCacheSize < 0) ) { throw new TypeError('options.maxCacheSize, when provided, must be a non-negative number.'); } if (options.prefetchProfile && !['none', 'fileSystem', 'network'].includes(options.prefetchProfile)) { throw new TypeError( 'options.prefetchProfile, when provided, must be one of \'none\', \'fileSystem\' or \'network\'.', ); } super(); this._options = options; this._orchestrator = new ReadOrchestrator({ maxCacheSize: options.maxCacheSize ?? (8 * 2 ** 20 /* 8 MiB */), maxWorkerCount: 2, // Fixed for now, *should* be fine prefetchProfile: PREFETCH_PROFILES[options.prefetchProfile ?? 
'none'], runWorker: this._runWorker.bind(this), }); } /** @internal */ _getFileSize(): number | null | undefined { return this._orchestrator.fileSize ?? undefined; } /** @internal */ _read( start: number, end: number, minReadPosition: number, maxReadPosition: number, ): MaybePromise<ReadResult | null> { if (this._orchestrator.fileSize !== null) { return this._orchestrator.read(start, end, minReadPosition, maxReadPosition); } const result = this._options.getSize(); if (result instanceof Promise) { return result.then((size) => { if (!Number.isInteger(size) || size < 0) { throw new TypeError('options.getSize must return or resolve to a non-negative integer.'); } this._orchestrator.fileSize = size; return this._orchestrator.read(start, end, minReadPosition, maxReadPosition); }); } else { if (!Number.isInteger(result) || result < 0) { throw new TypeError('options.getSize must return or resolve to a non-negative integer.'); } this._orchestrator.fileSize = result; return this._orchestrator.read(start, end, minReadPosition, maxReadPosition); } } /** @internal */ private async _runWorker(worker: ReadWorker) { while (worker.currentPos < worker.targetPos && !worker.aborted) { const originalCurrentPos = worker.currentPos; const originalTargetPos = worker.targetPos; let data = this._options.read(worker.currentPos, originalTargetPos); if (data instanceof Promise) data = await data; if (worker.aborted) { break; } if (data instanceof Uint8Array) { data = toUint8Array(data); // Normalize things like Node.js Buffer to Uint8Array if (data.length !== originalTargetPos - worker.currentPos) { // Yes, we're that strict throw new Error( `options.read returned a Uint8Array with unexpected length: Requested ${ originalTargetPos - worker.currentPos } bytes, but got ${data.length}.`, ); } this._dispatchRead(worker.currentPos, worker.currentPos + data.length); this._orchestrator.supplyWorkerData(worker, data); } else if (data instanceof ReadableStream) { const reader = data.getReader(); while 
(worker.currentPos < originalTargetPos && !worker.aborted) { const { done, value } = await reader.read(); if (done) { if (worker.currentPos < originalTargetPos) { // Yes, we're *that* strict throw new Error( `ReadableStream returned by options.read ended before supplying enough data.` + ` Requested ${originalTargetPos - originalCurrentPos} bytes, but got ${ worker.currentPos - originalCurrentPos }`, ); } break; } if (!(value instanceof Uint8Array)) { throw new TypeError('ReadableStream returned by options.read must yield Uint8Array chunks.'); } if (worker.aborted) { break; } const data = toUint8Array(value); // Normalize things like Node.js Buffer to Uint8Array this._dispatchRead(worker.currentPos, worker.currentPos + data.length); this._orchestrator.supplyWorkerData(worker, data); } } else { throw new TypeError('options.read must return or resolve to a Uint8Array or a ReadableStream.'); } } this._orchestrator.signalWorkerStoppedRunning(worker); } /** @internal */ _dispose() { this._orchestrator.dispose(); this._options.dispose?.(); } } /** * An alias for {@link CustomSource}. * @deprecated This name is misleading and will be removed in a future release. Please use {@link CustomSource} instead. * * @group Input sources * @public */ export const StreamSource = CustomSource; /** * An alias for {@link CustomSourceOptions}. * @deprecated This name is misleading and will be removed in a future release. Please use * {@link CustomSourceOptions} instead. * * @group Input sources * @public */ export type StreamSourceOptions = CustomSourceOptions; type ReadableStreamSourcePendingSlice = { start: number; end: number; bytes: Uint8Array; resolve: (bytes: ReadResult | null) => void; reject: (error: unknown) => void; }; /** * Options for {@link ReadableStreamSource}. * @group Input sources * @public */ export type ReadableStreamSourceOptions = { /** The maximum number of bytes the cache is allowed to hold in memory. Defaults to 32 MiB. 
*/
	maxCacheSize?: number;
};

/**
 * A source backed by a [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) of
 * `Uint8Array`, representing an append-only byte stream of unknown length. This is the source to use for incrementally
 * streaming in input files that are still being constructed and whose size we don't yet know, like for example the
 * output chunks of [MediaRecorder](https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder).
 *
 * This source is *unsized*, meaning calls to `.getSize()` will throw and readers are more limited due to the
 * lack of random file access. You should only use this source with sequential access patterns, such as reading all
 * packets from start to end. This source does not work well with random access patterns unless you increase its
 * max cache size.
 *
 * @group Input sources
 * @public
 */
export class ReadableStreamSource extends Source {
	/** @internal */
	_stream: ReadableStream<Uint8Array>;
	/** @internal */
	_reader: ReadableStreamDefaultReader<Uint8Array> | null = null;
	/** @internal */
	_cache: CacheEntry[] = [];
	/** @internal */
	_maxCacheSize: number;
	/** @internal */
	_pendingSlices: ReadableStreamSourcePendingSlice[] = [];
	/** @internal */
	_currentIndex = 0;
	/** @internal */
	_targetIndex = 0;
	/** @internal */
	_maxRequestedIndex = 0;
	/** @internal */
	_endIndex: number | null = null;
	/** @internal */
	_pulling = false;

	/**
	 * Creates a new {@link ReadableStreamSource} backed by the specified `ReadableStream<Uint8Array>`.
	 *
	 * Throws a `TypeError` if `stream` isn't a `ReadableStream` or if any of the options is invalid.
	 */
	constructor(stream: ReadableStream<Uint8Array>, options: ReadableStreamSourceOptions = {}) {
		if (!(stream instanceof ReadableStream)) {
			throw new TypeError('stream must be a ReadableStream.');
		}
		if (!options || typeof options !== 'object') {
			throw new TypeError('options must be an object.');
		}
		if (
			options.maxCacheSize !== undefined
			&& (!isNumber(options.maxCacheSize) || options.maxCacheSize < 0)
		) {
			throw new TypeError('options.maxCacheSize, when provided, must be a non-negative number.');
		}

		super();

		this._stream = stream;
		this._maxCacheSize = options.maxCacheSize ?? (32 * 2 ** 20 /* 32 MiB */);
	}

	/** @internal */
	_getFileSize(): number | null {
		return this._endIndex; // Starts out as null, meaning this source is unsized
	}

	/**
	 * Serves the byte range `[start, end)` from the cache if possible, and otherwise registers a pending slice that
	 * gets filled as more data is pulled from the stream. Returns `null` if the range extends past the known end of
	 * the stream, and throws if the range lies before the region that is still cached.
	 * @internal
	 */
	_read(start: number, end: number): MaybePromise<ReadResult | null> {
		if (this._endIndex !== null && end > this._endIndex) {
			return null;
		}

		this._maxRequestedIndex = Math.max(this._maxRequestedIndex, end);

		const cacheStartIndex = binarySearchLessOrEqual(this._cache, start, x => x.start);
		const cacheStartEntry = cacheStartIndex !== -1 ? this._cache[cacheStartIndex]! : null;

		if (cacheStartEntry && cacheStartEntry.start <= start && end <= cacheStartEntry.end) {
			// The request can be satisfied with a single cache entry
			return {
				bytes: cacheStartEntry.bytes,
				view: cacheStartEntry.view,
				offset: cacheStartEntry.start,
			};
		}

		let lastEnd = start;
		const bytes = new Uint8Array(end - start);

		if (cacheStartIndex !== -1) {
			// Walk over the cache to see if we can satisfy the request using multiple cache entries
			for (let i = cacheStartIndex; i < this._cache.length; i++) {
				const cacheEntry = this._cache[i]!;
				if (cacheEntry.start >= end) {
					break;
				}

				const cappedStart = Math.max(start, cacheEntry.start);
				if (cappedStart > lastEnd) {
					// We're too far behind
					this._throwDueToCacheMiss();
				}

				const cappedEnd = Math.min(end, cacheEntry.end);
				if (cappedStart < cappedEnd) {
					bytes.set(
						cacheEntry.bytes.subarray(cappedStart - cacheEntry.start, cappedEnd - cacheEntry.start),
						cappedStart - start,
					);
					lastEnd = cappedEnd;
				}
			}
		}

		if (lastEnd === end) {
			return {
				bytes,
				view: toDataView(bytes),
				offset: start,
			};
		}

		// We need to pull more data

		if (this._currentIndex > lastEnd) {
			// We're too far behind
			this._throwDueToCacheMiss();
		}

		const { promise, resolve, reject } = promiseWithResolvers<ReadResult | null>();
		this._pendingSlices.push({
			start,
			end,
			bytes,
			resolve,
			reject,
		});

		this._targetIndex = Math.max(this._targetIndex, end);

		// Start pulling from the stream if we're not already doing it
		if (!this._pulling) {
			this._pulling = true;

			void this._pull()
				.catch((error: unknown) => {
					this._pulling = false;

					if (this._pendingSlices.length > 0) {
						this._pendingSlices.forEach(x => x.reject(error)); // Make sure to propagate any errors
						this._pendingSlices.length = 0;
					} else {
						throw error; // So it doesn't get swallowed
					}
				});
		}

		return promise;
	}

	/**
	 * Always throws; used when a read targets data that is no longer (or never was) in the cache.
	 * @internal
	 */
	_throwDueToCacheMiss(): never {
		throw new Error(
			'Read is before the cached region. With ReadableStreamSource, you must access the data more'
			+ ' sequentially or increase the size of its cache.',
		);
	}

	/**
	 * Pulls chunks from the stream until the target index is reached, the stream ends, or the source is disposed.
	 * Each chunk is used to fill the pending slices and is appended to the cache.
	 *
	 * Rejects if the stream errors or yields a chunk that isn't a `Uint8Array`.
	 * @internal
	 */
	async _pull() {
		this._reader ??= this._stream.getReader();

		// This is the loop that keeps pulling data from the stream until a target index is reached, filling requests
		// in the process
		while (this._currentIndex < this._targetIndex && !this._disposed) {
			const { done, value } = await this._reader.read();
			if (done) {
				for (const pendingSlice of this._pendingSlices) {
					pendingSlice.resolve(null);
				}
				this._pendingSlices.length = 0;
				this._endIndex = this._currentIndex; // We know how long the file is now!

				break;
			}

			if (!(value instanceof Uint8Array)) {
				// Without this check, `value.byteLength` would be undefined, turning `_currentIndex` into NaN: the loop
				// would then quietly exit and all pending reads would hang forever.
				throw new TypeError('stream must yield Uint8Array chunks.');
			}

			const startIndex = this._currentIndex;
			const endIndex = this._currentIndex + value.byteLength;

			this._dispatchRead(startIndex, endIndex);

			// Fill the pending slices with the data
			for (let i = 0; i < this._pendingSlices.length; i++) {
				const pendingSlice = this._pendingSlices[i]!;

				const cappedStart = Math.max(startIndex, pendingSlice.start);
				const cappedEnd = Math.min(endIndex, pendingSlice.end);

				if (cappedStart < cappedEnd) {
					pendingSlice.bytes.set(
						value.subarray(cappedStart - startIndex, cappedEnd - startIndex),
						cappedStart - pendingSlice.start,
					);

					if (cappedEnd === pendingSlice.end) {
						// Pending slice fully filled
						pendingSlice.resolve({
							bytes: pendingSlice.bytes,
							view: toDataView(pendingSlice.bytes),
							offset: pendingSlice.start,
						});
						this._pendingSlices.splice(i, 1);
						i--;
					}
				}
			}

			this._cache.push({
				start: startIndex,
				end: endIndex,
				bytes: value,
				view: toDataView(value),
				age: 0, // Unused
			});

			// Do cache eviction, based on the distance from the last-requested index. It's important that we do it like
			// this and not based on where the reader is at, because if the reader is fast, we'll unnecessarily evict
			// data that we still might need.
			while (this._cache.length > 0) {
				const firstEntry = this._cache[0]!;
				const distance = this._maxRequestedIndex - firstEntry.end;

				if (distance <= this._maxCacheSize) {
					break;
				}

				this._cache.shift();
			}

			this._currentIndex += value.byteLength;
		}

		this._pulling = false;
	}

	/**
	 * Drops all pending slices and cached data, and cancels the stream reader if one was acquired.
	 * @internal
	 */
	_dispose() {
		this._pendingSlices.length = 0;
		this._cache.length = 0;

		// Cancelling is best-effort: cancelling a stream that has errored rejects with that error, which must not
		// surface as an unhandled rejection during disposal.
		void this._reader?.cancel().catch(() => { /* Best-effort cleanup */ });
	}
}

/**
 * Maps a requested byte range (and the current workers) to the byte range that should actually be loaded.
 */
type PrefetchProfile = (start: number, end: number, workers: ReadWorker[]) => {
	start: number;
	end: number;
};

const PREFETCH_PROFILES = {
	none: (start, end) => ({ start, end }),
	fileSystem: (start, end) => {
		const padding = 2 ** 16;

		start = Math.floor((start - padding) / padding) * padding;
		end = Math.ceil((end + padding) / padding) * padding;

		return { start, end };
	},
	network: (start, end, workers) => {
		// Add a slight bit of start padding because backwards reading is painful
		const paddingStart = 2 ** 16;
		start = Math.max(0, Math.floor((start - paddingStart) / paddingStart) * paddingStart);

		const maxExtensionAmount = 8 * 2 ** 20; // 8 MiB

		// Remote resources have extreme latency (relatively speaking), so the benefit from intelligent
		// prefetching is great. The network prefetch strategy is as follows: When we notice
		// successive reads to a worker's read region, we prefetch more data at the end of that region,
		// growing exponentially (up to a cap). This performs well for real-world use cases: Either we read a
		// small part of the file once and then never need it again, in which case the requested amount of data
		// is small. Or, we're repeatedly doing a sequential access pattern (common in media files), in which
		// case we can become more and more confident to prefetch more and more data.
		for (const worker of workers) {
			// When the read region crosses the threshold point, we trigger a prefetch. This point is typically
			// in the middle of the worker's read region, or a fixed offset from the end if the region has grown
			// really large.
			const thresholdPoint = Math.max(
				(worker.startPos + worker.targetPos) / 2,
				worker.targetPos - maxExtensionAmount,
			);

			if (closedIntervalsOverlap(
				start, end,
				thresholdPoint, worker.targetPos,
			)) {
				const size = worker.targetPos - worker.startPos;

				// If we extend by maxExtensionAmount
				const a = Math.ceil((size + 1) / maxExtensionAmount) * maxExtensionAmount;
				// If we extend to the next power of 2
				const b = 2 ** Math.ceil(Math.log2(size + 1));

				const extent = Math.min(b, a);
				end = Math.max(end, worker.startPos + extent);
			}
		}

		end = Math.max(end, start + URL_SOURCE_MIN_LOAD_AMOUNT);

		return {
			start,
			end,
		};
	},
} satisfies Record<string, PrefetchProfile>;

type PendingSlice = {
	start: number;
	bytes: Uint8Array;
	holes: Hole[];
	resolve: (bytes: Uint8Array | null) => void;
	reject: (error: unknown) => void;
};

type Hole = {
	start: number;
	end: number;
};

/** A contiguous run of cached bytes covering the file range from `start` (inclusive) to `end` (exclusive). */
type CacheEntry = {
	start: number;
	end: number;
	bytes: Uint8Array;
	view: DataView;
	age: number;
};

type ReadWorker = {
	startPos: number;
	currentPos: number;
	targetPos: number;
	/** The target is considered _strict_ when it is an error for the worker to terminate before reaching the target. */
	strictTarget: boolean;
	running: boolean;
	aborted: boolean;
	pendingSlices: PendingSlice[];
	age: number;
};

/**
 * Godclass for orchestrating complex, cached read operations. The reading model is as follows: Any reading task is
 * delegated to a *worker*, which is a