UNPKG

@ipld/car

Version:

Content Addressable aRchive format reader and writer

212 lines (199 loc) 5.95 kB
import fs from 'fs'
import { Readable } from 'stream'
import { CID } from 'multiformats/cid'
import { CarIndexer } from './indexer.js'
import { CarReader as NodeCarReader } from './reader.js'

/**
 * @typedef {import('fs').promises.FileHandle} FileHandle
 * @typedef {import('./api').Block} Block
 * @typedef {import('./api').BlockIndex} BlockIndex
 * @typedef {import('./api').CarReader} CarReaderIface
 * @typedef {import('./reader-browser').CarReader} CarReader
 * @typedef {{ blockLength:number, blockOffset:number }} RawLocation
 */

/**
 * A form of {@link CarReader} that pre-indexes a CAR archive from a file and
 * provides random access to blocks within the file using the index data. This
 * function is **only available in Node.js** and not a browser environment.
 *
 * For large CAR files, using this form of `CarReader` can be significantly more
 * efficient in terms of memory. The index consists of a list of `CID`s and
 * their location within the archive (see {@link CarIndexer}). For large numbers
 * of blocks, this index can also occupy a significant amount of memory. In some
 * cases it may be necessary to expand the memory capacity of a Node.js instance
 * to allow this index to fit. (e.g. by running with
 * `NODE_OPTIONS="--max-old-space-size=16384"`).
 *
 * As a `CarIndexedReader` instance maintains an open file descriptor for its
 * CAR file, an additional {@link CarReader#close} method is attached. This
 * _must_ be called to have full clean-up of resources after use.
 *
 * Load this class with either
 * `import { CarIndexedReader } from '@ipld/car/indexed-reader'`
 * (`const { CarIndexedReader } = require('@ipld/car/indexed-reader')`). Or
 * `import { CarIndexedReader } from '@ipld/car'`
 * (`const { CarIndexedReader } = require('@ipld/car')`). The former will likely
 * result in smaller bundle sizes where this is important.
 *
 * @name CarIndexedReader
 * @class
 * @implements {CarReaderIface}
 * @extends {CarReader}
 * @property {number} version The version number of the CAR referenced by this
 * reader (should be `1`).
 */
export class CarIndexedReader {
  /**
   * @param {number} version
   * @param {string} path
   * @param {CID[]} roots
   * @param {Map<string, RawLocation>} index
   * @param {string[]} order
   */
  constructor (version, path, roots, index, order) {
    this._version = version
    this._path = path
    this._roots = roots
    this._index = index
    this._order = order
    /** @type {FileHandle | null} */
    this._fd = null
    // In-flight (or settled) open of `_path`; shared by concurrent `get()`
    // calls so that only one file handle is ever opened per reader.
    /** @type {Promise<FileHandle> | null} */
    this._opening = null
  }

  get version () {
    return this._version
  }

  /**
   * See {@link CarReader#getRoots}
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @returns {Promise<CID[]>}
   */
  async getRoots () {
    return this._roots
  }

  /**
   * See {@link CarReader#has}
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @param {CID} key
   * @returns {Promise<boolean>}
   */
  async has (key) {
    return this._index.has(key.toString())
  }

  /**
   * Lazily open the CAR file for reading, at most once.
   *
   * The open promise is memoized so that concurrent callers that arrive
   * before the first open resolves all share the same file handle instead of
   * each opening (and leaking) their own. A failed open clears the memoized
   * promise so a later call can retry.
   *
   * @private
   * @returns {Promise<FileHandle>}
   */
  _open () {
    if (!this._opening) {
      this._opening = fs.promises.open(this._path, 'r').then((fd) => {
        this._fd = fd
        return fd
      }, (err) => {
        this._opening = null
        throw err
      })
    }
    return this._opening
  }

  /**
   * See {@link CarReader#get}
   *
   * The underlying file is opened on first use and kept open until
   * {@link CarIndexedReader#close} is called.
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @param {CID} key
   * @returns {Promise<Block | undefined>} the block, or `undefined` if `key`
   * is not present in the index.
   */
  async get (key) {
    const blockIndex = this._index.get(key.toString())
    if (!blockIndex) {
      return undefined
    }
    const fd = this._fd || await this._open()
    // Only the raw block location is tracked in the index; `length` and
    // `offset` are passed as 0, exactly as before.
    const readIndex = {
      cid: key,
      length: 0,
      offset: 0,
      blockLength: blockIndex.blockLength,
      blockOffset: blockIndex.blockOffset
    }
    return NodeCarReader.readRaw(fd, readIndex)
  }

  /**
   * See {@link CarReader#blocks}
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @generator
   * @returns {AsyncGenerator<Block>}
   */
  async * blocks () {
    for (const cidStr of this._order) {
      const block = await this.get(CID.parse(cidStr))
      /* c8 ignore next 3 */
      if (!block) {
        throw new Error('Unexpected internal error')
      }
      yield block
    }
  }

  /**
   * See {@link CarReader#cids}
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @generator
   * @returns {AsyncGenerator<CID>}
   */
  async * cids () {
    for (const cidStr of this._order) {
      yield CID.parse(cidStr)
    }
  }

  /**
   * Close the underlying file descriptor maintained by this `CarIndexedReader`.
   * This must be called for proper resource clean-up to occur.
   *
   * Calling `close()` when no file is open (including calling it twice) is a
   * no-op. The cached handle is cleared, so a later `get()` reopens the file
   * rather than reading from a closed descriptor; `close()` must then be
   * called again.
   *
   * @function
   * @memberof CarIndexedReader
   * @instance
   * @async
   * @returns {Promise<void>}
   */
  async close () {
    const opening = this._opening
    if (!opening) {
      return
    }
    // Reset state first so the closed handle can never be reused.
    this._opening = null
    this._fd = null
    // Wait for a still-pending open so its handle is not leaked. An open
    // failure has already been reported to the `get()` caller that triggered
    // it, and leaves nothing to close here.
    const fd = await opening.catch(() => null)
    if (fd) {
      await fd.close()
    }
  }

  /**
   * Instantiate an {@link CarIndexedReader} from a file with the provided
   * `path`. The CAR file is first indexed with a full pass that collects `CID`s
   * and block locations. This index is maintained in memory. Subsequent reads
   * operate on a read-only file descriptor, fetching the block from its in-file
   * location.
   *
   * For large archives, the initial indexing may take some time. The returned
   * `Promise` will resolve only after this is complete.
   *
   * @async
   * @static
   * @memberof CarIndexedReader
   * @param {string} path
   * @returns {Promise<CarIndexedReader>}
   * @throws {TypeError} if `path` is not a string
   */
  static async fromFile (path) {
    if (typeof path !== 'string') {
      throw new TypeError('fromFile() requires a file path string')
    }

    const iterable = await CarIndexer.fromIterable(Readable.from(fs.createReadStream(path)))
    /** @type {Map<string, RawLocation>} */
    const index = new Map()
    // CID strings in the order the indexer yields them; drives `blocks()` and `cids()`.
    /** @type {string[]} */
    const order = []
    for await (const { cid, blockLength, blockOffset } of iterable) {
      const cidStr = cid.toString()
      index.set(cidStr, { blockLength, blockOffset })
      order.push(cidStr)
    }
    return new CarIndexedReader(iterable.version, path, await iterable.getRoots(), index, order)
  }
}

export const __browser = false