@ipld/car

Content Addressable aRchive format reader and writer

import { decode as decodeDagCbor } from '@ipld/dag-cbor'
import { CID } from 'multiformats/cid'
import * as Digest from 'multiformats/hashes/digest'
import { CIDV0_BYTES, decodeV2Header, decodeVarint, getMultihashLength, V2_HEADER_LENGTH } from './decoder-common.js'
import { CarV1HeaderOrV2Pragma } from './header-validator.js'

/**
 * @typedef {import('./api').Block} Block
 * @typedef {import('./api').BlockHeader} BlockHeader
 * @typedef {import('./api').BlockIndex} BlockIndex
 * @typedef {import('./coding').BytesReader} BytesReader
 * @typedef {import('./coding').CarHeader} CarHeader
 * @typedef {import('./coding').CarV2Header} CarV2Header
 * @typedef {import('./coding').CarV2FixedHeader} CarV2FixedHeader
 * @typedef {import('./coding').CarDecoder} CarDecoder
 */

/**
 * Reads header data from a `BytesReader`. The header may either be in the form
 * of a `CarHeader` or `CarV2Header` depending on the CAR being read.
 *
 * @name async decoder.readHeader(reader)
 * @param {BytesReader} reader
 * @param {number} [strictVersion]
 * @returns {Promise<CarHeader|CarV2Header>}
 */
export async function readHeader (reader, strictVersion) {
  const length = decodeVarint(await reader.upTo(8), reader)
  if (length === 0) {
    throw new Error('Invalid CAR header (zero length)')
  }
  const header = await reader.exactly(length, true)
  const block = decodeDagCbor(header)
  if (CarV1HeaderOrV2Pragma.toTyped(block) === undefined) {
    throw new Error('Invalid CAR header format')
  }
  if ((block.version !== 1 && block.version !== 2) ||
      (strictVersion !== undefined && block.version !== strictVersion)) {
    throw new Error(`Invalid CAR version: ${block.version}${strictVersion !== undefined ? ` (expected ${strictVersion})` : ''}`)
  }
  if (block.version === 1) {
    // CarV1HeaderOrV2Pragma makes roots optional, let's make it mandatory
    if (!Array.isArray(block.roots)) {
      throw new Error('Invalid CAR header format')
    }
    return block
  }
  // version 2
  if (block.roots !== undefined) {
    throw new Error('Invalid CAR header format')
  }
  const v2Header = decodeV2Header(await reader.exactly(V2_HEADER_LENGTH, true))
  reader.seek(v2Header.dataOffset - reader.pos)
  const v1Header = await readHeader(reader, 1)
  return Object.assign(v1Header, v2Header)
}

/**
 * @param {BytesReader} reader
 * @returns {Promise<CID>}
 */
async function readCid (reader) {
  const first = await reader.exactly(2, false)
  if (first[0] === CIDV0_BYTES.SHA2_256 && first[1] === CIDV0_BYTES.LENGTH) {
    // cidv0 32-byte sha2-256
    const bytes = await reader.exactly(34, true)
    const multihash = Digest.decode(bytes)
    return CID.create(0, CIDV0_BYTES.DAG_PB, multihash)
  }

  const version = decodeVarint(await reader.upTo(8), reader)
  if (version !== 1) {
    throw new Error(`Unexpected CID version (${version})`)
  }
  const codec = decodeVarint(await reader.upTo(8), reader)
  const bytes = await reader.exactly(getMultihashLength(await reader.upTo(8)), true)
  const multihash = Digest.decode(bytes)
  return CID.create(version, codec, multihash)
}
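
/*
 * Usage sketch (an illustrative addition, not part of the upstream source):
 * reading just the header from an in-memory CAR. `carBytes` is a hypothetical
 * Uint8Array containing a complete CAR file; `bytesReader` is defined further
 * down in this file.
 *
 *   const reader = bytesReader(carBytes)
 *   const header = await readHeader(reader)
 *   console.log(header.version) // 1 or 2
 *   console.log(header.roots.map(String)) // root CIDs
 */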

/**
 * Reads the leading data of an individual block of CAR data from a
 * `BytesReader`. Returns a `BlockHeader` object which contains
 * `{ cid, length, blockLength }` which can be used to either index the block
 * or read the block binary data.
 *
 * @name async decoder.readBlockHead(reader)
 * @param {BytesReader} reader
 * @returns {Promise<BlockHeader>}
 */
export async function readBlockHead (reader) {
  // length includes a CID + Binary, where CID has a variable length
  // we have to deal with
  const start = reader.pos
  let length = decodeVarint(await reader.upTo(8), reader)
  if (length === 0) {
    throw new Error('Invalid CAR section (zero length)')
  }
  length += (reader.pos - start)
  const cid = await readCid(reader)
  const blockLength = length - Number(reader.pos - start) // subtract CID length
  return { cid, length, blockLength }
}

/**
 * @param {BytesReader} reader
 * @returns {Promise<Block>}
 */
async function readBlock (reader) {
  const { cid, blockLength } = await readBlockHead(reader)
  const bytes = await reader.exactly(blockLength, true)
  return { bytes, cid }
}

/**
 * @param {BytesReader} reader
 * @returns {Promise<BlockIndex>}
 */
async function readBlockIndex (reader) {
  const offset = reader.pos
  const { cid, length, blockLength } = await readBlockHead(reader)
  const index = { cid, length, blockLength, offset, blockOffset: reader.pos }
  reader.seek(index.blockLength)
  return index
}

/**
 * Creates a `CarDecoder` from a `BytesReader`. The `CarDecoder` is an async
 * interface that will consume the bytes from the `BytesReader` to yield a
 * `header()` and either `blocks()` or `blocksIndex()` data.
 *
 * @name decoder.createDecoder(reader)
 * @param {BytesReader} reader
 * @returns {CarDecoder}
 */
export function createDecoder (reader) {
  const headerPromise = (async () => {
    const header = await readHeader(reader)
    if (header.version === 2) {
      const v1length = reader.pos - header.dataOffset
      reader = limitReader(reader, header.dataSize - v1length)
    }
    return header
  })()

  return {
    header: () => headerPromise,

    async * blocks () {
      await headerPromise
      while ((await reader.upTo(8)).length > 0) {
        yield await readBlock(reader)
      }
    },

    async * blocksIndex () {
      await headerPromise
      while ((await reader.upTo(8)).length > 0) {
        yield await readBlockIndex(reader)
      }
    }
  }
}
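
/*
 * Usage sketch (an illustrative addition, not part of the upstream source):
 * draining a whole CAR through `createDecoder`. `carBytes` is a hypothetical
 * Uint8Array; `bytesReader` is defined below. Note that `blocks()` and
 * `blocksIndex()` consume the same underlying reader, so use one or the
 * other per decoder.
 *
 *   const decoder = createDecoder(bytesReader(carBytes))
 *   const { version, roots } = await decoder.header()
 *   for await (const { cid, bytes } of decoder.blocks()) {
 *     console.log(`${cid} -> ${bytes.length} bytes`)
 *   }
 */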

/**
 * Creates a `BytesReader` from a `Uint8Array`.
 *
 * @name decoder.bytesReader(bytes)
 * @param {Uint8Array} bytes
 * @returns {BytesReader}
 */
export function bytesReader (bytes) {
  let pos = 0

  /** @type {BytesReader} */
  return {
    async upTo (length) {
      const out = bytes.subarray(pos, pos + Math.min(length, bytes.length - pos))
      return out
    },

    async exactly (length, seek = false) {
      if (length > bytes.length - pos) {
        throw new Error('Unexpected end of data')
      }
      const out = bytes.subarray(pos, pos + length)
      if (seek) {
        pos += length
      }
      return out
    },

    seek (length) {
      pos += length
    },

    get pos () {
      return pos
    }
  }
}

/**
 * @ignore
 * reusable reader for streams and files, we just need a way to read an
 * additional chunk (of some undetermined size) and a way to close the
 * reader when finished
 * @param {() => Promise<Uint8Array|null>} readChunk
 * @returns {BytesReader}
 */
export function chunkReader (readChunk /*, closer */) {
  let pos = 0
  let have = 0
  let offset = 0
  let currentChunk = new Uint8Array(0)

  const read = async (/** @type {number} */ length) => {
    have = currentChunk.length - offset
    const bufa = [currentChunk.subarray(offset)]
    while (have < length) {
      const chunk = await readChunk()
      if (chunk == null) {
        break
      }
      /* c8 ignore next 8 */
      // undo this ignore ^ when we have a fd implementation that can seek()
      if (have < 0) { // because of a seek()
        /* c8 ignore next 4 */
        // too hard to test the else
        if (chunk.length > have) {
          bufa.push(chunk.subarray(-have))
        } // else discard
      } else {
        bufa.push(chunk)
      }
      have += chunk.length
    }
    currentChunk = new Uint8Array(bufa.reduce((p, c) => p + c.length, 0))
    let off = 0
    for (const b of bufa) {
      currentChunk.set(b, off)
      off += b.length
    }
    offset = 0
  }

  /** @type {BytesReader} */
  return {
    async upTo (length) {
      if (currentChunk.length - offset < length) {
        await read(length)
      }
      return currentChunk.subarray(offset, offset + Math.min(currentChunk.length - offset, length))
    },

    async exactly (length, seek = false) {
      if (currentChunk.length - offset < length) {
        await read(length)
      }
      if (currentChunk.length - offset < length) {
        throw new Error('Unexpected end of data')
      }
      const out = currentChunk.subarray(offset, offset + length)
      if (seek) {
        pos += length
        offset += length
      }
      return out
    },

    seek (length) {
      pos += length
      offset += length
    },

    get pos () {
      return pos
    }
  }
}

/**
 * Creates a `BytesReader` from an `AsyncIterable<Uint8Array>`, which allows for
 * consumption of CAR data from a streaming source.
 *
 * @name decoder.asyncIterableReader(asyncIterable)
 * @param {AsyncIterable<Uint8Array>} asyncIterable
 * @returns {BytesReader}
 */
export function asyncIterableReader (asyncIterable) {
  const iterator = asyncIterable[Symbol.asyncIterator]()

  async function readChunk () {
    const next = await iterator.next()
    if (next.done) {
      return null
    }
    return next.value
  }

  return chunkReader(readChunk)
}
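
/*
 * Usage sketch (an illustrative addition, not part of the upstream source):
 * streaming a CAR from disk. This assumes a Node.js environment and a
 * hypothetical file `example.car`; fs.createReadStream() yields Buffers,
 * which are Uint8Array subclasses, so the stream satisfies
 * AsyncIterable<Uint8Array>.
 *
 *   import fs from 'fs'
 *   const reader = asyncIterableReader(fs.createReadStream('example.car'))
 *   for await (const index of createDecoder(reader).blocksIndex()) {
 *     // index is { cid, length, blockLength, offset, blockOffset }
 *   }
 */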

/**
 * Wraps a `BytesReader` in a limiting `BytesReader` which limits maximum read
 * to `byteLimit` bytes. It _does not_ update `pos` of the original
 * `BytesReader`.
 *
 * @name decoder.limitReader(reader, byteLimit)
 * @param {BytesReader} reader
 * @param {number} byteLimit
 * @returns {BytesReader}
 */
export function limitReader (reader, byteLimit) {
  let bytesRead = 0

  /** @type {BytesReader} */
  return {
    async upTo (length) {
      let bytes = await reader.upTo(length)
      if (bytes.length + bytesRead > byteLimit) {
        bytes = bytes.subarray(0, byteLimit - bytesRead)
      }
      return bytes
    },

    async exactly (length, seek = false) {
      const bytes = await reader.exactly(length, seek)
      if (bytes.length + bytesRead > byteLimit) {
        throw new Error('Unexpected end of data')
      }
      if (seek) {
        bytesRead += length
      }
      return bytes
    },

    seek (length) {
      bytesRead += length
      reader.seek(length)
    },

    get pos () {
      return reader.pos
    }
  }
}
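
/*
 * Usage sketch (an illustrative addition, not part of the upstream source):
 * `limitReader` is how createDecoder (above) confines CARv2 block iteration
 * to the inner CARv1 payload, so an optional trailing index is not misread
 * as blocks. A manual equivalent, with `payloadLength` as a hypothetical
 * byte count:
 *
 *   const limited = limitReader(reader, payloadLength)
 *   // limited.upTo() truncates at the limit, so a blocks() loop ends
 *   // cleanly; limited.exactly() past the limit throws
 *   // 'Unexpected end of data'
 */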