UNPKG

@atproto/repo

Version:

atproto repo and MST implementation

270 lines 9.55 kB
import { setImmediate } from 'node:timers/promises'; // eslint-disable-next-line import/default, import/no-named-as-default-member import varint from 'varint'; import * as cbor from '@atproto/lex-cbor'; import { decodeCid, isCidForBytes } from '@atproto/lex-data'; import { BlockMap } from './block-map.js'; import { schema } from './types.js'; import { concatBytesAsync } from './util.js'; export async function* writeCarStream(root, blocks) { const header = new Uint8Array(cbor.encode({ version: 1, roots: root ? [root] : [], })); yield new Uint8Array(varint.encode(header.byteLength)); yield header; for await (const block of blocks) { yield new Uint8Array(varint.encode(block.cid.bytes.byteLength + block.bytes.byteLength)); yield block.cid.bytes; yield block.bytes; } } export async function blocksToCarFile(root, blocks) { return concatBytesAsync(blocksToCarStream(root, blocks)); } export const blocksToCarStream = (root, blocks) => { return writeCarStream(root, iterateBlocks(blocks)); }; async function* iterateBlocks(blocks) { for (const entry of blocks.entries()) { yield { cid: entry.cid, bytes: entry.bytes }; } } export const readCar = async (bytes, opts) => { const { roots, blocks } = await readCarReader(new Ui8Reader(bytes), opts); const blockMap = new BlockMap(); for await (const block of blocks) { blockMap.set(block.cid, block.bytes); } return { roots, blocks: blockMap }; }; export const readCarWithRoot = async (bytes, opts) => { const { roots, blocks } = await readCar(bytes, opts); if (roots.length !== 1) { throw new Error(`Expected one root, got ${roots.length}`); } const root = roots[0]; return { root, blocks, }; }; export const readCarStream = async (car, opts) => { return readCarReader(new BufferedReader(car), opts); }; export const readCarReader = async (reader, opts) => { try { const headerSize = await readVarint(reader); if (headerSize === null) { throw new Error('Could not parse CAR header'); } const headerBytes = await reader.read(headerSize); const header = cbor.decode(headerBytes); const result = schema.carHeader.safeParse(header); if (!result.success) { throw new Error('Could not parse CAR header', { cause: result.error }); } return { roots: result.data.roots, blocks: readCarBlocksIter(reader, opts), }; } catch (err) { await reader.close(); throw err; } }; const readCarBlocksIter = (reader, opts) => { let generator = readCarBlocksIterGenerator(reader); if (!opts?.skipCidVerification) { generator = verifyIncomingCarBlocks(generator); } return Object.assign(generator, { async dump() { // try/finally to ensure that reader.close is called even if blocks.return throws. try { // Prevent the iterator from being started after this method is called. await generator.return(); } finally { // @NOTE the "finally" block of the async generator won't be called // if the iteration was never started so we need to manually close here. await reader.close(); } }, }); }; async function* readCarBlocksIterGenerator(reader) { let blocks = 0; try { while (!reader.isDone) { const blockSize = await readVarint(reader); if (blockSize === null) { break; } const blockBytes = await reader.read(blockSize); const cid = decodeCid(blockBytes.subarray(0, 36)); const bytes = blockBytes.subarray(36); yield { cid, bytes }; // yield to the event loop every 25 blocks // in the case the incoming CAR is synchronous, this can end up jamming up the thread blocks++; if (blocks % 25 === 0) { await setImmediate(); } } } finally { await reader.close(); } } export async function* verifyIncomingCarBlocks(car) { for await (const block of car) { if (!(await isCidForBytes(block.cid, block.bytes))) { throw new Error(`Not a valid CID for bytes (${block.cid.toString()})`); } yield block; } } const readVarint = async (reader) => { let done = false; const bytes = []; while (!done) { const byte = await reader.read(1); if (byte.byteLength === 0) { if (bytes.length > 0) { throw new Error('could not parse varint'); } else { return null; } } bytes.push(byte); if (byte[0] < 128) { done = true; } } const concatted = Buffer.concat(bytes); return varint.decode(concatted); }; class Ui8Reader { constructor(bytes) { this.bytes = bytes; this.idx = 0; this.isDone = false; } async read(bytesToRead) { const value = this.bytes.subarray(this.idx, this.idx + bytesToRead); this.idx += bytesToRead; if (this.idx >= this.bytes.length) { this.isDone = true; } return value; } async close() { } } /** * This code was optimized for performance. See * {@link https://github.com/bluesky-social/atproto/pull/4729 #4729} for more details * and benchmarks. */ class BufferedReader { constructor(stream) { this.isDone = false; /** fifo list of chunks to consume */ this.chunks = []; this.iterator = Symbol.asyncIterator in stream ? stream[Symbol.asyncIterator]() : stream[Symbol.iterator](); } /** Number of bytes currently buffered and available for reading */ get bufferedByteLength() { let total = 0; for (let i = 0; i < this.chunks.length; i++) { total += this.chunks[i].byteLength; } return total; } /** * @note concurrent reads are **NOT** supported by the current implementation * and would require call to readUntilBuffered to be using a fifo lock for * read()s to be processed in fifo order. */ async read(bytesToRead) { const bytesNeeded = bytesToRead - this.bufferedByteLength; if (bytesNeeded > 0 && !this.isDone) { await this.readUntilBuffered(bytesNeeded); } const resultLength = Math.min(bytesToRead, this.bufferedByteLength); if (resultLength <= 0) return new Uint8Array(); const firstChunk = this.consumeChunk(resultLength); if (firstChunk.byteLength === resultLength) { // If the data consumed from the first chunk contains all we need, return // it as-is. This allows to avoid any copy operation. return firstChunk; } // The first chunk does not have all the data we need. We have to copy // multiple chunks into a larger buffer const result = new Uint8Array(resultLength); let resultWriteIndex = 0; // Copy the first chunk into the result buffer result.set(firstChunk, resultWriteIndex); resultWriteIndex += firstChunk.byteLength; // Copy more chunks as needed (we use do-while because we *know* we need // more than one chunk) do { const missingLength = resultLength - resultWriteIndex; const currentChunk = this.consumeChunk(missingLength); result.set(currentChunk, resultWriteIndex); resultWriteIndex += currentChunk.byteLength; } while (resultWriteIndex < resultLength); return result; } async readUntilBuffered(bytesNeeded) { let bytesRead = 0; while (bytesRead < bytesNeeded) { const next = await this.iterator.next(); if (next.done) { this.isDone = true; break; } else { this.chunks.push(next.value); bytesRead += next.value.byteLength; } } return bytesRead; } consumeChunk(bytesToConsume) { const firstChunk = this.chunks[0]; if (bytesToConsume < firstChunk.byteLength) { // return a sub-view of the data being read and replace the first chunk // with a sub-view that does not contain that data. // @NOTE for some reason, subarray() revealed to be 7-8% slower in NodeJS // benchmarks. // this.chunks[0] = firstChunk.subarray(bytesToConsume) // return firstChunk.subarray(0, bytesToConsume) this.chunks[0] = new Uint8Array(firstChunk.buffer, firstChunk.byteOffset + bytesToConsume, firstChunk.byteLength - bytesToConsume); return new Uint8Array(firstChunk.buffer, firstChunk.byteOffset, bytesToConsume); } else { // First chunk is being read in full, discard it this.chunks.shift(); return firstChunk; } } async close() { try { if (!this.isDone && this.iterator.return) { await this.iterator.return(); } } finally { this.isDone = true; this.chunks.length = 0; } } } //# sourceMappingURL=car.js.map