tar-iterator
Extract the contents of tar archives through an iterator API, reading from streams or file paths. Use the stream interface with pipe transforms to add decompression algorithms.
JavaScript
/**
* GNU Sparse File Support
*
* Handles parsing of GNU sparse file headers and stream reconstruction.
*
* GNU sparse format stores:
* 1. A sparse map in the header (offset 386) with up to 4 entries
* 2. Extended sparse headers (512-byte blocks) if more entries needed
* 3. The actual data chunks (only non-zero portions of the file)
*
* Each sparse entry contains:
* - offset: position in the virtual file where this data chunk belongs
* - numbytes: size of this data chunk
*
* Node 0.8 compatible - uses only basic Buffer operations.
*/
import { allocBuffer } from 'extract-base-iterator';
import { SPARSE_ENTRIES_IN_HEADER, SPARSE_ENTRY_NUMBYTES_SIZE, SPARSE_ENTRY_OFFSET_SIZE, SPARSE_ENTRY_SIZE, SPARSE_EXTENDED_ENTRIES, SPARSE_EXTENDED_ISEXTENDED_OFFSET, SPARSE_ISEXTENDED_OFFSET, SPARSE_OFFSET, SPARSE_REALSIZE_OFFSET, SPARSE_REALSIZE_SIZE } from './constants.js';
import EntryStream from './EntryStream.js';
import { decodeOct } from './headers.js';
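// Illustrative example (not part of this module): a file with a real size of
// 1 MiB that stores only two 4 KiB data chunks would carry a sparse map like
//   [{ offset: 0, numbytes: 4096 }, { offset: 524288, numbytes: 4096 }]
// Every region not covered by an entry is a hole, reconstructed as zeros.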
// Reusable zero buffer for sparse hole emission (64KB)
const ZERO_BUFFER_SIZE = 65536;
let zeroBuffer = null;
function getZeroBuffer() {
if (!zeroBuffer) {
// allocBuffer zero-fills automatically on Node 4.5+ and falls back to a manual fill for Node 0.8
zeroBuffer = allocBuffer(ZERO_BUFFER_SIZE);
}
return zeroBuffer;
}
/**
* Parse sparse entries from a buffer starting at given offset
*
* @param buf - Buffer containing sparse entries
* @param startOffset - Offset in buffer where entries begin
* @param maxEntries - Maximum number of entries to read
* @returns Array of valid sparse entries (stops at first zero entry)
*/
function parseSparseEntries(buf, startOffset, maxEntries) {
const entries = [];
for (let i = 0; i < maxEntries; i++) {
const entryOffset = startOffset + i * SPARSE_ENTRY_SIZE;
const offset = decodeOct(buf, entryOffset, SPARSE_ENTRY_OFFSET_SIZE);
const numbytes = decodeOct(buf, entryOffset + SPARSE_ENTRY_OFFSET_SIZE, SPARSE_ENTRY_NUMBYTES_SIZE);
// Stop at first zero entry (end of sparse map)
if (offset === 0 && numbytes === 0) {
break;
}
entries.push({
offset,
numbytes
});
}
return entries;
}
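// Example: assuming the standard GNU layout (a 12-byte octal offset followed
// by a 12-byte octal numbytes per entry), a buffer containing an entry for
// { offset: 0, numbytes: 512 } followed by an all-zero entry yields a
// single-element array; the zero entry terminates the map.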
/**
* Parse GNU sparse header information from a tar header block
*
* @param headerBuf - The 512-byte tar header buffer
* @returns Sparse info including real size, entries, and extended flag
*/
export function parseGnuSparseHeader(headerBuf) {
// Parse sparse entries from header (up to 4)
const entries = parseSparseEntries(headerBuf, SPARSE_OFFSET, SPARSE_ENTRIES_IN_HEADER);
// Parse isextended flag
const isExtended = headerBuf[SPARSE_ISEXTENDED_OFFSET] !== 0;
// Parse real file size
const realSize = decodeOct(headerBuf, SPARSE_REALSIZE_OFFSET, SPARSE_REALSIZE_SIZE);
return {
realSize,
entries,
isExtended
};
}
/**
* Parse GNU sparse extended header block
*
* @param extBuf - The 512-byte extended sparse header buffer
* @returns Object with entries and whether more extended blocks follow
*/
export function parseGnuSparseExtended(extBuf) {
// Parse sparse entries from extended block (up to 21)
const entries = parseSparseEntries(extBuf, 0, SPARSE_EXTENDED_ENTRIES);
// Parse isextended flag
const isExtended = extBuf[SPARSE_EXTENDED_ISEXTENDED_OFFSET] !== 0;
return {
entries,
isExtended
};
}
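// Sketch of how a caller might assemble the complete sparse map (illustrative;
// `readNextBlock` is a hypothetical helper that returns the next 512-byte
// block from the archive and is not part of this module):
//
//   let info = parseGnuSparseHeader(headerBuf);
//   let entries = info.entries;
//   let more = info.isExtended;
//   while (more) {
//     const ext = parseGnuSparseExtended(readNextBlock());
//     entries = entries.concat(ext.entries);
//     more = ext.isExtended;
//   }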
/**
* Calculate total data size from sparse map
* This is the actual size of data stored in the archive (sum of all numbytes)
*/
export function sparseDataSize(entries) {
let total = 0;
for (let i = 0; i < entries.length; i++) {
total += entries[i].numbytes;
}
return total;
}
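// Example: for entries [{ offset: 0, numbytes: 512 }, { offset: 8192, numbytes: 256 }]
// this returns 768 (the bytes physically stored in the archive), while the
// reconstructed file's realSize would be at least 8448 (8192 + 256).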
/**
* Stream that reconstructs a sparse file from data chunks
*
* Takes the sparse map and actual data, outputs reconstructed file
* with zeros inserted for holes.
*
* Extends EntryStream to inherit pause/resume/pipe behavior.
*
* @internal
*/
export class SparseStream extends EntryStream {
/**
* Push data from the tar archive (actual sparse data chunk)
* Overrides EntryStream.push() to reconstruct sparse file with holes.
*/
push(data) {
// Allow null through to signal end
if (data === null) return super.push(null);
if (this.ended) return false;
let dataOffset = 0;
let result = true;
while (dataOffset < data.length && this.currentEntry < this.entries.length) {
const entry = this.entries[this.currentEntry];
// First, emit zeros for any hole before current entry
if (this.virtualPosition < entry.offset) {
const holeSize = entry.offset - this.virtualPosition;
this._emitZeros(holeSize);
this.virtualPosition = entry.offset;
}
// Now emit actual data for this entry
const toEmit = Math.min(this.entryBytesRemaining, data.length - dataOffset);
if (toEmit > 0) {
const chunk = data.slice(dataOffset, dataOffset + toEmit);
result = super.push(chunk);
dataOffset += toEmit;
this.virtualPosition += toEmit;
this.entryBytesRemaining -= toEmit;
}
// Move to next entry if current is exhausted
if (this.entryBytesRemaining <= 0) {
this.currentEntry++;
if (this.currentEntry < this.entries.length) {
this.entryBytesRemaining = this.entries[this.currentEntry].numbytes;
}
}
}
return result;
}
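// Walkthrough (illustrative): with entries [{ offset: 100, numbytes: 4 }] and
// realSize 200, push(Buffer.from('abcd')) emits 100 zero bytes for the leading
// hole followed by the 4 data bytes; end() then emits the trailing 96 zeros.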
/**
* End the stream - emit any trailing zeros
* Overrides EntryStream.end() to emit trailing zeros first.
*/
end() {
if (this.ended) return;
// Emit remaining zeros to reach real file size
if (this.virtualPosition < this.realSize) {
this._emitZeros(this.realSize - this.virtualPosition);
this.virtualPosition = this.realSize;
}
super.end();
}
/**
* Emit zeros for a hole, reusing the shared zero buffer
*/
_emitZeros(size) {
const zeros = getZeroBuffer();
let remaining = size;
while (remaining > 0) {
const toEmit = Math.min(remaining, ZERO_BUFFER_SIZE);
// Slice from the reusable buffer to emit exact size needed
super.push(zeros.slice(0, toEmit));
remaining -= toEmit;
}
}
constructor(entries, realSize) {
super();
this.entries = entries;
this.realSize = realSize;
this.currentEntry = 0;
this.virtualPosition = 0;
this.entryBytesRemaining = entries.length > 0 ? entries[0].numbytes : 0;
}
}
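// Usage sketch (illustrative, assumes `fs` is imported; `archiveChunks` is a
// hypothetical array of Buffers holding the entry's stored data):
//
//   const stream = new SparseStream(entries, realSize);
//   stream.pipe(fs.createWriteStream('restored.bin'));
//   for (const chunk of archiveChunks) stream.push(chunk);
//   stream.end();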