UNPKG

tar-iterator

Version:

Extract contents from tar archive type using an iterator API using streams or paths. Use stream interface and pipe transforms to add decompression algorithms

github.com/kmalakoff/tar-iterator

kmalakoff/tar-iterator

271 lines (270 loc) • 10.4 kB

JavaScript

/** * TAR Header Parsing * * All functions use only Node 0.8 compatible Buffer APIs: * - Buffer indexing: buf[i] * - Buffer.slice(start, end) * - Buffer.toString(encoding) * - Buffer.write(string, offset, length, encoding) * - new Buffer(size) or new Buffer(string) * * NOT using (added in later Node versions): * - Buffer.from() * - Buffer.alloc() * - Buffer.allocUnsafe() * - Buffer.compare() * - Number.isNaN() (use global isNaN instead) */ import { CHECKSUM_OFFSET, CHECKSUM_SIZE, DEVMAJOR_OFFSET, DEVMAJOR_SIZE, DEVMINOR_OFFSET, DEVMINOR_SIZE, GID_OFFSET, GID_SIZE, GNAME_OFFSET, GNAME_SIZE, GNU_MAGIC, GNU_VER, HEADER_SIZE, LINKNAME_OFFSET, LINKNAME_SIZE, MAGIC_OFFSET, MODE_OFFSET, MODE_SIZE, MTIME_OFFSET, MTIME_SIZE, NAME_OFFSET, NAME_SIZE, PREFIX_OFFSET, PREFIX_SIZE, SIZE_OFFSET, SIZE_SIZE, TYPE_BLOCK_DEVICE, TYPE_CHAR_DEVICE, TYPE_CONTIGUOUS, TYPE_DIRECTORY, TYPE_FIFO, TYPE_FILE, TYPE_GNU_DUMPDIR, TYPE_GNU_LONG_LINK, TYPE_GNU_LONG_PATH, TYPE_GNU_MULTIVOL, TYPE_GNU_SPARSE, TYPE_GNU_VOLHDR, TYPE_LINK, TYPE_PAX_GLOBAL, TYPE_PAX_HEADER, TYPE_SYMLINK, TYPEFLAG_OFFSET, UID_OFFSET, UID_SIZE, UNAME_OFFSET, UNAME_SIZE, USTAR_MAGIC, VERSION_OFFSET, ZERO_OFFSET } from './constants.js'; import { createTarError, TarErrorCode } from './errors.js'; /** * Convert type flag number to type string */ export function toType(flag) { switch(flag){ case TYPE_FILE: return 'file'; case TYPE_LINK: return 'link'; case TYPE_SYMLINK: return 'symlink'; case TYPE_CHAR_DEVICE: return 'character-device'; case TYPE_BLOCK_DEVICE: return 'block-device'; case TYPE_DIRECTORY: return 'directory'; case TYPE_FIFO: return 'fifo'; case TYPE_CONTIGUOUS: return 'contiguous-file'; case TYPE_GNU_LONG_PATH: return 'gnu-long-path'; case TYPE_GNU_LONG_LINK: return 'gnu-long-link-path'; case TYPE_GNU_SPARSE: return 'gnu-sparse'; case TYPE_GNU_DUMPDIR: return 'gnu-dumpdir'; case TYPE_GNU_MULTIVOL: return 'gnu-multivol'; case TYPE_GNU_VOLHDR: return 'gnu-volume-header'; case TYPE_PAX_HEADER: return 'pax-header'; case TYPE_PAX_GLOBAL: return 'pax-global-header'; default: return null; } } /** * Node 0.8 compatible isNaN (Number.isNaN didn't exist until ES2015) */ // biome-ignore lint/suspicious/noShadowRestrictedNames: Legacy function isNaN(value) { // biome-ignore lint/suspicious/noSelfCompare: Legacy return value !== value; } /** * Find null terminator in buffer region */ function findNull(buf, start, end) { for(let i = start; i < end; i++){ if (buf[i] === 0) return i; } return end; } /** * Decode null-terminated string from buffer */ function decodeStr(buf, offset, length, encoding) { const enc = encoding || 'utf8'; const end = findNull(buf, offset, offset + length); return buf.slice(offset, end).toString(enc); } /** * Parse base-256 encoded number (GNU extension for large files >8GB) * If high bit of first byte is set, remaining bytes are big-endian base-256 */ function parse256(buf) { // Check sign bit (bit 6 of first byte, after the marker bit 7) const positive = (buf[0] & 0x40) === 0; // Build number from bytes (big-endian, excluding first byte's marker bits) let sum = 0; let base = 1; // Process bytes from right to left (least significant first) for(let i = buf.length - 1; i > 0; i--){ const byte = buf[i]; if (positive) { sum += byte * base; } else { sum += (0xff - byte) * base; } base *= 256; } return positive ? sum : -1 * sum; } /** * Decode octal number from buffer, with base-256 fallback for large values */ export function decodeOct(buf, offset, length) { const val = buf.slice(offset, offset + length); // If high bit is set, parse as base-256 (GNU extension) if (val[0] & 0x80) { return parse256(val); } // Skip leading spaces (some old tar versions use them) let start = 0; while(start < val.length && val[start] === 32)start++; // Find end (space or null terminator) let end = start; while(end < val.length && val[end] !== 32 && val[end] !== 0)end++; // Skip leading zeros while(start < end && val[start] === ZERO_OFFSET)start++; if (start === end) return 0; return parseInt(val.slice(start, end).toString(), 8); } /** * Calculate checksum of header block * Per POSIX: sum of all bytes, treating checksum field as spaces (0x20) */ export function checksum(buf) { let sum = 0; for(let i = 0; i < HEADER_SIZE; i++){ // Treat checksum field (offset 148, length 8) as spaces if (i >= CHECKSUM_OFFSET && i < CHECKSUM_OFFSET + CHECKSUM_SIZE) { sum += 32; // space character } else { sum += buf[i]; } } return sum; } /** * Compare buffer region to byte array * Replacement for Buffer.compare that works on Node 0.8+ */ function bufferEquals(buf, offset, expected) { for(let i = 0; i < expected.length; i++){ if (buf[offset + i] !== expected[i]) return false; } return true; } /** * Check if buffer contains USTAR magic */ export function isUstar(buf) { return bufferEquals(buf, MAGIC_OFFSET, USTAR_MAGIC); } /** * Check if buffer contains GNU tar magic */ export function isGnu(buf) { return bufferEquals(buf, MAGIC_OFFSET, GNU_MAGIC) && bufferEquals(buf, VERSION_OFFSET, GNU_VER); } /** * Parse a 512-byte TAR header * * @param buf - 512-byte header buffer * @param opts - Parse options * @returns Parsed header or null if empty block (end of archive) */ export function parseHeader(buf, opts) { const options = opts || {}; const filenameEncoding = options.filenameEncoding || 'utf8'; const allowUnknownFormat = options.allowUnknownFormat || false; // Get type flag (handle null as 0 for old tar compatibility) // Standard POSIX types are '0'-'7' (ASCII 48-55), subtract ZERO_OFFSET to get 0-7 // GNU/PAX extension types are letters ('L'=76, 'K'=75, 'x'=120, 'g'=103), use raw ASCII value const rawTypeflag = buf[TYPEFLAG_OFFSET]; let typeflag; if (rawTypeflag === 0) { typeflag = 0; // Null byte treated as regular file } else if (rawTypeflag >= ZERO_OFFSET && rawTypeflag <= ZERO_OFFSET + 7) { // Standard POSIX type '0'-'7' typeflag = rawTypeflag - ZERO_OFFSET; } else { // GNU/PAX extension type - use raw ASCII value typeflag = rawTypeflag; } // Decode basic fields let name = decodeStr(buf, NAME_OFFSET, NAME_SIZE, filenameEncoding); const mode = decodeOct(buf, MODE_OFFSET, MODE_SIZE); const uid = decodeOct(buf, UID_OFFSET, UID_SIZE); const gid = decodeOct(buf, GID_OFFSET, GID_SIZE); const size = decodeOct(buf, SIZE_OFFSET, SIZE_SIZE); const mtime = decodeOct(buf, MTIME_OFFSET, MTIME_SIZE); const type = toType(typeflag); const linkname = buf[LINKNAME_OFFSET] === 0 ? null : decodeStr(buf, LINKNAME_OFFSET, LINKNAME_SIZE, filenameEncoding); const uname = decodeStr(buf, UNAME_OFFSET, UNAME_SIZE); const gname = decodeStr(buf, GNAME_OFFSET, GNAME_SIZE); const devmajor = decodeOct(buf, DEVMAJOR_OFFSET, DEVMAJOR_SIZE); const devminor = decodeOct(buf, DEVMINOR_OFFSET, DEVMINOR_SIZE); // Calculate and validate checksum const computed = checksum(buf); // Empty block check: checksum of all zeros treated as spaces = 8 * 32 = 256 if (computed === 8 * 32) return null; // Validate stored checksum const stored = decodeOct(buf, CHECKSUM_OFFSET, CHECKSUM_SIZE); if (computed !== stored) { throw createTarError('Invalid tar header. Maybe the tar is corrupted or it needs to be gunzipped?', TarErrorCode.INVALID_CHECKSUM); } // Handle USTAR format (prepend prefix to name if present) if (isUstar(buf)) { if (buf[PREFIX_OFFSET] !== 0) { name = `${decodeStr(buf, PREFIX_OFFSET, PREFIX_SIZE, filenameEncoding)}/${name}`; } } else if (isGnu(buf)) { // GNU format - magic is validated, no additional processing needed } else { if (!allowUnknownFormat) { throw createTarError('Invalid tar header: unknown format.', TarErrorCode.INVALID_FORMAT); } } // NOTE: Old tar versions use trailing / to indicate directories. // This check is intentionally NOT done here because GNU long path // extensions may change the name. The check is done in TarExtract._applyExtensions() // after the full name is resolved. return { name, mode, uid, gid, size, mtime: new Date(1000 * mtime), type, linkname, uname, gname, devmajor, devminor, pax: null }; } /** * Decode PAX extended attributes * Format: "length key=value\n" repeated * Length includes the entire record (length field + space + key=value + newline) */ export function decodePax(buf) { const result = {}; let pos = 0; while(pos < buf.length){ // Find space after length let spacePos = pos; while(spacePos < buf.length && buf[spacePos] !== 32)spacePos++; // Parse length const len = parseInt(buf.slice(pos, spacePos).toString(), 10); if (!len || isNaN(len)) break; // Extract key=value (after space, before newline) // Record spans from spacePos+1 to pos+len-1 (excluding newline) const record = buf.slice(spacePos + 1, pos + len - 1).toString('utf8'); const eqPos = record.indexOf('='); if (eqPos === -1) break; const key = record.slice(0, eqPos); const value = record.slice(eqPos + 1); result[key] = value; pos += len; } return result; } /** * Decode GNU long path/linkname * The content is null-terminated string */ export function decodeLongPath(buf, encoding) { return decodeStr(buf, 0, buf.length, encoding); } /** * Calculate number of padding bytes to reach 512-byte block alignment */ export function overflow(size) { const remainder = size & 511; // size % 512 return remainder ? 512 - remainder : 0; }