UNPKG

tar-iterator

Version:

Extract the contents of tar archives through an iterator API, reading from either streams or file paths. Use the stream interface and pipe transforms to add decompression algorithms.

284 lines 10.7 kB
/**
 * TAR Header Parsing
 *
 * All functions use only Node 0.8 compatible Buffer APIs:
 * - Buffer indexing: buf[i]
 * - Buffer.slice(start, end)
 * - Buffer.toString(encoding)
 * - Buffer.write(string, offset, length, encoding)
 * - new Buffer(size) or new Buffer(string)
 *
 * NOT using (added in later Node versions):
 * - Buffer.from()
 * - Buffer.alloc()
 * - Buffer.allocUnsafe()
 * - Buffer.compare()
 * - Number.isNaN() (use the local isNaN defined in this module instead)
 */
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });

/**
 * Re-export every getter of `all` onto `target` as an enumerable accessor
 * (transpiler-generated helper for live ES-module style bindings).
 */
function _export(target, all) {
  for (var name in all) {
    Object.defineProperty(target, name, {
      enumerable: true,
      get: Object.getOwnPropertyDescriptor(all, name).get
    });
  }
}

_export(exports, {
  get checksum() { return checksum; },
  get decodeLongPath() { return decodeLongPath; },
  get decodeOct() { return decodeOct; },
  get decodePax() { return decodePax; },
  get isGnu() { return isGnu; },
  get isUstar() { return isUstar; },
  get overflow() { return overflow; },
  get parseHeader() { return parseHeader; },
  get toType() { return toType; }
});

var _constantsts = require("./constants.js");
var _errorsts = require("./errors.js");

/**
 * Map a numeric type flag to its entry type name.
 *
 * @param {number} flag - Type flag as normalized by parseHeader.
 * @returns {string|null} Type name, or null when the flag is not recognized.
 */
function toType(flag) {
  switch (flag) {
    case _constantsts.TYPE_FILE: return 'file';
    case _constantsts.TYPE_LINK: return 'link';
    case _constantsts.TYPE_SYMLINK: return 'symlink';
    case _constantsts.TYPE_CHAR_DEVICE: return 'character-device';
    case _constantsts.TYPE_BLOCK_DEVICE: return 'block-device';
    case _constantsts.TYPE_DIRECTORY: return 'directory';
    case _constantsts.TYPE_FIFO: return 'fifo';
    case _constantsts.TYPE_CONTIGUOUS: return 'contiguous-file';
    case _constantsts.TYPE_GNU_LONG_PATH: return 'gnu-long-path';
    case _constantsts.TYPE_GNU_LONG_LINK: return 'gnu-long-link-path';
    case _constantsts.TYPE_GNU_SPARSE: return 'gnu-sparse';
    case _constantsts.TYPE_GNU_DUMPDIR: return 'gnu-dumpdir';
    case _constantsts.TYPE_GNU_MULTIVOL: return 'gnu-multivol';
    case _constantsts.TYPE_GNU_VOLHDR: return 'gnu-volume-header';
    case _constantsts.TYPE_PAX_HEADER: return 'pax-header';
    case _constantsts.TYPE_PAX_GLOBAL: return 'pax-global-header';
    default: return null;
  }
}

/**
 * Node 0.8 compatible isNaN (Number.isNaN didn't exist until ES2015).
 * Unlike the global isNaN, this does not coerce its argument.
 */
// biome-ignore lint/suspicious/noShadowRestrictedNames: Legacy
function isNaN(value) {
  // biome-ignore lint/suspicious/noSelfCompare: Legacy
  return value !== value;
}

/**
 * Find the first null byte in buf[start, end).
 *
 * @returns {number} Index of the null byte, or `end` when none is found.
 */
function findNull(buf, start, end) {
  for (var i = start; i < end; i++) {
    if (buf[i] === 0) return i;
  }
  return end;
}

/**
 * Decode a null-terminated string from a fixed-width buffer field.
 *
 * @param {Buffer} buf - Source buffer.
 * @param {number} offset - Start of the field.
 * @param {number} length - Width of the field in bytes.
 * @param {string} [encoding] - Buffer encoding; defaults to 'utf8'.
 * @returns {string} Field contents up to (not including) the first null byte.
 */
function decodeStr(buf, offset, length, encoding) {
  var enc = encoding || 'utf8';
  var end = findNull(buf, offset, offset + length);
  return buf.slice(offset, end).toString(enc);
}

/**
 * Parse a base-256 encoded number (GNU extension for large files >8GB).
 *
 * Bit 7 of the first byte is the base-256 marker and bit 6 is the sign bit;
 * the remaining bytes are big-endian. Negative values are stored in two's
 * complement, so the magnitude is the ones' complement of the bytes plus one.
 * The low six bits of the first byte are not folded into the result.
 *
 * @param {Buffer} buf - The complete numeric field.
 * @returns {number} Decoded value (precision is limited to that of a double).
 */
function parse256(buf) {
  // Sign bit (bit 6 of first byte, after the marker bit 7)
  var positive = (buf[0] & 0x40) === 0;
  var sum = 0;
  var base = 1;
  // Process bytes from right to left (least significant first)
  for (var i = buf.length - 1; i > 0; i--) {
    var byte = buf[i];
    sum += (positive ? byte : 0xff - byte) * base;
    base *= 256;
  }
  // Two's complement: -x === ~x + 1, so an all-0xFF field decodes to -1
  return positive ? sum : -(sum + 1);
}

/**
 * Decode a numeric header field (octal text, or GNU base-256 binary).
 *
 * @param {Buffer} buf - Header buffer.
 * @param {number} offset - Start of the field.
 * @param {number} length - Width of the field in bytes.
 * @returns {number} Decoded value; 0 when the field holds no digits.
 */
function decodeOct(buf, offset, length) {
  var val = buf.slice(offset, offset + length);
  // If high bit is set, parse as base-256 (GNU extension)
  if (val[0] & 0x80) {
    return parse256(val);
  }
  // Skip leading spaces (some old tar versions use them)
  var start = 0;
  while (start < val.length && val[start] === 32) start++;
  // Find end (space or null terminator)
  var end = start;
  while (end < val.length && val[end] !== 32 && val[end] !== 0) end++;
  // Skip leading zeros
  while (start < end && val[start] === _constantsts.ZERO_OFFSET) start++;
  if (start === end) return 0;
  return parseInt(val.slice(start, end).toString(), 8);
}

/**
 * Compute the header checksum: the sum of the first HEADER_SIZE bytes, with
 * the checksum field itself (offset 148, length 8) counted as spaces.
 *
 * @param {Buffer} buf - Header block.
 * @returns {number} Computed checksum.
 */
function checksum(buf) {
  var sum = 0;
  var fieldStart = _constantsts.CHECKSUM_OFFSET;
  var fieldEnd = _constantsts.CHECKSUM_OFFSET + _constantsts.CHECKSUM_SIZE;
  for (var i = 0; i < _constantsts.HEADER_SIZE; i++) {
    if (i >= fieldStart && i < fieldEnd) {
      sum += 32; // space character
    } else {
      sum += buf[i];
    }
  }
  return sum;
}

/**
 * Compare a buffer region to a byte array.
 * Replacement for Buffer.compare that works on Node 0.8+.
 *
 * @returns {boolean} True when buf[offset + i] === expected[i] for every i.
 */
function bufferEquals(buf, offset, expected) {
  for (var i = 0; i < expected.length; i++) {
    if (buf[offset + i] !== expected[i]) return false;
  }
  return true;
}

/**
 * @returns {boolean} True when the magic field matches USTAR_MAGIC.
 */
function isUstar(buf) {
  return bufferEquals(buf, _constantsts.MAGIC_OFFSET, _constantsts.USTAR_MAGIC);
}

/**
 * @returns {boolean} True when both the magic and version fields match the GNU values.
 */
function isGnu(buf) {
  return bufferEquals(buf, _constantsts.MAGIC_OFFSET, _constantsts.GNU_MAGIC) &&
    bufferEquals(buf, _constantsts.VERSION_OFFSET, _constantsts.GNU_VER);
}

/**
 * Parse one tar header block.
 *
 * @param {Buffer} buf - Header block (at least HEADER_SIZE bytes are read).
 * @param {Object} [opts]
 * @param {string} [opts.filenameEncoding='utf8'] - Encoding for name, linkname and prefix.
 * @param {boolean} [opts.allowUnknownFormat=false] - Accept headers that are neither USTAR nor GNU.
 * @returns {Object|null} Parsed header fields, or null for an all-zero (empty) block.
 * @throws A tar error with code INVALID_CHECKSUM when the stored checksum does
 *   not match, or INVALID_FORMAT when the magic is unknown and
 *   allowUnknownFormat is falsy.
 */
function parseHeader(buf, opts) {
  var options = opts || {};
  var filenameEncoding = options.filenameEncoding || 'utf8';
  var allowUnknownFormat = options.allowUnknownFormat || false;

  // Get type flag (handle null as 0 for old tar compatibility)
  // Standard POSIX types are '0'-'7' (ASCII 48-55), subtract ZERO_OFFSET to get 0-7
  // GNU/PAX extension types are letters ('L'=76, 'K'=75, 'x'=120, 'g'=103), use raw ASCII value
  var rawTypeflag = buf[_constantsts.TYPEFLAG_OFFSET];
  var typeflag;
  if (rawTypeflag === 0) {
    typeflag = 0; // Null byte treated as regular file
  } else if (rawTypeflag >= _constantsts.ZERO_OFFSET && rawTypeflag <= _constantsts.ZERO_OFFSET + 7) {
    // Standard POSIX type '0'-'7'
    typeflag = rawTypeflag - _constantsts.ZERO_OFFSET;
  } else {
    // GNU/PAX extension type - use raw ASCII value
    typeflag = rawTypeflag;
  }

  // Decode basic fields
  var name = decodeStr(buf, _constantsts.NAME_OFFSET, _constantsts.NAME_SIZE, filenameEncoding);
  var mode = decodeOct(buf, _constantsts.MODE_OFFSET, _constantsts.MODE_SIZE);
  var uid = decodeOct(buf, _constantsts.UID_OFFSET, _constantsts.UID_SIZE);
  var gid = decodeOct(buf, _constantsts.GID_OFFSET, _constantsts.GID_SIZE);
  var size = decodeOct(buf, _constantsts.SIZE_OFFSET, _constantsts.SIZE_SIZE);
  var mtime = decodeOct(buf, _constantsts.MTIME_OFFSET, _constantsts.MTIME_SIZE);
  var type = toType(typeflag);
  var linkname = buf[_constantsts.LINKNAME_OFFSET] === 0
    ? null
    : decodeStr(buf, _constantsts.LINKNAME_OFFSET, _constantsts.LINKNAME_SIZE, filenameEncoding);
  var uname = decodeStr(buf, _constantsts.UNAME_OFFSET, _constantsts.UNAME_SIZE);
  var gname = decodeStr(buf, _constantsts.GNAME_OFFSET, _constantsts.GNAME_SIZE);
  var devmajor = decodeOct(buf, _constantsts.DEVMAJOR_OFFSET, _constantsts.DEVMAJOR_SIZE);
  var devminor = decodeOct(buf, _constantsts.DEVMINOR_OFFSET, _constantsts.DEVMINOR_SIZE);

  // Calculate and validate checksum
  var computed = checksum(buf);
  // Empty block check: checksum of all zeros treated as spaces = 8 * 32 = 256
  if (computed === 8 * 32) return null;

  // Validate stored checksum
  var stored = decodeOct(buf, _constantsts.CHECKSUM_OFFSET, _constantsts.CHECKSUM_SIZE);
  if (computed !== stored) {
    throw (0, _errorsts.createTarError)('Invalid tar header. Maybe the tar is corrupted or it needs to be gunzipped?', _errorsts.TarErrorCode.INVALID_CHECKSUM);
  }

  // Handle USTAR format (prepend prefix to name if present)
  if (isUstar(buf)) {
    if (buf[_constantsts.PREFIX_OFFSET] !== 0) {
      name = "".concat(decodeStr(buf, _constantsts.PREFIX_OFFSET, _constantsts.PREFIX_SIZE, filenameEncoding), "/").concat(name);
    }
  } else if (isGnu(buf)) {
    // GNU format - magic is validated, no additional processing needed
  } else {
    if (!allowUnknownFormat) {
      throw (0, _errorsts.createTarError)('Invalid tar header: unknown format.', _errorsts.TarErrorCode.INVALID_FORMAT);
    }
  }

  // NOTE: Old tar versions use trailing / to indicate directories.
  // This check is intentionally NOT done here because GNU long path
  // extensions may change the name. The check is done in TarExtract._applyExtensions()
  // after the full name is resolved.
  return {
    name: name,
    mode: mode,
    uid: uid,
    gid: gid,
    size: size,
    mtime: new Date(1000 * mtime),
    type: type,
    linkname: linkname,
    uname: uname,
    gname: gname,
    devmajor: devmajor,
    devminor: devminor,
    pax: null
  };
}

/**
 * Decode PAX extended header records of the form "<length> <key>=<value>\n",
 * where <length> is the decimal byte length of the whole record.
 *
 * Parsing stops at the first record whose length is missing, non-numeric or
 * not positive, or that has no '=' separator; records decoded before that
 * point are still returned.
 *
 * @param {Buffer} buf - PAX header payload.
 * @returns {Object} Map of key to string value.
 */
function decodePax(buf) {
  var result = {};
  var pos = 0;
  while (pos < buf.length) {
    // Find space after length
    var spacePos = pos;
    while (spacePos < buf.length && buf[spacePos] !== 32) spacePos++;

    // Parse length; a non-positive length would stall or rewind the cursor
    var len = parseInt(buf.slice(pos, spacePos).toString(), 10);
    if (isNaN(len) || len <= 0) break;

    // Extract key=value (after space, before newline)
    // Record spans from spacePos+1 to pos+len-1 (excluding newline)
    var record = buf.slice(spacePos + 1, pos + len - 1).toString('utf8');
    var eqPos = record.indexOf('=');
    if (eqPos === -1) break;

    var key = record.slice(0, eqPos);
    var value = record.slice(eqPos + 1);
    result[key] = value;
    pos += len;
  }
  return result;
}

/**
 * Decode a long path payload: the whole buffer up to the first null byte.
 *
 * @param {Buffer} buf - Payload buffer.
 * @param {string} [encoding] - Buffer encoding; defaults to 'utf8'.
 * @returns {string} Decoded path.
 */
function decodeLongPath(buf, encoding) {
  return decodeStr(buf, 0, buf.length, encoding);
}

/**
 * Number of padding bytes needed to round `size` up to a 512-byte boundary.
 *
 * @param {number} size - Entry size in bytes.
 * @returns {number} Padding in the range 0..511.
 */
function overflow(size) {
  var remainder = size & 511; // size % 512
  return remainder ? 512 - remainder : 0;
}

/* CJS INTEROP */
if (exports.__esModule && exports.default) {
  try {
    Object.defineProperty(exports.default, '__esModule', { value: true });
    for (var key in exports) {
      exports.default[key] = exports[key];
    }
  } catch (_) {}
  module.exports = exports.default;
}