tar-iterator
Version:
Extract the contents of tar archives through an iterator API that accepts streams or paths. Use the stream interface and pipe transforms to add decompression algorithms.
284 lines • 10.7 kB
JavaScript
/**
* TAR Header Parsing
*
* All functions use only Node 0.8 compatible Buffer APIs:
* - Buffer indexing: buf[i]
* - Buffer.slice(start, end)
* - Buffer.toString(encoding)
* - Buffer.write(string, offset, length, encoding)
* - new Buffer(size) or new Buffer(string)
*
* NOT using (added in later Node versions):
* - Buffer.from()
* - Buffer.alloc()
* - Buffer.allocUnsafe()
* - Buffer.compare()
* - Number.isNaN() (use global isNaN instead)
*/ "use strict";
// Flag the exports object with `__esModule: true`. The property is created via
// defineProperty with only `value` given, so it is non-enumerable and read-only.
// The CJS interop statement at the end of this file checks this flag.
Object.defineProperty(exports, "__esModule", {
value: true
});
/**
 * Re-expose every getter found on `all` as an enumerable accessor on `target`.
 *
 * @param {Object} target - Object that receives the accessor properties.
 * @param {Object} all - Object whose enumerable properties are defined as getters.
 */
function _export(target, all) {
    for (var key in all) {
        var source = Object.getOwnPropertyDescriptor(all, key);
        Object.defineProperty(target, key, {
            enumerable: true,
            get: source.get
        });
    }
}
// Public API of this module. Each entry is a getter that returns the
// module-level function of the same name; the function is looked up when the
// property is read, not when this table is registered.
_export(exports, {
get checksum () {
return checksum;
},
get decodeLongPath () {
return decodeLongPath;
},
get decodeOct () {
return decodeOct;
},
get decodePax () {
return decodePax;
},
get isGnu () {
return isGnu;
},
get isUstar () {
return isUstar;
},
get overflow () {
return overflow;
},
get parseHeader () {
return parseHeader;
},
get toType () {
return toType;
}
});
var _constantsts = require("./constants.js");
var _errorsts = require("./errors.js");
/**
 * Translate a tar type flag into its descriptive type name.
 *
 * @param {number} flag - Type flag, compared with strict equality against the TYPE_* constants.
 * @returns {string|null} The matching type name, or null when no constant matches.
 */
function toType(flag) {
    // Ordered [constant, label] pairs; the first strictly-equal constant wins.
    var table = [
        [_constantsts.TYPE_FILE, 'file'],
        [_constantsts.TYPE_LINK, 'link'],
        [_constantsts.TYPE_SYMLINK, 'symlink'],
        [_constantsts.TYPE_CHAR_DEVICE, 'character-device'],
        [_constantsts.TYPE_BLOCK_DEVICE, 'block-device'],
        [_constantsts.TYPE_DIRECTORY, 'directory'],
        [_constantsts.TYPE_FIFO, 'fifo'],
        [_constantsts.TYPE_CONTIGUOUS, 'contiguous-file'],
        [_constantsts.TYPE_GNU_LONG_PATH, 'gnu-long-path'],
        [_constantsts.TYPE_GNU_LONG_LINK, 'gnu-long-link-path'],
        [_constantsts.TYPE_GNU_SPARSE, 'gnu-sparse'],
        [_constantsts.TYPE_GNU_DUMPDIR, 'gnu-dumpdir'],
        [_constantsts.TYPE_GNU_MULTIVOL, 'gnu-multivol'],
        [_constantsts.TYPE_GNU_VOLHDR, 'gnu-volume-header'],
        [_constantsts.TYPE_PAX_HEADER, 'pax-header'],
        [_constantsts.TYPE_PAX_GLOBAL, 'pax-global-header']
    ];
    for (var i = 0; i < table.length; i++) {
        if (table[i][0] === flag) {
            return table[i][1];
        }
    }
    return null;
}
/**
 * NaN test that works on Node 0.8 (Number.isNaN did not exist until ES2015).
 * The argument is not coerced, so only an actual NaN value yields true.
 */ // biome-ignore lint/suspicious/noShadowRestrictedNames: Legacy
function isNaN(value) {
    // NaN is the only value that is not strictly equal to itself.
    // biome-ignore lint/suspicious/noSelfCompare: Legacy
    var equalsItself = value === value;
    return !equalsItself;
}
/**
 * Locate the first zero byte inside the half-open region [start, end) of a buffer.
 * Returns `end` when the region contains no zero byte (or is empty).
 */
function findNull(buf, start, end) {
    var pos = start;
    while (pos < end && buf[pos] !== 0) {
        pos++;
    }
    // Falling off the region always reports `end`, even when start was past it.
    return pos < end ? pos : end;
}
/**
 * Read a null-terminated string out of a fixed-width buffer field.
 * The text ends at the first zero byte, or at the end of the field when there is none.
 * `encoding` falls back to 'utf8' when it is not supplied.
 */
function decodeStr(buf, offset, length, encoding) {
    var fieldEnd = offset + length;
    var textEnd = findNull(buf, offset, fieldEnd);
    var bytes = buf.slice(offset, textEnd);
    return bytes.toString(encoding ? encoding : 'utf8');
}
/**
 * Parse a base-256 encoded number (GNU extension for large files >8GB).
 *
 * The caller has already checked that the marker bit (bit 7) of the first byte
 * is set. Bit 6 of the first byte is the sign bit. The remaining bytes hold the
 * value in big-endian order; negative values are stored as two's complement.
 * The low six bits of the first byte are not included in the result.
 *
 * @param {Buffer} buf - The complete numeric field, including the marker byte.
 * @returns {number} The decoded value (subject to double precision for very wide fields).
 */
function parse256(buf) {
    // Sign bit is bit 6 of the first byte, right after the marker bit 7.
    var positive = (buf[0] & 0x40) === 0;
    var sum = 0;
    var base = 1;
    // Walk from the least significant byte up to, but excluding, the first byte.
    for (var i = buf.length - 1; i > 0; i--) {
        var byte = buf[i];
        // For negative values accumulate the bitwise complement of each byte.
        sum += (positive ? byte : 0xff - byte) * base;
        base *= 256;
    }
    if (positive) {
        return sum;
    }
    // Two's complement negation is complement-plus-one: -x === ~x + 1.
    return -(sum + 1);
}
/**
 * Decode a numeric header field.
 *
 * A field whose first byte has the high bit set is handed to parse256 (GNU
 * base-256 extension). Otherwise the field is read as ASCII octal: leading
 * spaces are skipped, digits run up to the first space or NUL, and leading
 * zeros are dropped.
 *
 * @param {Buffer} buf - Buffer containing the field.
 * @param {number} offset - Start of the field.
 * @param {number} length - Width of the field in bytes.
 * @returns {number} The decoded value; 0 when the field holds no digits besides zeros.
 */
function decodeOct(buf, offset, length) {
    var SPACE = 32;
    var NUL = 0;
    var field = buf.slice(offset, offset + length);
    if ((field[0] & 0x80) !== 0) {
        return parse256(field);
    }
    var size = field.length;
    // Some old tar versions pad numeric fields with leading spaces.
    var first = 0;
    for (; first < size; first++) {
        if (field[first] !== SPACE) break;
    }
    // The number ends at the first space or NUL after it.
    var last = first;
    for (; last < size; last++) {
        var code = field[last];
        if (code === SPACE || code === NUL) break;
    }
    // Drop leading '0' characters.
    for (; first < last; first++) {
        if (field[first] !== _constantsts.ZERO_OFFSET) break;
    }
    if (first === last) {
        return 0;
    }
    var digits = field.slice(first, last).toString();
    return parseInt(digits, 8);
}
/**
 * Compute the checksum of a header block: the sum of its first HEADER_SIZE
 * bytes, with every byte of the checksum field counted as a space (32).
 *
 * @param {Buffer} buf - Header block.
 * @returns {number} The byte sum.
 */
function checksum(buf) {
    var fieldStart = _constantsts.CHECKSUM_OFFSET;
    var fieldEnd = _constantsts.CHECKSUM_OFFSET + _constantsts.CHECKSUM_SIZE;
    var total = 0;
    var pos = 0;
    while (pos < _constantsts.HEADER_SIZE) {
        var insideField = pos >= fieldStart && pos < fieldEnd;
        total += insideField ? 32 : buf[pos];
        pos++;
    }
    return total;
}
/**
 * Check whether the bytes of `buf` starting at `offset` match `expected`
 * element for element. Stands in for Buffer.compare, which Node 0.8 lacks.
 */
function bufferEquals(buf, offset, expected) {
    var remaining = expected.length;
    // Compare from the last expected byte back to the first.
    while (remaining-- > 0) {
        if (buf[offset + remaining] !== expected[remaining]) {
            return false;
        }
    }
    return true;
}
/**
 * Report whether the bytes at MAGIC_OFFSET of a header block equal USTAR_MAGIC.
 *
 * @param {Buffer} buf - Header block.
 * @returns {boolean} True when the USTAR magic is present.
 */
function isUstar(buf) {
    var magicAt = _constantsts.MAGIC_OFFSET;
    var magic = _constantsts.USTAR_MAGIC;
    return bufferEquals(buf, magicAt, magic);
}
/**
 * Report whether a header block carries both the GNU magic (at MAGIC_OFFSET)
 * and the GNU version bytes (at VERSION_OFFSET).
 *
 * @param {Buffer} buf - Header block.
 * @returns {boolean} True when both byte sequences match.
 */
function isGnu(buf) {
    if (!bufferEquals(buf, _constantsts.MAGIC_OFFSET, _constantsts.GNU_MAGIC)) {
        return false;
    }
    return bufferEquals(buf, _constantsts.VERSION_OFFSET, _constantsts.GNU_VER);
}
/**
 * Parse one tar header block into a plain header object.
 *
 * @param {Buffer} buf - Header block to decode.
 * @param {Object} [opts] - Optional settings.
 * @param {string} [opts.filenameEncoding] - Encoding used for name, linkname and prefix; defaults to 'utf8'.
 * @param {boolean} [opts.allowUnknownFormat] - Accept headers carrying neither the USTAR nor the GNU magic.
 * @returns {Object|null} The decoded header (with `pax` set to null), or null for an empty block.
 * @throws A tar error with code INVALID_CHECKSUM when the stored checksum does not match,
 *   or INVALID_FORMAT when the format is unknown and not explicitly allowed.
 */
function parseHeader(buf, opts) {
    var C = _constantsts;
    var settings = opts || {};
    var nameEncoding = settings.filenameEncoding || 'utf8';
    var acceptUnknown = settings.allowUnknownFormat || false;

    // Standard POSIX flags '0'-'7' become 0-7. Every other byte keeps its raw
    // value: GNU/PAX letter flags stay as ASCII codes, and a NUL byte (used by
    // old tar versions for regular files) stays 0.
    var flagByte = buf[C.TYPEFLAG_OFFSET];
    var isDigitFlag = flagByte >= C.ZERO_OFFSET && flagByte <= C.ZERO_OFFSET + 7;
    var typeflag = isDigitFlag ? flagByte - C.ZERO_OFFSET : flagByte;

    // Decode every field up front, in the order the result exposes them.
    var header = {
        name: decodeStr(buf, C.NAME_OFFSET, C.NAME_SIZE, nameEncoding),
        mode: decodeOct(buf, C.MODE_OFFSET, C.MODE_SIZE),
        uid: decodeOct(buf, C.UID_OFFSET, C.UID_SIZE),
        gid: decodeOct(buf, C.GID_OFFSET, C.GID_SIZE),
        size: decodeOct(buf, C.SIZE_OFFSET, C.SIZE_SIZE),
        // mtime is multiplied by 1000 before being handed to Date.
        mtime: new Date(1000 * decodeOct(buf, C.MTIME_OFFSET, C.MTIME_SIZE)),
        type: toType(typeflag),
        linkname: buf[C.LINKNAME_OFFSET] === 0 ? null : decodeStr(buf, C.LINKNAME_OFFSET, C.LINKNAME_SIZE, nameEncoding),
        uname: decodeStr(buf, C.UNAME_OFFSET, C.UNAME_SIZE),
        gname: decodeStr(buf, C.GNAME_OFFSET, C.GNAME_SIZE),
        devmajor: decodeOct(buf, C.DEVMAJOR_OFFSET, C.DEVMAJOR_SIZE),
        devminor: decodeOct(buf, C.DEVMINOR_OFFSET, C.DEVMINOR_SIZE),
        pax: null
    };

    var computed = checksum(buf);
    // An all-zero block sums to just the checksum field counted as spaces: 8 * 32.
    if (computed === 8 * 32) {
        return null;
    }
    if (computed !== decodeOct(buf, C.CHECKSUM_OFFSET, C.CHECKSUM_SIZE)) {
        throw (0, _errorsts.createTarError)('Invalid tar header. Maybe the tar is corrupted or it needs to be gunzipped?', _errorsts.TarErrorCode.INVALID_CHECKSUM);
    }

    // NOTE: Old tar versions use trailing / to indicate directories.
    // This check is intentionally NOT done here because GNU long path
    // extensions may change the name. The check is done in TarExtract._applyExtensions()
    // after the full name is resolved.
    if (isUstar(buf)) {
        // USTAR: a non-empty prefix field is prepended to the name.
        if (buf[C.PREFIX_OFFSET] !== 0) {
            header.name = decodeStr(buf, C.PREFIX_OFFSET, C.PREFIX_SIZE, nameEncoding) + '/' + header.name;
        }
        return header;
    }
    // GNU headers need no further processing once the magic is validated.
    if (isGnu(buf) || acceptUnknown) {
        return header;
    }
    throw (0, _errorsts.createTarError)('Invalid tar header: unknown format.', _errorsts.TarErrorCode.INVALID_FORMAT);
}
/**
 * Decode the body of a PAX extended header into a key/value object.
 *
 * Each record has the form "<len> <key>=<value>\n", where <len> is the decimal
 * byte length of the whole record, including the length digits, the space and
 * the trailing newline. Decoding stops at the first record that has no usable
 * length or no '=' separator; records decoded before that point are kept.
 *
 * @param {Buffer} buf - Raw PAX header data.
 * @returns {Object} Map of record keys to string values (later records win).
 */
function decodePax(buf) {
    var result = {};
    var pos = 0;
    while (pos < buf.length) {
        // The length prefix runs up to the first space.
        var spacePos = pos;
        while (spacePos < buf.length && buf[spacePos] !== 32) spacePos++;
        var len = parseInt(buf.slice(pos, spacePos).toString(), 10);
        // A record length must be a positive number. A negative length would
        // make the slice below count from the end of the buffer and move the
        // cursor backwards, so it is rejected together with NaN and 0.
        if (isNaN(len) || len <= 0) break;
        // key=value sits after the space and before the trailing newline,
        // i.e. bytes spacePos+1 .. pos+len-2.
        var record = buf.slice(spacePos + 1, pos + len - 1).toString('utf8');
        var eqPos = record.indexOf('=');
        if (eqPos === -1) break;
        var key = record.slice(0, eqPos);
        var value = record.slice(eqPos + 1);
        result[key] = value;
        pos += len;
    }
    return result;
}
/**
 * Decode the payload of a long-path entry: the whole buffer is read as a
 * null-terminated string in the given encoding.
 *
 * @param {Buffer} buf - Entry payload.
 * @param {string} [encoding] - Passed through to decodeStr.
 * @returns {string} The decoded path.
 */
function decodeLongPath(buf, encoding) {
    var byteCount = buf.length;
    return decodeStr(buf, 0, byteCount, encoding);
}
/**
 * Number of padding bytes needed to round `size` up to a multiple of 512.
 *
 * @param {number} size - Byte count.
 * @returns {number} A value from 0 to 511.
 */
function overflow(size) {
    // The low nine bits are size modulo 512.
    var used = size & 511;
    if (used === 0) {
        return 0;
    }
    return 512 - used;
}
/* CJS INTEROP */
// When the module has a default export, make that export the module itself:
// flag it with __esModule, copy the enumerable exports onto it, then replace
// module.exports with it.
if (exports.__esModule && exports.default) {
    try {
        Object.defineProperty(exports.default, '__esModule', { value: true });
        for (var key in exports) {
            exports.default[key] = exports[key];
        }
    } catch (_) {
        // Best effort: failures while decorating the default export are ignored.
    }
    module.exports = exports.default;
}