tar-iterator
Version:
Extract the contents of tar archives through an iterator API that accepts streams or paths. Use the stream interface and pipe transforms to add decompression algorithms.
557 lines • 23 kB
JavaScript
/**
* TarExtract - Streaming TAR extraction
*
* Event-based TAR parser that emits 'entry' events for each file.
* Node 0.8 compatible.
*
* State Machine:
* ```
* ┌─────────────────────────────────────────────┐
* │ │
* HEADER ─┬─ [file] ────> FILE_DATA ──> PADDING ─────────────────────>──┤
* │ │
* ├─ [gnu-long-path] ──> GNU_LONG_PATH ──> PADDING ──>──────────┤
* │ │
* ├─ [gnu-long-link] ──> GNU_LONG_LINK ──> PADDING ──>──────────┤
* │ │
* ├─ [pax-header] ──> PAX_HEADER ──> PADDING ──>────────────────┤
* │ │
* ├─ [gnu-sparse] ─┬─> SPARSE_EXTENDED ──> SPARSE_DATA ──>──────┤
* │ │ │
* │ └─> SPARSE_DATA ──> PADDING ──>──────────────┤
* │ │
* └─ [null header] ──> END │
* │
* <─────────────────────────────────────────────────────────────┘
* ```
*
* Extension handling:
* - GNU LongPath/LongLink headers store path for NEXT entry
* - PAX headers store attributes for NEXT entry (or all entries if global)
* - Extensions are applied when the actual file header is processed
*
* Events:
* 'entry' (header: TarHeader, stream: Readable, next: () => void)
* 'error' (err: Error)
* 'finish' ()
*/ "use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
Object.defineProperty(exports, "default", {
enumerable: true,
get: function() {
return TarExtract;
}
});
var _events = require("events");
var _extractbaseiterator = require("extract-base-iterator");
var _constantsts = require("./constants.js");
var _EntryStreamts = /*#__PURE__*/ _interop_require_default(require("./EntryStream.js"));
var _Extensionsts = require("./Extensions.js");
var _headersts = require("./headers.js");
var _sparsets = require("./sparse.js");
/**
 * Transpiler helper: returns `self` unchanged unless it is `undefined`,
 * in which case a ReferenceError is thrown (the super constructor was
 * never called, so `this` does not exist yet).
 */
function _assert_this_initialized(self) {
    if (typeof self !== "undefined") {
        return self;
    }
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
}
/**
 * Transpiler helper: invokes the parent constructor of `derived` on behalf
 * of `_this`.
 *
 * Uses Reflect.construct when the runtime supports it natively (so that
 * new.target is the constructor of `_this`), otherwise falls back to a
 * plain `apply` of the parent constructor. The result is passed through
 * _possible_constructor_return to decide which object becomes `this`.
 */
function _call_super(_this, derived, args) {
    var parent = _get_prototype_of(derived);
    var constructed;
    if (_is_native_reflect_construct()) {
        constructed = Reflect.construct(parent, args || [], _get_prototype_of(_this).constructor);
    } else {
        constructed = parent.apply(_this, args);
    }
    return _possible_constructor_return(_this, constructed);
}
/**
 * Transpiler helper: guards against a class constructor being invoked
 * without `new`. Throws a TypeError when `instance` is not an instance
 * of `Constructor`.
 */
function _class_call_check(instance, Constructor) {
    var calledWithNew = instance instanceof Constructor;
    if (calledWithNew) {
        return;
    }
    throw new TypeError("Cannot call a class as a function");
}
/**
 * Transpiler helper: returns the prototype of `o`.
 *
 * On first call the function replaces itself with the best available
 * implementation (native Object.getPrototypeOf when Object.setPrototypeOf
 * exists, otherwise a __proto__-based fallback) and then delegates to it.
 */
function _get_prototype_of(o) {
    if (Object.setPrototypeOf) {
        _get_prototype_of = Object.getPrototypeOf;
    } else {
        _get_prototype_of = function getPrototypeOf(o) {
            return o.__proto__ || Object.getPrototypeOf(o);
        };
    }
    return _get_prototype_of(o);
}
/**
 * Transpiler helper: wires up prototype inheritance between two
 * constructor functions.
 *
 * - `superClass` must be a function or null, otherwise a TypeError is thrown.
 * - `subClass.prototype` is replaced by a fresh object whose prototype is
 *   `superClass.prototype` (or null) and whose non-enumerable `constructor`
 *   points back at `subClass`.
 * - When `superClass` is truthy, `subClass` itself is also linked to
 *   `superClass` so static members are inherited.
 */
function _inherits(subClass, superClass) {
    var isValidSuper = typeof superClass === "function" || superClass === null;
    if (!isValidSuper) {
        throw new TypeError("Super expression must either be null or a function");
    }
    var parentPrototype = superClass && superClass.prototype;
    var descriptors = {
        constructor: {
            value: subClass,
            writable: true,
            configurable: true
        }
    };
    subClass.prototype = Object.create(parentPrototype, descriptors);
    if (superClass) {
        _set_prototype_of(subClass, superClass);
    }
}
/**
 * Transpiler helper: normalises a required module so that `.default`
 * is always available. Modules already flagged with `__esModule` are
 * returned untouched; anything else is wrapped as `{ default: obj }`.
 */
function _interop_require_default(obj) {
    if (obj && obj.__esModule) {
        return obj;
    }
    return {
        default: obj
    };
}
/**
 * Transpiler helper: picks the value a derived constructor should treat
 * as `this`. If the super call returned an object or a function, that
 * value wins; otherwise `self` is returned after checking that it has
 * been initialised.
 */
function _possible_constructor_return(self, call) {
    var returnedObjectLike = call && (_type_of(call) === "object" || typeof call === "function");
    if (!returnedObjectLike) {
        return _assert_this_initialized(self);
    }
    return call;
}
/**
 * Transpiler helper: sets the prototype of `o` to `p` and returns `o`.
 *
 * On first call the function replaces itself with the native
 * Object.setPrototypeOf when available, or with a __proto__-assigning
 * fallback, and then delegates to that implementation.
 */
function _set_prototype_of(o, p) {
    if (Object.setPrototypeOf) {
        _set_prototype_of = Object.setPrototypeOf;
    } else {
        _set_prototype_of = function setPrototypeOf(o, p) {
            o.__proto__ = p;
            return o;
        };
    }
    return _set_prototype_of(o, p);
}
/**
 * Transpiler helper: `typeof` that reports "symbol" for any truthy value
 * whose `constructor` is the global Symbol (when Symbol exists), and the
 * native `typeof` result for everything else.
 */
function _type_of(obj) {
    "@swc/helpers - typeof";
    var looksLikeSymbol = obj && typeof Symbol !== "undefined" && obj.constructor === Symbol;
    if (looksLikeSymbol) {
        return "symbol";
    }
    return typeof obj;
}
/**
 * Transpiler helper: feature-detects a working native Reflect.construct.
 *
 * The probe constructs a Boolean through Reflect.construct; any exception
 * (e.g. Reflect missing) is deliberately ignored and treated as "not
 * supported". The function then replaces itself with a version that
 * returns the cached answer.
 */
function _is_native_reflect_construct() {
    var supported;
    try {
        supported = !Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function() {}));
    } catch (_) {}
    _is_native_reflect_construct = function() {
        return !!supported;
    };
    return _is_native_reflect_construct();
}
// Parser states
// Each value selects the handler that _process() dispatches to.
// Waiting for a full header block; handled by _processHeader()
var STATE_HEADER = 0;
// Forwarding the current entry's bytes to its entry stream; handled by _processFileData()
var STATE_FILE_DATA = 1;
// Skipping the paddingRemaining bytes that follow an entry's data; handled by _processPadding()
var STATE_PADDING = 2;
// Terminal state: _process() stops looping (set on null header, parse error, or finish)
var STATE_END = 3;
// Collecting the payload of a 'gnu-long-path' header; handled by _processExtensionData()
var STATE_GNU_LONG_PATH = 4;
// Collecting the payload of a 'gnu-long-link-path' header; handled by _processExtensionData()
var STATE_GNU_LONG_LINK = 5;
// Collecting the payload of a 'pax-header' or 'pax-global-header'; handled by _processExtensionData()
var STATE_PAX_HEADER = 6;
// Reading extended GNU sparse header blocks; handled by _processSparseExtended()
var STATE_SPARSE_EXTENDED = 7;
// Forwarding sparse entry data to the sparse stream; handled by _processSparseData()
var STATE_SPARSE_DATA = 8;
var TarExtract = /*#__PURE__*/ function(EventEmitter) {
"use strict";
_inherits(TarExtract, EventEmitter);
/**
 * Create a new streaming TAR parser.
 *
 * @param options Optional settings object; stored as-is on `this.options`
 *                (an empty object is used when omitted) and passed to the
 *                header parser.
 */
function TarExtract(options) {
    _class_call_check(this, TarExtract);
    var _this = _call_super(this, TarExtract);

    // Current entry state
    _this.header = null;
    _this.entryStream = null;
    _this.entryRemaining = 0;
    _this.paddingRemaining = 0;

    // Backpressure control
    _this.locked = false;
    _this.pending = false;
    _this.finished = false;
    _this.finishEmitted = false;

    // Pending entry to emit (waiting for consumer to set up listeners)
    _this.pendingEntry = null;

    // GNU sparse file state
    _this.sparseInfo = null;
    _this.sparseStream = null;
    _this.sparseDataRemaining = 0;

    // Input buffering, state machine position, options and extension bookkeeping
    _this.buffer = new _extractbaseiterator.BufferList();
    _this.state = STATE_HEADER;
    _this.options = options || {};
    _this.extState = (0, _Extensionsts.createExtensionState)();
    return _this;
}
var _proto = TarExtract.prototype;
/**
 * Feed a chunk of archive bytes to the parser.
 *
 * After end() has been called the chunk is dropped. Otherwise it is
 * appended to the internal buffer, the state machine is run, and any
 * entry parsed from the new data is handed to listeners via resume().
 *
 * @param chunk    Data to append to the internal buffer.
 * @param callback Optional function invoked (with no arguments) before returning.
 * @returns false when the parser is finished or currently locked on an
 *          entry, true otherwise.
 */
_proto.write = function write(chunk, callback) {
    var accepting = !this.finished;
    if (accepting) {
        this.buffer.append(chunk);
        this._process();
        // _process() may have parsed a new entry header from the incoming
        // data; hand that entry to listeners now.
        this.resume();
    }
    if (callback) callback();
    return accepting ? !this.locked : false;
};
/**
* Signal end of input.
*
* Marks the parser as finished (subsequent write() calls are ignored),
* runs the state machine over whatever is still buffered, emits any
* pending entry, and emits 'finish' if _maybeFinish() allows it.
*
* @param callback Optional function invoked with no arguments once the
*                 steps above have run.
*/ _proto.end = function end(callback) {
this.finished = true;
// Drain any data that is still sitting in the buffer
this._process();
// Emit any pending entry before checking for finish
this.resume();
// No-op while locked or while an entry is pending; _unlock()/resume() retry later
this._maybeFinish();
if (callback) callback();
};
/**
 * Report an error on the currently active entry stream (if any) and then
 * on the extractor itself.
 *
 * The entry stream is notified first so that a consumer reading it is not
 * left waiting for data that will never arrive.
 *
 * @param err Error to emit.
 */
_proto._emitError = function _emitError(err) {
    // Regular entry stream takes precedence over the sparse stream
    var target = this.entryStream ? this.entryStream : this.sparseStream;
    if (target && !target.ended) {
        target.emit('error', err);
    }
    this.emit('error', err);
};
/**
 * Emit 'finish' once, and only when it is safe to do so.
 *
 * Nothing happens when 'finish' was already emitted here, an entry is
 * still waiting to be delivered, end() has not been called, or the
 * consumer has not yet released the current entry via next(). When none
 * of those apply, 'finish' is emitted only from a terminal position
 * (between entries or at end of archive).
 */
_proto._maybeFinish = function _maybeFinish() {
    var blocked = this.finishEmitted || this.pendingEntry || !this.finished || this.locked;
    if (blocked) return;

    var atTerminalState = this.state === STATE_HEADER || this.state === STATE_END;
    if (!atTerminalState) return;

    this.state = STATE_END; // Mark as ended
    this.finishEmitted = true;
    this.emit('finish');
};
/**
 * Deliver the pending entry (if any) to 'entry' listeners and keep parsing.
 *
 * Call this after attaching 'entry' listeners. When nobody is listening
 * the entry stays pending, so it is not lost if resume() runs from
 * write() before the consumer has subscribed.
 */
_proto.resume = function resume() {
    // listeners().length rather than listenerCount(): Node 0.8 compatibility
    if (!this.pendingEntry || this.listeners('entry').length <= 0) {
        return;
    }
    var delivered = this.pendingEntry;
    this.pendingEntry = null;
    // Allow the entry's data to start flowing
    this.pending = false;
    this.emit('entry', delivered.header, delivered.stream, delivered.next);
    // Push buffered data for this entry, then see whether we are done
    this._process();
    this._maybeFinish();
};
/**
 * Run the state machine over the buffered data.
 *
 * Does nothing while an entry is pending delivery. Otherwise the handler
 * for the current state is invoked repeatedly until one of them reports
 * that it cannot make further progress. While locked, only the start of a
 * NEXT header is blocked; data for the current entry keeps flowing.
 */
_proto._process = function _process() {
    if (this.pending) return;
    var keepGoing;
    do {
        var current = this.state;
        if (current === STATE_HEADER) {
            // Don't process new headers while locked
            keepGoing = this.locked ? false : this._processHeader();
        } else if (current === STATE_FILE_DATA) {
            keepGoing = this._processFileData();
        } else if (current === STATE_PADDING) {
            keepGoing = this._processPadding();
        } else if (current === STATE_GNU_LONG_PATH || current === STATE_GNU_LONG_LINK || current === STATE_PAX_HEADER) {
            keepGoing = this._processExtensionData();
        } else if (current === STATE_SPARSE_EXTENDED) {
            keepGoing = this._processSparseExtended();
        } else if (current === STATE_SPARSE_DATA) {
            keepGoing = this._processSparseData();
        } else {
            // STATE_END or an unknown state: stop
            keepGoing = false;
        }
    } while (keepGoing);
};
/**
 * Process header state: consume one header block and decide what follows.
 *
 * Outcomes:
 * - Not enough buffered bytes: nothing is consumed.
 * - Header fails to parse: the error is emitted and the parser ends.
 * - Null header (end-of-archive marker): the parser ends and 'finish' is
 *   emitted, at most once across this method and _maybeFinish().
 * - GNU long path/link or PAX header: switch to the matching extension
 *   state so the payload can be collected silently.
 * - GNU sparse header: parse sparse info and continue with extended
 *   sparse headers or set up the sparse entry.
 * - Anything else: apply pending extensions, create the entry stream,
 *   lock the parser and store the entry as pending.
 *
 * @returns true if _process() should keep looping, false if it must stop
 *          (more data needed, entry pending, or parser ended).
 */
_proto._processHeader = function _processHeader() {
    if (!this.buffer.has(_constantsts.HEADER_SIZE)) {
        return false; // Need more data
    }
    var headerBuf = this.buffer.consume(_constantsts.HEADER_SIZE);
    // Try to parse header
    var header;
    try {
        header = (0, _headersts.parseHeader)(headerBuf, this.options);
    } catch (err) {
        this._emitError(err);
        this.state = STATE_END;
        return false;
    }
    // Null header means end of archive (empty block)
    if (header === null) {
        this.state = STATE_END;
        // Record the emission so a later end() -> _maybeFinish() does not
        // emit 'finish' a second time.
        if (!this.finishEmitted) {
            this.finishEmitted = true;
            this.emit('finish');
        }
        return false;
    }
    this.header = header;
    this.paddingRemaining = (0, _headersts.overflow)(header.size);
    // Handle GNU/PAX extension headers - collect data silently
    var extensionState = null;
    switch (header.type) {
        case 'gnu-long-path':
            extensionState = STATE_GNU_LONG_PATH;
            break;
        case 'gnu-long-link-path':
            extensionState = STATE_GNU_LONG_LINK;
            break;
        case 'pax-header':
        case 'pax-global-header':
            // Global headers are read the same way; they differ only in
            // how they are applied to subsequent entries.
            extensionState = STATE_PAX_HEADER;
            break;
        default:
            break;
    }
    if (extensionState !== null) {
        this.extState.extensionRemaining = header.size;
        this.extState.extensionData = [];
        this.state = extensionState;
        return true; // Continue processing
    }
    // Handle GNU sparse files
    if (header.type === 'gnu-sparse') {
        // Parse sparse info from header
        this.sparseInfo = (0, _sparsets.parseGnuSparseHeader)(headerBuf);
        // Apply extensions (e.g., GNU long path)
        (0, _Extensionsts.applyExtensions)(header, this.extState);
        // Update header size to real (reconstructed) file size
        header.size = this.sparseInfo.realSize;
        // If extended sparse headers follow, read them first
        // (this.header already references this header object)
        if (this.sparseInfo.isExtended) {
            this.state = STATE_SPARSE_EXTENDED;
            return true; // Continue processing
        }
        // No extended headers - set up sparse entry now
        return this._setupSparseEntry(header);
    }
    // Apply any pending GNU/PAX extensions to this entry
    (0, _Extensionsts.applyExtensions)(header, this.extState);
    // Set up for file data
    this.entryRemaining = header.size;
    // Create entry stream
    this.entryStream = new _EntryStreamts.default();
    // Lock until consumer calls next()
    this.locked = true;
    this.pending = true;
    // Store pending entry (will be emitted when consumer calls resume())
    var self = this;
    var entryStream = this.entryStream;
    var next = function next() {
        self._unlock();
    };
    this.pendingEntry = {
        header: header,
        stream: entryStream,
        next: next
    };
    // If no data, go straight to padding
    if (this.entryRemaining === 0) {
        this.entryStream.end();
        this.entryStream = null;
        this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
    } else {
        this.state = STATE_FILE_DATA;
    }
    return false; // Don't continue processing until unlocked
};
/**
 * Collect the payload of an extension header (GNU long path/link, PAX).
 *
 * Buffered bytes are appended to the extension state until the announced
 * size has been read. The payload is then finalized with the configured
 * filename encoding ('utf8' by default) and the parser moves on to the
 * padding, or straight to the next header when there is none.
 *
 * @returns false when more input is needed, true otherwise.
 */
_proto._processExtensionData = function _processExtensionData() {
    var ext = this.extState;
    var complete = ext.extensionRemaining <= 0;
    if (!complete) {
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Take whatever is available, up to the remaining payload size
        var count = Math.min(ext.extensionRemaining, this.buffer.length);
        var piece = this.buffer.consume(count);
        ext.extensionRemaining -= count;
        ext.extensionData.push(piece);
        complete = ext.extensionRemaining <= 0;
    }
    if (complete) {
        // Done collecting extension data - decode and store
        var encoding = this.options.filenameEncoding || 'utf8';
        (0, _Extensionsts.finalizeExtension)(ext, this.state, this.header, encoding);
        this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
    }
    return true;
};
/**
 * Forward buffered bytes of the current entry to its entry stream.
 *
 * Once every byte of the entry has been delivered the stream is ended
 * and the parser moves on to the padding, or straight to the next header
 * when there is none.
 *
 * @returns false when more input is needed, true otherwise.
 */
_proto._processFileData = function _processFileData() {
    var complete = this.entryRemaining <= 0;
    if (!complete) {
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Take whatever is available, up to the remaining entry size
        var count = Math.min(this.entryRemaining, this.buffer.length);
        var piece = this.buffer.consume(count);
        this.entryRemaining -= count;
        if (this.entryStream) {
            this.entryStream.push(piece);
        }
        complete = this.entryRemaining <= 0;
    }
    if (complete) {
        // Done with file data: close the stream and advance the state
        if (this.entryStream) {
            this.entryStream.end();
            this.entryStream = null;
        }
        this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
    }
    return true;
};
/**
 * Discard the padding bytes that follow an entry's data.
 *
 * When all padding has been skipped the parser returns to the header state.
 *
 * @returns false when more input is needed, true otherwise.
 */
_proto._processPadding = function _processPadding() {
    var complete = this.paddingRemaining <= 0;
    if (!complete) {
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Drop as many padding bytes as are currently buffered
        var count = Math.min(this.paddingRemaining, this.buffer.length);
        this.buffer.consume(count);
        this.paddingRemaining -= count;
        complete = this.paddingRemaining <= 0;
    }
    if (complete) {
        this.state = STATE_HEADER;
    }
    return true;
};
/**
* Unlock parser (called by the next() callback handed to 'entry' listeners).
*
* Clears both backpressure flags, resumes the state machine so the next
* header can be parsed, delivers any newly parsed entry, and finally
* checks whether 'finish' can be emitted.
*/ _proto._unlock = function _unlock() {
// Clear both flags so _process() may advance past the current entry
this.locked = false;
this.pending = false;
this._process();
// After processing, if there's a pending entry, emit it
// (the consumer's listeners are still set up from previous entry)
this.resume();
// Check if we should emit finish (e.g., if end() was called while locked)
this._maybeFinish();
};
/**
 * Prepare a GNU sparse entry for delivery to the consumer.
 *
 * Computes how many data bytes are stored in the archive and the padding
 * that follows them, creates the SparseStream, locks the parser and stores
 * the entry as pending. The header's type is rewritten to 'file' so that
 * consumers see an ordinary file.
 *
 * @param header Parsed header of the sparse entry (mutated: `type` is set to 'file').
 * @returns false in every case: the parser must wait for the consumer
 *          (or has ended because sparse info was missing).
 */
_proto._setupSparseEntry = function _setupSparseEntry(header) {
    var info = this.sparseInfo;
    if (!info) {
        this._emitError(new Error('Sparse info not available'));
        this.state = STATE_END;
        return false;
    }
    // Bytes physically stored in the archive (sum of all sparse entry sizes)
    this.sparseDataRemaining = (0, _sparsets.sparseDataSize)(info.entries);
    // Padding is derived from the stored data size, not the real file size
    this.paddingRemaining = (0, _headersts.overflow)(this.sparseDataRemaining);
    // Stream that reconstructs the file
    var reconstructed = new _sparsets.SparseStream(info.entries, info.realSize);
    this.sparseStream = reconstructed;
    // Lock until consumer calls next()
    this.locked = true;
    this.pending = true;
    // Consumers don't need to know the entry is sparse
    header.type = 'file';
    var parser = this;
    this.pendingEntry = {
        header: header,
        stream: reconstructed,
        next: function next() {
            parser._unlock();
        }
    };
    if (this.sparseDataRemaining !== 0) {
        this.state = STATE_SPARSE_DATA;
        return false; // Don't continue until unlocked
    }
    // No stored data - the file consists of holes only
    this.sparseStream.end();
    this.sparseStream = null;
    this.sparseInfo = null;
    this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
    return false; // Don't continue until unlocked
};
/**
 * Read one extended GNU sparse header block.
 *
 * The entries it contains are appended to the current sparse info. If the
 * block announces a further extended header the parser stays in this
 * state; otherwise the sparse entry is set up.
 *
 * @returns false when more input is needed, the entry is pending, or the
 *          parser ended with an error; true when another extended header
 *          should be read.
 */
_proto._processSparseExtended = function _processSparseExtended() {
    if (!this.buffer.has(_constantsts.BLOCK_SIZE)) {
        return false; // Need more data
    }
    var block = this.buffer.consume(_constantsts.BLOCK_SIZE);
    var parsed = (0, _sparsets.parseGnuSparseExtended)(block);
    var info = this.sparseInfo;
    if (info) {
        // Add entries to sparse info
        for (var idx = 0; idx < parsed.entries.length; idx += 1) {
            info.entries.push(parsed.entries[idx]);
        }
        // More extended headers follow: stay in this state
        if (parsed.isExtended) {
            return true;
        }
    }
    if (!this.header) {
        // Should not reach here
        this._emitError(new Error('Header not available for sparse entry'));
        this.state = STATE_END;
        return false;
    }
    // Done reading extended headers - set up the sparse entry
    return this._setupSparseEntry(this.header);
};
/**
 * Forward buffered bytes of a sparse entry to the sparse stream.
 *
 * Once all stored data has been delivered the stream is ended, the sparse
 * bookkeeping is cleared and the parser moves on to the padding, or
 * straight to the next header when there is none.
 *
 * @returns false when more input is needed, true otherwise.
 */
_proto._processSparseData = function _processSparseData() {
    var complete = this.sparseDataRemaining <= 0;
    if (!complete) {
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Take whatever is available, up to the remaining stored data size
        var count = Math.min(this.sparseDataRemaining, this.buffer.length);
        var piece = this.buffer.consume(count);
        this.sparseDataRemaining -= count;
        // Push to sparse stream for reconstruction
        if (this.sparseStream) {
            this.sparseStream.push(piece);
        }
        complete = this.sparseDataRemaining <= 0;
    }
    if (complete) {
        // Done with sparse data: close the stream and reset sparse state
        if (this.sparseStream) {
            this.sparseStream.end();
            this.sparseStream = null;
        }
        this.sparseInfo = null;
        this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
    }
    return true;
};
return TarExtract;
}(_events.EventEmitter);
/* CJS INTEROP */
// When this module has a default export, make `require()` return that
// default directly while still exposing every enumerable export (and the
// __esModule marker) as properties on it.
if (exports.__esModule && exports.default) {
    try {
        Object.defineProperty(exports.default, '__esModule', { value: true });
        for (var key in exports) {
            exports.default[key] = exports[key];
        }
    } catch (_) {
        // Best effort: if the default export cannot be decorated, export it as-is
    }
    module.exports = exports.default;
}