UNPKG

tar-iterator

Version:

Extract contents from tar archives with an iterator API, using streams or paths. Use the stream interface and pipe transforms to add decompression algorithms.

557 lines 23 kB
/**
 * TarExtract - Streaming TAR extraction
 *
 * Event-based TAR parser that emits 'entry' events for each file.
 * Node 0.8 compatible.
 *
 * State machine (PADDING is skipped when the payload is already block-aligned):
 * ```
 *   HEADER ─┬─ [file] ───────────> FILE_DATA ──> PADDING ──> HEADER
 *           ├─ [gnu-long-path] ──> GNU_LONG_PATH ──> PADDING ──> HEADER
 *           ├─ [gnu-long-link] ──> GNU_LONG_LINK ──> PADDING ──> HEADER
 *           ├─ [pax-header] ─────> PAX_HEADER ──> PADDING ──> HEADER
 *           ├─ [gnu-sparse] ─┬───> SPARSE_EXTENDED ──> SPARSE_DATA ──> PADDING ──> HEADER
 *           │                └───> SPARSE_DATA ──> PADDING ──> HEADER
 *           └─ [null header] ───> END
 * ```
 *
 * Extension handling:
 * - GNU LongPath/LongLink headers store path for NEXT entry
 * - PAX headers store attributes for NEXT entry (or all entries if global)
 * - Extensions are applied when the actual file header is processed
 *
 * Events:
 *   'entry'  (header: TarHeader, stream: Readable, next: () => void)
 *   'error'  (err: Error)
 *   'finish' () - emitted at most once per parser instance
 */
"use strict";
Object.defineProperty(exports, "__esModule", {
    value: true
});
Object.defineProperty(exports, "default", {
    enumerable: true,
    get: function() {
        return TarExtract;
    }
});
var _events = require("events");
var _extractbaseiterator = require("extract-base-iterator");
var _constantsts = require("./constants.js");
var _EntryStreamts = /*#__PURE__*/ _interop_require_default(require("./EntryStream.js"));
var _Extensionsts = require("./Extensions.js");
var _headersts = require("./headers.js");
var _sparsets = require("./sparse.js");

// ---------------------------------------------------------------------------
// Transpiler (SWC) class helpers - kept verbatim so the ES5 class emulation
// behaves exactly as generated.
// ---------------------------------------------------------------------------
function _assert_this_initialized(self) {
    if (self === void 0) {
        throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
    }
    return self;
}
function _call_super(_this, derived, args) {
    derived = _get_prototype_of(derived);
    return _possible_constructor_return(_this, _is_native_reflect_construct() ? Reflect.construct(derived, args || [], _get_prototype_of(_this).constructor) : derived.apply(_this, args));
}
function _class_call_check(instance, Constructor) {
    if (!(instance instanceof Constructor)) {
        throw new TypeError("Cannot call a class as a function");
    }
}
function _get_prototype_of(o) {
    _get_prototype_of = Object.setPrototypeOf ? Object.getPrototypeOf : function getPrototypeOf(o) {
        return o.__proto__ || Object.getPrototypeOf(o);
    };
    return _get_prototype_of(o);
}
function _inherits(subClass, superClass) {
    if (typeof superClass !== "function" && superClass !== null) {
        throw new TypeError("Super expression must either be null or a function");
    }
    subClass.prototype = Object.create(superClass && superClass.prototype, {
        constructor: {
            value: subClass,
            writable: true,
            configurable: true
        }
    });
    if (superClass) _set_prototype_of(subClass, superClass);
}
function _interop_require_default(obj) {
    return obj && obj.__esModule ? obj : {
        default: obj
    };
}
function _possible_constructor_return(self, call) {
    if (call && (_type_of(call) === "object" || typeof call === "function")) {
        return call;
    }
    return _assert_this_initialized(self);
}
function _set_prototype_of(o, p) {
    _set_prototype_of = Object.setPrototypeOf || function setPrototypeOf(o, p) {
        o.__proto__ = p;
        return o;
    };
    return _set_prototype_of(o, p);
}
function _type_of(obj) {
    "@swc/helpers - typeof";
    return obj && typeof Symbol !== "undefined" && obj.constructor === Symbol ? "symbol" : typeof obj;
}
function _is_native_reflect_construct() {
    try {
        var result = !Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function() {}));
    } catch (_) {}
    return (_is_native_reflect_construct = function() {
        return !!result;
    })();
}

// Parser states
var STATE_HEADER = 0;
var STATE_FILE_DATA = 1;
var STATE_PADDING = 2;
var STATE_END = 3;
var STATE_GNU_LONG_PATH = 4;
var STATE_GNU_LONG_LINK = 5;
var STATE_PAX_HEADER = 6;
var STATE_SPARSE_EXTENDED = 7;
var STATE_SPARSE_DATA = 8;

var TarExtract = /*#__PURE__*/ function(EventEmitter) {
    "use strict";
    _inherits(TarExtract, EventEmitter);

    /**
     * Create a parser.
     *
     * @param {Object} [options] Stored as this.options (defaults to {}).
     *   Passed through to parseHeader(); options.filenameEncoding is read
     *   when decoding GNU/PAX extension data (falls back to 'utf8').
     */
    function TarExtract(options) {
        _class_call_check(this, TarExtract);
        var _this;
        _this = _call_super(this, TarExtract);
        // Current entry state
        _this.header = null;
        _this.entryStream = null;
        _this.entryRemaining = 0;
        _this.paddingRemaining = 0;
        // Backpressure control
        _this.locked = false;
        _this.pending = false;
        _this.finished = false;
        _this.finishEmitted = false;
        // Pending entry to emit (waiting for consumer to set up listeners)
        _this.pendingEntry = null;
        // GNU sparse file state
        _this.sparseInfo = null;
        _this.sparseStream = null;
        _this.sparseDataRemaining = 0;
        _this.buffer = new _extractbaseiterator.BufferList();
        _this.state = STATE_HEADER;
        _this.options = options || {};
        _this.extState = (0, _Extensionsts.createExtensionState)();
        return _this;
    }
    var _proto = TarExtract.prototype;

    /**
     * Write data to the parser.
     *
     * @param chunk Data appended to the internal BufferList.
     * @param {Function} [callback] Invoked synchronously once the chunk has
     *   been processed (or immediately if end() was already called).
     * @returns {boolean} false when end() was already called or the parser is
     *   locked waiting for the consumer to call next(); true otherwise.
     */
    _proto.write = function write(chunk, callback) {
        if (this.finished) {
            if (callback) callback();
            return false;
        }
        this.buffer.append(chunk);
        this._process();
        // Emit any pending entry that was parsed during _process()
        // This is necessary because _process() may parse new entry headers
        // from incoming data, and those entries need to be emitted to listeners
        this.resume();
        if (callback) callback();
        return !this.locked;
    };

    /**
     * Signal end of input.
     *
     * @param {Function} [callback] Invoked synchronously after the remaining
     *   buffered data has been processed.
     */
    _proto.end = function end(callback) {
        this.finished = true;
        this._process();
        // Emit any pending entry before checking for finish
        this.resume();
        this._maybeFinish();
        if (callback) callback();
    };

    /**
     * Emit error to the main stream and any active entry stream
     * This prevents tests from hanging when errors occur mid-extraction
     *
     * @param {Error} err Error to propagate.
     */
    _proto._emitError = function _emitError(err) {
        // Propagate error to any active entry stream first
        var activeStream = this.entryStream || this.sparseStream;
        if (activeStream && !activeStream.ended) {
            activeStream.emit('error', err);
        }
        // Then emit to the main extract stream
        this.emit('error', err);
    };

    /**
     * Emit 'finish' if appropriate
     */
    _proto._maybeFinish = function _maybeFinish() {
        // Don't emit finish more than once
        if (this.finishEmitted) return;
        // Don't emit finish if we have a pending entry
        if (this.pendingEntry) return;
        // Don't emit finish if not finished yet
        if (!this.finished) return;
        // Don't emit finish if locked - consumer hasn't called next() yet
        // and there may be more entries to process
        if (this.locked) return;
        // Only emit finish when we're in a terminal state
        if (this.state === STATE_HEADER || this.state === STATE_END) {
            this.state = STATE_END; // Mark as ended
            this.finishEmitted = true;
            this.emit('finish');
        }
    };

    /**
     * Resume parsing - emit any pending entry
     * Call this after setting up 'entry' listeners
     */
    _proto.resume = function resume() {
        // Only emit if there are listeners - this prevents entries from being
        // lost when resume() is called from write() before listeners are set up
        // Use listeners().length for Node 0.8 compatibility (listenerCount added in 0.10)
        if (this.pendingEntry && this.listeners('entry').length > 0) {
            var entry = this.pendingEntry;
            this.pendingEntry = null;
            // Clear pending flag so file data can flow
            this.pending = false;
            // Emit the entry
            this.emit('entry', entry.header, entry.stream, entry.next);
            // Continue processing file data
            this._process();
            // Check if we should emit finish now
            this._maybeFinish();
        }
    };

    /**
     * Process buffered data through state machine
     *
     * Loops until a state handler reports that it cannot make progress
     * (returns false).
     */
    _proto._process = function _process() {
        // Note: locked/pending only blocks processing NEXT header, not current entry data
        if (this.pending) return;
        var cont = true;
        while (cont) {
            switch (this.state) {
                case STATE_HEADER:
                    // Don't process new headers while locked
                    if (this.locked) {
                        cont = false;
                    } else {
                        cont = this._processHeader();
                    }
                    break;
                case STATE_FILE_DATA:
                    cont = this._processFileData();
                    break;
                case STATE_PADDING:
                    cont = this._processPadding();
                    break;
                case STATE_GNU_LONG_PATH:
                case STATE_GNU_LONG_LINK:
                case STATE_PAX_HEADER:
                    cont = this._processExtensionData();
                    break;
                case STATE_SPARSE_EXTENDED:
                    cont = this._processSparseExtended();
                    break;
                case STATE_SPARSE_DATA:
                    cont = this._processSparseData();
                    break;
                case STATE_END:
                    cont = false;
                    break;
                default:
                    cont = false;
            }
        }
    };

    /**
     * Process header state
     *
     * @returns {boolean} true if the state machine should keep running,
     *   false if it must stop (more data needed, entry pending, error, or
     *   end of archive).
     */
    _proto._processHeader = function _processHeader() {
        if (!this.buffer.has(_constantsts.HEADER_SIZE)) {
            return false; // Need more data
        }
        var headerBuf = this.buffer.consume(_constantsts.HEADER_SIZE);
        // Try to parse header
        var header;
        try {
            header = (0, _headersts.parseHeader)(headerBuf, this.options);
        } catch (err) {
            this._emitError(err);
            this.state = STATE_END;
            return false;
        }
        // Null header means end of archive (empty block)
        if (header === null) {
            this.state = STATE_END;
            // Record the emission: without this, a later end() -> _maybeFinish()
            // sees finishEmitted === false with state === STATE_END and emits
            // 'finish' a second time.
            if (!this.finishEmitted) {
                this.finishEmitted = true;
                this.emit('finish');
            }
            return false;
        }
        this.header = header;
        this.paddingRemaining = (0, _headersts.overflow)(header.size);
        // Handle GNU/PAX extension headers - collect data silently
        if (header.type === 'gnu-long-path') {
            this.extState.extensionRemaining = header.size;
            this.extState.extensionData = [];
            this.state = STATE_GNU_LONG_PATH;
            return true; // Continue processing
        }
        if (header.type === 'gnu-long-link-path') {
            this.extState.extensionRemaining = header.size;
            this.extState.extensionData = [];
            this.state = STATE_GNU_LONG_LINK;
            return true; // Continue processing
        }
        if (header.type === 'pax-header') {
            this.extState.extensionRemaining = header.size;
            this.extState.extensionData = [];
            this.state = STATE_PAX_HEADER;
            return true; // Continue processing
        }
        if (header.type === 'pax-global-header') {
            // For global headers, we read them but they apply to all subsequent entries
            this.extState.extensionRemaining = header.size;
            this.extState.extensionData = [];
            this.state = STATE_PAX_HEADER; // Same handling, different application
            return true; // Continue processing
        }
        // Handle GNU sparse files
        if (header.type === 'gnu-sparse') {
            // Parse sparse info from header
            this.sparseInfo = (0, _sparsets.parseGnuSparseHeader)(headerBuf);
            // Apply extensions (e.g., GNU long path)
            (0, _Extensionsts.applyExtensions)(header, this.extState);
            // Update header size to real (reconstructed) file size
            header.size = this.sparseInfo.realSize;
            // If extended sparse headers follow, read them first
            if (this.sparseInfo.isExtended) {
                this.header = header;
                this.state = STATE_SPARSE_EXTENDED;
                return true; // Continue processing
            }
            // No extended headers - set up sparse entry now
            return this._setupSparseEntry(header);
        }
        // Apply any pending GNU/PAX extensions to this entry
        (0, _Extensionsts.applyExtensions)(header, this.extState);
        // Set up for file data
        this.entryRemaining = header.size;
        // Create entry stream
        this.entryStream = new _EntryStreamts.default();
        // Lock until consumer calls next()
        this.locked = true;
        this.pending = true;
        // Store pending entry (will be emitted when consumer calls resume())
        var self = this;
        var entryStream = this.entryStream;
        var next = function next() {
            self._unlock();
        };
        this.pendingEntry = {
            header: header,
            stream: entryStream,
            next: next
        };
        // If no data, go straight to padding
        if (this.entryRemaining === 0) {
            this.entryStream.end();
            this.entryStream = null;
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
        } else {
            this.state = STATE_FILE_DATA;
        }
        return false; // Don't continue processing until unlocked
    };

    /**
     * Process extension data (GNU long path/link, PAX headers)
     *
     * @returns {boolean} false only when more input is needed.
     */
    _proto._processExtensionData = function _processExtensionData() {
        if (this.extState.extensionRemaining <= 0) {
            // Done collecting extension data - decode and store
            var encoding = this.options.filenameEncoding || 'utf8';
            (0, _Extensionsts.finalizeExtension)(this.extState, this.state, this.header, encoding);
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
            return true;
        }
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Read as much as we can
        var toRead = Math.min(this.extState.extensionRemaining, this.buffer.length);
        var data = this.buffer.consume(toRead);
        this.extState.extensionRemaining -= toRead;
        this.extState.extensionData.push(data);
        // Check if done
        if (this.extState.extensionRemaining <= 0) {
            var encoding1 = this.options.filenameEncoding || 'utf8';
            (0, _Extensionsts.finalizeExtension)(this.extState, this.state, this.header, encoding1);
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
        }
        return true;
    };

    /**
     * Process file data state
     *
     * @returns {boolean} false only when more input is needed.
     */
    _proto._processFileData = function _processFileData() {
        if (this.entryRemaining <= 0) {
            // Done with file data
            if (this.entryStream) {
                this.entryStream.end();
                this.entryStream = null;
            }
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
            return true;
        }
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Read as much as we can
        var toRead = Math.min(this.entryRemaining, this.buffer.length);
        var data = this.buffer.consume(toRead);
        this.entryRemaining -= toRead;
        // Push to entry stream
        if (this.entryStream) {
            this.entryStream.push(data);
        }
        // Check if done
        if (this.entryRemaining <= 0) {
            if (this.entryStream) {
                this.entryStream.end();
                this.entryStream = null;
            }
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
        }
        return true;
    };

    /**
     * Process padding state
     *
     * @returns {boolean} false only when more input is needed.
     */
    _proto._processPadding = function _processPadding() {
        if (this.paddingRemaining <= 0) {
            this.state = STATE_HEADER;
            return true;
        }
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Skip padding bytes
        var toSkip = Math.min(this.paddingRemaining, this.buffer.length);
        this.buffer.consume(toSkip);
        this.paddingRemaining -= toSkip;
        if (this.paddingRemaining <= 0) {
            this.state = STATE_HEADER;
        }
        return true;
    };

    /**
     * Unlock parser (called by next() callback)
     */
    _proto._unlock = function _unlock() {
        this.locked = false;
        this.pending = false;
        this._process();
        // After processing, if there's a pending entry, emit it
        // (the consumer's listeners are still set up from previous entry)
        this.resume();
        // Check if we should emit finish (e.g., if end() was called while locked)
        this._maybeFinish();
    };

    /**
     * Set up a sparse entry with SparseStream
     *
     * @param header Parsed header of the sparse entry; its type is rewritten
     *   to 'file' before it is handed to consumers.
     * @returns {boolean} Always false: either an error was emitted or the
     *   parser is now locked until the consumer calls next().
     */
    _proto._setupSparseEntry = function _setupSparseEntry(header) {
        if (!this.sparseInfo) {
            this._emitError(new Error('Sparse info not available'));
            this.state = STATE_END;
            return false;
        }
        // Calculate actual data size (sum of all sparse entry numbytes)
        this.sparseDataRemaining = (0, _sparsets.sparseDataSize)(this.sparseInfo.entries);
        // Calculate padding for the actual data size
        this.paddingRemaining = (0, _headersts.overflow)(this.sparseDataRemaining);
        // Create sparse stream for reconstruction
        this.sparseStream = new _sparsets.SparseStream(this.sparseInfo.entries, this.sparseInfo.realSize);
        // Lock until consumer calls next()
        this.locked = true;
        this.pending = true;
        // Store pending entry (the stream looks like a regular entry to consumers)
        var self = this;
        var stream = this.sparseStream;
        var next = function next() {
            self._unlock();
        };
        // Change header type to 'file' for consumers (they don't need to know it's sparse)
        header.type = 'file';
        this.pendingEntry = {
            header: header,
            stream: stream,
            next: next
        };
        // Go to sparse data state
        if (this.sparseDataRemaining === 0) {
            // No data - just holes (all zeros)
            this.sparseStream.end();
            this.sparseStream = null;
            this.sparseInfo = null;
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
        } else {
            this.state = STATE_SPARSE_DATA;
        }
        return false; // Don't continue until unlocked
    };

    /**
     * Process extended sparse headers
     *
     * @returns {boolean} true when another extended header block follows;
     *   otherwise false (more data needed, entry set up, or error emitted).
     */
    _proto._processSparseExtended = function _processSparseExtended() {
        if (!this.buffer.has(_constantsts.BLOCK_SIZE)) {
            return false; // Need more data
        }
        var extBuf = this.buffer.consume(_constantsts.BLOCK_SIZE);
        var ext = (0, _sparsets.parseGnuSparseExtended)(extBuf);
        // Add entries to sparse info
        if (this.sparseInfo) {
            for (var i = 0; i < ext.entries.length; i++) {
                this.sparseInfo.entries.push(ext.entries[i]);
            }
            // Check if more extended headers follow
            if (ext.isExtended) {
                return true; // Continue reading extended headers
            }
        }
        // Done reading extended headers - set up the sparse entry
        if (this.header) {
            return this._setupSparseEntry(this.header);
        }
        // Should not reach here
        this._emitError(new Error('Header not available for sparse entry'));
        this.state = STATE_END;
        return false;
    };

    /**
     * Process sparse file data
     *
     * @returns {boolean} false only when more input is needed.
     */
    _proto._processSparseData = function _processSparseData() {
        if (this.sparseDataRemaining <= 0) {
            // Done with sparse data
            if (this.sparseStream) {
                this.sparseStream.end();
                this.sparseStream = null;
            }
            this.sparseInfo = null;
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
            return true;
        }
        if (this.buffer.length === 0) {
            return false; // Need more data
        }
        // Read as much as we can
        var toRead = Math.min(this.sparseDataRemaining, this.buffer.length);
        var data = this.buffer.consume(toRead);
        this.sparseDataRemaining -= toRead;
        // Push to sparse stream for reconstruction
        if (this.sparseStream) {
            this.sparseStream.push(data);
        }
        // Check if done
        if (this.sparseDataRemaining <= 0) {
            if (this.sparseStream) {
                this.sparseStream.end();
                this.sparseStream = null;
            }
            this.sparseInfo = null;
            this.state = this.paddingRemaining > 0 ? STATE_PADDING : STATE_HEADER;
        }
        return true;
    };

    return TarExtract;
}(_events.EventEmitter);
/* CJS INTEROP */ if (exports.__esModule && exports.default) { try { Object.defineProperty(exports.default, '__esModule', { value: true }); for (var key in exports) { exports.default[key] = exports[key]; } } catch (_) {}; module.exports = exports.default; }