// this[BUFFER] is the remainder of a chunk if we're waiting for
// the full 512 bytes of a header to come in. We will Buffer.concat()
// it to the next write(), which is a mem copy, but a small one.
//
// this[QUEUE] is a Yallist of entries that haven't been emitted
// yet. This can only get filled up if the user keeps write()ing after
// a write() returns false, or does a write() with more than one entry
//
// We don't buffer chunks; we always parse them and either create an
// entry, or push it into the active entry. The ReadEntry class knows
// to throw data away if .ignore=true
//
// Shift entry off the buffer when it emits 'end', and emit 'entry' for
// the next one in the list.
//
// At any time, we're pushing body chunks into the entry at WRITEENTRY,
// and waiting for 'end' on the entry at READENTRY
//
// ignored entries get .resume() called on them straight away
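//
// A minimal usage sketch, not part of this module. The archive name
// and filter are hypothetical; entries must be consumed (resumed or
// piped), or the parser stalls until the active entry drains:
//
//   import fs from 'fs';
//   import { Parser } from './parse.js';
//   const parser = new Parser({
//     filter: (path, entry) => !path.endsWith('.tmp'),
//   });
//   parser.on('entry', entry => {
//     console.log(entry.path);
//     entry.resume(); // discard the body; we only want the listing
//   });
//   fs.createReadStream('archive.tgz').pipe(parser);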
import { EventEmitter as EE } from 'events';
import { BrotliDecompress, Unzip } from 'minizlib';
import { Yallist } from 'yallist';
import { Header } from './header.js';
import { Pax } from './pax.js';
import { ReadEntry } from './read-entry.js';
import { warnMethod } from './warn-method.js';
const maxMetaEntrySize = 1024 * 1024;
const gzipHeader = Buffer.from([0x1f, 0x8b]);
const STATE = Symbol('state');
const WRITEENTRY = Symbol('writeEntry');
const READENTRY = Symbol('readEntry');
const NEXTENTRY = Symbol('nextEntry');
const PROCESSENTRY = Symbol('processEntry');
const EX = Symbol('extendedHeader');
const GEX = Symbol('globalExtendedHeader');
const META = Symbol('meta');
const EMITMETA = Symbol('emitMeta');
const BUFFER = Symbol('buffer');
const QUEUE = Symbol('queue');
const ENDED = Symbol('ended');
const EMITTEDEND = Symbol('emittedEnd');
const EMIT = Symbol('emit');
const UNZIP = Symbol('unzip');
const CONSUMECHUNK = Symbol('consumeChunk');
const CONSUMECHUNKSUB = Symbol('consumeChunkSub');
const CONSUMEBODY = Symbol('consumeBody');
const CONSUMEMETA = Symbol('consumeMeta');
const CONSUMEHEADER = Symbol('consumeHeader');
const CONSUMING = Symbol('consuming');
const BUFFERCONCAT = Symbol('bufferConcat');
const MAYBEEND = Symbol('maybeEnd');
const WRITING = Symbol('writing');
const ABORTED = Symbol('aborted');
const DONE = Symbol('onDone');
const SAW_VALID_ENTRY = Symbol('sawValidEntry');
const SAW_NULL_BLOCK = Symbol('sawNullBlock');
const SAW_EOF = Symbol('sawEOF');
const CLOSESTREAM = Symbol('closeStream');
const noop = () => true;
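// The parser is a block-oriented state machine: input is consumed in
// 512-byte tar blocks, and [STATE] is one of 'begin' (nothing consumed
// yet), 'header' (expecting a header block), 'body' (streaming file
// data into the WRITEENTRY), 'meta' (accumulating a pax/gnu meta entry
// into [META]), or 'ignore' (draining an entry nobody wants).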
export class Parser extends EE {
file;
strict;
maxMetaEntrySize;
filter;
brotli;
writable = true;
readable = false;
[QUEUE] = new Yallist();
[BUFFER];
[READENTRY];
[WRITEENTRY];
[STATE] = 'begin';
[META] = '';
[EX];
[GEX];
[ENDED] = false;
[UNZIP];
[ABORTED] = false;
[SAW_VALID_ENTRY];
[SAW_NULL_BLOCK] = false;
[SAW_EOF] = false;
[WRITING] = false;
[CONSUMING] = false;
[EMITTEDEND] = false;
constructor(opt = {}) {
super();
this.file = opt.file || '';
// these TAR_BAD_ARCHIVE errors can't be detected early. Listen on DONE.
this.on(DONE, () => {
if (this[STATE] === 'begin' ||
this[SAW_VALID_ENTRY] === false) {
// either less than 1 block of data, or all entries were invalid.
// Either way, probably not even a tarball.
this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format');
}
});
if (opt.ondone) {
this.on(DONE, opt.ondone);
}
else {
this.on(DONE, () => {
this.emit('prefinish');
this.emit('finish');
this.emit('end');
});
}
this.strict = !!opt.strict;
this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize;
this.filter = typeof opt.filter === 'function' ? opt.filter : noop;
// Unlike gzip, brotli doesn't have any magic bytes to identify it.
// Users need to explicitly tell us they're extracting a brotli file,
// or we infer it from the file extension.
const isTBR = opt.file &&
(opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'));
// if it's a tbr file it MIGHT be brotli, but we don't know until
// we look at it and verify it's not a valid tar file.
this.brotli =
!opt.gzip && opt.brotli !== undefined ? opt.brotli
: isTBR ? undefined
: false;
// have to set this so that streams are ok piping into it
this.on('end', () => this[CLOSESTREAM]());
if (typeof opt.onwarn === 'function') {
this.on('warn', opt.onwarn);
}
if (typeof opt.onReadEntry === 'function') {
this.on('entry', opt.onReadEntry);
}
}
warn(code, message, data = {}) {
warnMethod(this, code, message, data);
}
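// Parse one 512-byte header block at `position`. Two consecutive null
// blocks mean EOF. Otherwise validate the checksum, path, and linkpath,
// create a ReadEntry, and route it into the meta, ignore, body, or
// header state as appropriate.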
[CONSUMEHEADER](chunk, position) {
if (this[SAW_VALID_ENTRY] === undefined) {
this[SAW_VALID_ENTRY] = false;
}
let header;
try {
header = new Header(chunk, position, this[EX], this[GEX]);
}
catch (er) {
return this.warn('TAR_ENTRY_INVALID', er);
}
if (header.nullBlock) {
if (this[SAW_NULL_BLOCK]) {
this[SAW_EOF] = true;
// ending an archive with no entries: pointless, but legal.
if (this[STATE] === 'begin') {
this[STATE] = 'header';
}
this[EMIT]('eof');
}
else {
this[SAW_NULL_BLOCK] = true;
this[EMIT]('nullBlock');
}
}
else {
this[SAW_NULL_BLOCK] = false;
if (!header.cksumValid) {
this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header });
}
else if (!header.path) {
this.warn('TAR_ENTRY_INVALID', 'path is required', { header });
}
else {
const type = header.type;
if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
this.warn('TAR_ENTRY_INVALID', 'linkpath required', {
header,
});
}
else if (!/^(Symbolic)?Link$/.test(type) &&
!/^(Global)?ExtendedHeader$/.test(type) &&
header.linkpath) {
this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', {
header,
});
}
else {
const entry = (this[WRITEENTRY] = new ReadEntry(header, this[EX], this[GEX]));
// we do this for meta & ignored entries as well, because they
// are still valid tar, or else we wouldn't know to ignore them
if (!this[SAW_VALID_ENTRY]) {
if (entry.remain) {
// this might be the one!
const onend = () => {
if (!entry.invalid) {
this[SAW_VALID_ENTRY] = true;
}
};
entry.on('end', onend);
}
else {
this[SAW_VALID_ENTRY] = true;
}
}
if (entry.meta) {
if (entry.size > this.maxMetaEntrySize) {
entry.ignore = true;
this[EMIT]('ignoredEntry', entry);
this[STATE] = 'ignore';
entry.resume();
}
else if (entry.size > 0) {
this[META] = '';
entry.on('data', c => (this[META] += c));
this[STATE] = 'meta';
}
}
else {
this[EX] = undefined;
entry.ignore =
entry.ignore || !this.filter(entry.path, entry);
if (entry.ignore) {
// probably valid, just not something we care about
this[EMIT]('ignoredEntry', entry);
this[STATE] = entry.remain ? 'ignore' : 'header';
entry.resume();
}
else {
if (entry.remain) {
this[STATE] = 'body';
}
else {
this[STATE] = 'header';
entry.end();
}
if (!this[READENTRY]) {
this[QUEUE].push(entry);
this[NEXTENTRY]();
}
else {
this[QUEUE].push(entry);
}
}
}
}
}
}
}
[CLOSESTREAM]() {
queueMicrotask(() => this.emit('close'));
}
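// Dequeue one item: an [ev, ...args] tuple is re-emitted, and an entry
// becomes the new READENTRY and is emitted as 'entry'. Returns true if
// the NEXTENTRY loop should keep shifting the queue.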
[PROCESSENTRY](entry) {
let go = true;
if (!entry) {
this[READENTRY] = undefined;
go = false;
}
else if (Array.isArray(entry)) {
const [ev, ...args] = entry;
this.emit(ev, ...args);
}
else {
this[READENTRY] = entry;
this.emit('entry', entry);
if (!entry.emittedEnd) {
entry.on('end', () => this[NEXTENTRY]());
go = false;
}
}
return go;
}
[NEXTENTRY]() {
do { } while (this[PROCESSENTRY](this[QUEUE].shift()));
if (!this[QUEUE].length) {
// At this point, there's nothing in the queue, but we may have an
// entry which is being consumed (readEntry).
// If we don't, then we definitely can handle more data.
// If we do, and either it's flowing, or it has never had any data
// written to it, then it needs more.
// The only other possibility is that it has returned false from a
// write() call, so we wait for the next drain to continue.
const re = this[READENTRY];
const drainNow = !re || re.flowing || re.size === re.remain;
if (drainNow) {
if (!this[WRITING]) {
this.emit('drain');
}
}
else {
re.once('drain', () => this.emit('drain'));
}
}
}
[CONSUMEBODY](chunk, position) {
// write up to but no more than writeEntry.blockRemain
const entry = this[WRITEENTRY];
/* c8 ignore start */
if (!entry) {
throw new Error('attempt to consume body without entry??');
}
const br = entry.blockRemain ?? 0;
/* c8 ignore stop */
const c = br >= chunk.length && position === 0 ?
chunk
: chunk.subarray(position, position + br);
entry.write(c);
if (!entry.blockRemain) {
this[STATE] = 'header';
this[WRITEENTRY] = undefined;
entry.end();
}
return c.length;
}
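// Meta entries are consumed exactly like bodies, but their bytes
// accumulate in [META] (via the 'data' listener set in CONSUMEHEADER);
// when the entry completes, EMITMETA parses the result.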
[CONSUMEMETA](chunk, position) {
const entry = this[WRITEENTRY];
const ret = this[CONSUMEBODY](chunk, position);
// if we finished, then the entry is reset
if (!this[WRITEENTRY] && entry) {
this[EMITMETA](entry);
}
return ret;
}
[EMIT](ev, data, extra) {
if (!this[QUEUE].length && !this[READENTRY]) {
this.emit(ev, data, extra);
}
else {
this[QUEUE].push([ev, data, extra]);
}
}
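// Apply a finished meta entry: pax extended headers update [EX]
// (next-file scope) or [GEX] (global scope); GNU long path/linkpath
// entries set the matching field on [EX], truncating at the first NUL.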
[EMITMETA](entry) {
this[EMIT]('meta', this[META]);
switch (entry.type) {
case 'ExtendedHeader':
case 'OldExtendedHeader':
this[EX] = Pax.parse(this[META], this[EX], false);
break;
case 'GlobalExtendedHeader':
this[GEX] = Pax.parse(this[META], this[GEX], true);
break;
case 'NextFileHasLongPath':
case 'OldGnuLongPath': {
const ex = this[EX] ?? Object.create(null);
this[EX] = ex;
ex.path = this[META].replace(/\0.*/, '');
break;
}
case 'NextFileHasLongLinkpath': {
const ex = this[EX] ?? Object.create(null);
this[EX] = ex;
ex.linkpath = this[META].replace(/\0.*/, '');
break;
}
/* c8 ignore start */
default:
throw new Error('unknown meta: ' + entry.type);
/* c8 ignore stop */
}
}
abort(error) {
this[ABORTED] = true;
this.emit('abort', error);
// always throws, even in non-strict mode
this.warn('TAR_ABORT', error, { recoverable: false });
}
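// The first write() sniffs the stream: the two-byte gzip magic
// (0x1f 0x8b) selects Unzip, and for a .tbr/.tar.br file a first 512
// bytes that fails to parse as a tar Header selects BrotliDecompress.
// Decompressed data is fed back through CONSUMECHUNK.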
write(chunk, encoding, cb) {
if (typeof encoding === 'function') {
cb = encoding;
encoding = undefined;
}
if (typeof chunk === 'string') {
chunk = Buffer.from(chunk,
/* c8 ignore next */
typeof encoding === 'string' ? encoding : 'utf8');
}
if (this[ABORTED]) {
/* c8 ignore next */
cb?.();
return false;
}
// first write, might be gzipped
const needSniff = this[UNZIP] === undefined ||
(this.brotli === undefined && this[UNZIP] === false);
if (needSniff && chunk) {
if (this[BUFFER]) {
chunk = Buffer.concat([this[BUFFER], chunk]);
this[BUFFER] = undefined;
}
if (chunk.length < gzipHeader.length) {
this[BUFFER] = chunk;
/* c8 ignore next */
cb?.();
return true;
}
// look for gzip header
for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) {
if (chunk[i] !== gzipHeader[i]) {
this[UNZIP] = false;
}
}
const maybeBrotli = this.brotli === undefined;
if (this[UNZIP] === false && maybeBrotli) {
// read the first header to see if it's a valid tar file. If so,
// we can safely assume that it's not actually brotli, despite the
// .tbr or .tar.br file extension.
// if the stream ended before we got a full 512-byte block, it's
// definitely brotli
if (chunk.length < 512) {
if (this[ENDED]) {
this.brotli = true;
}
else {
this[BUFFER] = chunk;
/* c8 ignore next */
cb?.();
return true;
}
}
else {
// if it's a valid tar header, it's pretty reliably not brotli; the
// odds of brotli data also parsing as a tar header are astronomical.
try {
new Header(chunk.subarray(0, 512));
this.brotli = false;
}
catch (_) {
this.brotli = true;
}
}
}
if (this[UNZIP] === undefined ||
(this[UNZIP] === false && this.brotli)) {
const ended = this[ENDED];
this[ENDED] = false;
this[UNZIP] =
this[UNZIP] === undefined ?
new Unzip({})
: new BrotliDecompress({});
this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk));
this[UNZIP].on('error', er => this.abort(er));
this[UNZIP].on('end', () => {
this[ENDED] = true;
this[CONSUMECHUNK]();
});
this[WRITING] = true;
const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk);
this[WRITING] = false;
cb?.();
return ret;
}
}
this[WRITING] = true;
if (this[UNZIP]) {
this[UNZIP].write(chunk);
}
else {
this[CONSUMECHUNK](chunk);
}
this[WRITING] = false;
// return false if there's a queue, or if the current entry isn't flowing
const ret = this[QUEUE].length ? false
: this[READENTRY] ? this[READENTRY].flowing
: true;
// if we have no queue, then that means a clogged READENTRY
if (!ret && !this[QUEUE].length) {
this[READENTRY]?.once('drain', () => this.emit('drain'));
}
/* c8 ignore next */
cb?.();
return ret;
}
[BUFFERCONCAT](c) {
if (c && !this[ABORTED]) {
this[BUFFER] =
this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c;
}
}
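// Emit DONE exactly once, and only after input has ended, we aren't
// mid-consume, and we haven't aborted. A WRITEENTRY with blockRemain
// left over means the archive was truncated inside an entry.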
[MAYBEEND]() {
if (this[ENDED] &&
!this[EMITTEDEND] &&
!this[ABORTED] &&
!this[CONSUMING]) {
this[EMITTEDEND] = true;
const entry = this[WRITEENTRY];
if (entry && entry.blockRemain) {
// truncated, likely a damaged file
const have = this[BUFFER] ? this[BUFFER].length : 0;
this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry });
if (this[BUFFER]) {
entry.write(this[BUFFER]);
}
entry.end();
}
this[EMIT](DONE);
}
}
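// Top-level chunk intake. Re-entrant calls (e.g. UNZIP 'data' events
// fired while we're already consuming) just append to [BUFFER];
// otherwise consume, then keep draining the buffer while it still
// holds at least one full 512-byte block.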
[CONSUMECHUNK](chunk) {
if (this[CONSUMING] && chunk) {
this[BUFFERCONCAT](chunk);
}
else if (!chunk && !this[BUFFER]) {
this[MAYBEEND]();
}
else if (chunk) {
this[CONSUMING] = true;
if (this[BUFFER]) {
this[BUFFERCONCAT](chunk);
const c = this[BUFFER];
this[BUFFER] = undefined;
this[CONSUMECHUNKSUB](c);
}
else {
this[CONSUMECHUNKSUB](chunk);
}
while (this[BUFFER] &&
this[BUFFER]?.length >= 512 &&
!this[ABORTED] &&
!this[SAW_EOF]) {
const c = this[BUFFER];
this[BUFFER] = undefined;
this[CONSUMECHUNKSUB](c);
}
this[CONSUMING] = false;
}
if (!this[BUFFER] || this[ENDED]) {
this[MAYBEEND]();
}
}
[CONSUMECHUNKSUB](chunk) {
// we know that we are in CONSUMING mode, so anything written goes into
// the buffer. Advance the position and put any remainder in the buffer.
let position = 0;
const length = chunk.length;
while (position + 512 <= length &&
!this[ABORTED] &&
!this[SAW_EOF]) {
switch (this[STATE]) {
case 'begin':
case 'header':
this[CONSUMEHEADER](chunk, position);
position += 512;
break;
case 'ignore':
case 'body':
position += this[CONSUMEBODY](chunk, position);
break;
case 'meta':
position += this[CONSUMEMETA](chunk, position);
break;
/* c8 ignore start */
default:
throw new Error('invalid state: ' + this[STATE]);
/* c8 ignore stop */
}
}
if (position < length) {
if (this[BUFFER]) {
this[BUFFER] = Buffer.concat([
chunk.subarray(position),
this[BUFFER],
]);
}
else {
this[BUFFER] = chunk.subarray(position);
}
}
}
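// Writable-style end(chunk, encoding, cb). Flushes any final chunk
// through the decompressor (or a plain write()), then MAYBEEND fires
// DONE once consuming settles.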
end(chunk, encoding, cb) {
if (typeof chunk === 'function') {
cb = chunk;
encoding = undefined;
chunk = undefined;
}
if (typeof encoding === 'function') {
cb = encoding;
encoding = undefined;
}
if (typeof chunk === 'string') {
chunk = Buffer.from(chunk, encoding);
}
if (cb)
this.once('finish', cb);
if (!this[ABORTED]) {
if (this[UNZIP]) {
/* c8 ignore start */
if (chunk)
this[UNZIP].write(chunk);
/* c8 ignore stop */
this[UNZIP].end();
}
else {
this[ENDED] = true;
if (this.brotli === undefined)
chunk = chunk || Buffer.alloc(0);
if (chunk)
this.write(chunk);
this[MAYBEEND]();
}
}
return this;
}
}
//# sourceMappingURL=parse.js.map