/**
 * @obsidize/tar-browserify
 * Browser-based tar utility for packing and unpacking tar files (stream-capable)
 */
import { InMemoryAsyncUint8Array } from '../common/async-uint8-array';
import { AsyncUint8ArrayIterator } from '../common/async-uint8-array-iterator';
import { Constants } from '../common/constants';
import { TarUtility } from '../common/tar-utility';
import { PaxHeader } from '../header/pax/pax-header';
import { TarHeader } from '../header/tar-header';
import { TarHeaderUtility } from '../header/tar-header-utility';
import { UstarHeader } from '../header/ustar/ustar-header';
import { ArchiveEntry } from './archive-entry';
// Upper bound on how many bytes may accumulate in the reader's buffer cache
// while scanning for the next ustar header marker (see tryParseNextHeader).
// Caps memory growth when the input stream never yields a valid header.
// SECTOR_SIZE is presumably 512, making this ~50MB — TODO confirm against Constants.
const MAX_LOADED_BYTES = Constants.SECTOR_SIZE * 100000; // ~50Mb
/**
 * Errors that will be thrown if the reader encounters an invalid data layout.
 *
 * Emitted as a runtime enum object: each key maps to an identical string value,
 * and the IIFE merge pattern preserves any pre-existing binding.
 */
export var ArchiveReadError;
((errors) => {
    // Occurs when the reader fails to fully load the content buffer of an entry
    // due to the input data stream ending prematurely.
    errors.ERR_ENTRY_CONTENT_MIN_BUFFER_LENGTH_NOT_MET = "ERR_ENTRY_CONTENT_MIN_BUFFER_LENGTH_NOT_MET";
    // Occurs when the reader fails to fully load a PAX header
    // due to the input data stream ending prematurely.
    errors.ERR_HEADER_PAX_MIN_BUFFER_LENGTH_NOT_MET = "ERR_HEADER_PAX_MIN_BUFFER_LENGTH_NOT_MET";
    // Occurs when the reader fails to fully load a PAX header
    // due to the third and final segment not appearing in the input data stream.
    errors.ERR_HEADER_MISSING_POST_PAX_SEGMENT = "ERR_HEADER_MISSING_POST_PAX_SEGMENT";
})(ArchiveReadError || (ArchiveReadError = {}));
/**
* Generic utility for parsing tar entries from a stream of octets via `AsyncUint8ArrayIterator`
*/
export class ArchiveReader {
constructor(bufferIterator) {
this.bufferIterator = bufferIterator;
this.mGlobalPaxHeaders = [];
this.mBufferCache = null;
this.mOffset = 0;
this.mHasSyncInput = this.bufferIterator.input instanceof InMemoryAsyncUint8Array;
}
static withInput(input) {
return new ArchiveReader(new AsyncUint8ArrayIterator(input));
}
[Symbol.asyncIterator]() {
return this;
}
get source() {
return this.bufferIterator.input;
}
get globalPaxHeaders() {
return this.mGlobalPaxHeaders;
}
async readAllEntries() {
const entries = [];
for await (const entry of this) {
entries.push(entry);
}
return entries;
}
async next() {
const entry = await this.tryParseNextEntry();
if (entry !== null) {
return { done: false, value: entry };
}
return { done: true, value: null };
}
clearBufferCache() {
this.mBufferCache = null;
this.mOffset = 0;
}
getBufferCacheSlice(start, end) {
return TarUtility.cloneUint8Array(this.mBufferCache, start, end);
}
async tryRequireBufferSize(size) {
const buffer = await this.requireBufferSize(size);
return buffer !== null;
}
async requireBufferSize(size) {
while (!this.mBufferCache || this.mBufferCache.byteLength < size) {
if (!(await this.loadNextChunk())) {
this.clearBufferCache();
return null;
}
}
return this.mBufferCache;
}
async loadNextChunk() {
const nextChunk = await this.bufferIterator.tryNext();
if (!nextChunk) {
return false;
}
if (this.mBufferCache) {
this.mBufferCache = TarUtility.concatUint8Arrays(this.mBufferCache, nextChunk);
}
else {
this.mBufferCache = nextChunk;
this.mOffset = 0;
}
return true;
}
async tryParseNextEntry() {
const headerParseResult = await this.tryParseNextHeader();
if (headerParseResult === null) {
this.clearBufferCache();
return null;
}
const context = this;
const { header, headerOffset, contentOffset } = headerParseResult;
const headerByteLength = contentOffset - headerOffset;
const contentEnd = contentOffset + header.fileSize;
const offset = headerOffset;
// `contentEnd` may not be an even division of SECTOR_SIZE, so
// round up to the nearest sector start point after the content end.
const nextSectorStart = TarUtility.roundUpSectorOffset(contentEnd);
let content = null;
// If the buffer source is in-memory already, just read the content immediately
if (this.mHasSyncInput && header.fileSize > 0) {
if (!(await this.tryRequireBufferSize(nextSectorStart))) {
throw ArchiveReadError.ERR_ENTRY_CONTENT_MIN_BUFFER_LENGTH_NOT_MET;
}
content = this.getBufferCacheSlice(contentOffset, contentEnd);
}
// if some of the in-memory buffer is left over after this iteration,
// trim this entry's bytes off of the buffer and reset the offset pointer.
if (nextSectorStart + Constants.SECTOR_SIZE <= this.mBufferCache.byteLength) {
this.mBufferCache = this.getBufferCacheSlice(nextSectorStart);
this.mOffset = 0;
// otherwise, move the offset pointer so more data will be loaded in the next iterator call
}
else {
this.mOffset = nextSectorStart;
}
return new ArchiveEntry({ header, offset, headerByteLength, content, context });
}
async tryParseNextHeader() {
if (!(await this.tryRequireBufferSize(this.mOffset + Constants.HEADER_SIZE))) {
return null;
}
let ustarOffset = TarHeaderUtility.findNextUstarSectorOffset(this.mBufferCache, this.mOffset);
// Find next ustar marker
while (ustarOffset < 0 && this.mBufferCache.byteLength < MAX_LOADED_BYTES && (await this.loadNextChunk())) {
ustarOffset = TarHeaderUtility.findNextUstarSectorOffset(this.mBufferCache, this.mOffset);
}
// No header marker found and we ran out of bytes to load, terminate
if (ustarOffset < 0) {
this.clearBufferCache();
return null;
}
// Construct Header
let headerOffset = ustarOffset;
let headerBuffer = this.getBufferCacheSlice(headerOffset, headerOffset + Constants.HEADER_SIZE);
let ustarHeader = UstarHeader.deserialize(headerBuffer);
let header = new TarHeader({ ustar: ustarHeader });
// Advance cursor to process potential PAX header or entry content
let nextOffset = TarUtility.advanceSectorOffset(headerOffset, this.mBufferCache.byteLength);
if (ustarHeader.isPaxHeader) {
// Make sure we've buffered the pax header region and the next sector after that (next sector contains the _actual_ header)
const paxHeaderSectorEnd = nextOffset + TarUtility.roundUpSectorOffset(header.fileSize);
const requiredBufferSize = paxHeaderSectorEnd + Constants.HEADER_SIZE;
const isGlobalPax = header.isGlobalPaxHeader;
const preambleHeader = ustarHeader;
if (!(await this.tryRequireBufferSize(requiredBufferSize))) {
throw ArchiveReadError.ERR_HEADER_PAX_MIN_BUFFER_LENGTH_NOT_MET;
}
// Parse the pax header out from the next sector
const paxHeader = PaxHeader.deserialize(this.mBufferCache, nextOffset);
nextOffset = paxHeaderSectorEnd;
if (!TarHeaderUtility.isUstarSector(this.mBufferCache, nextOffset)) {
throw ArchiveReadError.ERR_HEADER_MISSING_POST_PAX_SEGMENT;
}
// The _actual_ header is AFTER the pax header, so need to do the header parse song and dance one more time
headerOffset = nextOffset;
headerBuffer = this.getBufferCacheSlice(headerOffset, headerOffset + Constants.HEADER_SIZE);
ustarHeader = UstarHeader.deserialize(headerBuffer);
nextOffset = TarUtility.advanceSectorOffsetUnclamped(nextOffset);
header = new TarHeader({
ustar: ustarHeader,
pax: paxHeader,
preamble: preambleHeader,
});
if (isGlobalPax) {
this.mGlobalPaxHeaders.push(header);
}
}
return { header, headerOffset, contentOffset: nextOffset };
}
}