browser-stream-tar
Version:
split tar web-stream into a sequence of Files
351 lines (299 loc) • 9.74 kB
JavaScript
/**
* Field Name Byte Offset Length in Bytes Field Type
* name 0 100 NUL-terminated if NUL fits
* mode 100 8
* uid 108 8
* gid 116 8
* size 124 12
* mtime 136 12
* chksum 148 8
* typeflag 156 1 see below
* linkname 157 100 NUL-terminated if NUL fits
* magic 257 6 must be TMAGIC (NUL term.)
* version 263 2 must be TVERSION
* uname 265 32 NUL-terminated
* gname 297 32 NUL-terminated
* devmajor 329 8
* devminor 337 8
* prefix 345 155 NUL-terminated if NUL fits
*/
const BLOCKSIZE = 512;
const DECODER = new TextDecoder();
/**
* Decodes a PAX header
* @see https://www.systutorials.com/docs/linux/man/5-star/
* @param {ReadableStreamReader<Uint8Array>} reader where to read from
* @param {Uint8Array} buffer
* @param {Object} header to be filled with values form buffer
* @returns {Promise<Uint8Array>} buffer positioned after the consumed bytes
*/
export async function decodePaxHeader(reader, buffer, header) {
buffer = await fill(reader, buffer, BLOCKSIZE);
for (const line of DECODER.decode(buffer).split(/\n/)) {
const m = line.match(/^\d+ ([^=]+)=(.*)/);
if (m) {
let key = m[1];
if (key === "path") {
key = "name";
}
header[key] = m[2];
}
}
return buffer.subarray(BLOCKSIZE);
}
function decodeBase(buffer, file) {
file.mode = decodeInteger(buffer.subarray(100, 108));
file.uid = decodeInteger(buffer.subarray(108, 116));
file.gid = decodeInteger(buffer.subarray(116, 124));
file.size = decodeInteger(buffer.subarray(124, 136));
file.lastModified = new Date(1000 * decodeInteger(buffer.subarray(136, 148)));
//file.magic = decodeString(buffer.subarray(257, 265));
file.uname = decodeString(buffer.subarray(265, 297));
file.gname = decodeString(buffer.subarray(297, 329));
const name = decodeString(buffer.subarray(0, 100));
const prefix = decodeString(buffer.subarray(345, 500));
file.name = prefix.length ? prefix + "/" + name : name;
}
/**
* Decodes the next header.
* @param {ReadableStreamReader<Uint8Array>} reader where to read from
* @param {Uint8Array|undefined} buffer
* @param {Object} header to be filled with values form buffer and reader
* @returns {Promise<Uint8Array|undefined>} buffer positioned after the consumed bytes
*/
export async function decodeHeader(reader, buffer, header) {
while ((buffer = await fill(reader, buffer, BLOCKSIZE))) {
if (buffer[0] === 0) break;
const type = buffer[156];
switch (type) {
case 49: // '1' link
case 50: // '2' reserved
case 51: // '3' character special
case 52: // '4' block special
case 53: // '5' directory
case 54: // '6' FIFO special
case 55: // '7' reserved
case 86: // 'V' tape/volume header Ignore on extraction
buffer = buffer.subarray(BLOCKSIZE);
break;
case 76: // 'L' NEXT file has a long name
decodeBase(buffer, header);
buffer = await fill(reader, buffer, BLOCKSIZE);
header.name = toString(buffer.subarray(0, BLOCKSIZE));
//console.log("NAME", header.name);
return buffer.subarray(BLOCKSIZE);
break;
case 103: // 'g' Global extended header
case 120: // 'x' Extended header referring to the next file in the archive
buffer = buffer.subarray(BLOCKSIZE);
return decodeHeader(
reader,
await decodePaxHeader(reader, buffer, header),
header
);
case 0: // regular file
case 48: // '0' regular file
decodeBase(buffer, header);
return buffer.subarray(BLOCKSIZE);
default:
throw new Error(`Unsupported header type ${type}`);
}
}
}
/**
* Provide tar entry iterator.
* @param {ReadableStream} tar
* @return {AsyncIterable<File>}
*/
export async function* files(tar) {
const reader = tar.getReader();
let buffer, header;
while ((buffer = await decodeHeader(reader, buffer, (header = {})))) {
let consumed = false;
if (header.prefix) {
if (header.prefix.length) {
header.name = header.prefix + "/" + header.name;
}
delete header.prefix;
}
const stream = new ReadableStream({
async pull(controller) {
let remaining = header.size;
while (remaining >= buffer.length) {
remaining = remaining - buffer.length;
controller.enqueue(buffer);
buffer = await fill(reader);
}
/**
* --512--|-----512------|
* | R | O |
* |
* DDDDDDDDDDDD---------HHHH
* | | |
* A0 A0 A1
*
*/
controller.enqueue(buffer.subarray(0, remaining));
/**
* +--------- size --------+
* | +- remaining -+- overflow -+
* | | | |
* HDD ... DDDDDDDDDDDDDDDDDD------------HHHHHH
* [BUFFER .... ] [BUFFER ... ]
* +----------- skip --------+
*/
buffer = await skip(reader, buffer, remaining + overflow(header.size));
controller.close();
consumed = true;
}
});
yield new StreamFile(stream, header.name, header);
if (!consumed) {
const reader = stream.getReader();
while (!(await reader.read()).done);
}
}
}
/**
* Convert bytes into string.
* @param {Uint8Array} bytes
* @returns {string}
*/
export function decodeString(bytes) {
const i = bytes.findIndex(b => b === 0);
return DECODER.decode(i < 0 ? bytes : bytes.subarray(0, i));
}
/**
* Convert ASCII octal number into number.
* @param {Uint8Array} bytes
* @returns {number}
*/
export function decodeInteger(bytes) {
return parseInt(decodeString(bytes), 8);
}
export function encodeInteger(buffer, offset, number, length) {
for (const s of number.toString(8).padStart(length, "0")) {
buffer[offset++] = s.charCodeAt(0);
}
}
export function encodeString(buffer, offset, string, length) {
for (let i = 0; i < length; i++) {
buffer[offset + i] = string.charCodeAt(i);
}
}
export function checksum(buffer) {
let sum = 0;
for (let i = 0; i < BLOCKSIZE; i++) {
sum += i >= 148 && i < 156 ? 32 : buffer[i];
}
return sum;
}
export function overflow(size) {
size &= BLOCKSIZE - 1;
return size && BLOCKSIZE - size;
}
/**
* Read bytes from a reader and append them to a given buffer until a requested length of the buffer is reached.
* @param {ReadableStreamReader<Uint8Array>} reader where to read from
* @param {Uint8Array} [buffer] initial buffer or undefined
* @param {number} [length] desired buffer length
* @returns {Promise<Uint8Array|undefined>} filled up buffer
*/
export async function fill(reader, buffer, length) {
if (buffer?.length >= length) {
return buffer;
}
do {
const { done, value } = await reader.read();
if (done) {
return undefined;
}
if (buffer === undefined) {
buffer = value;
} else {
const newBuffer = new Uint8Array(buffer.length + value.length);
newBuffer.set(buffer);
newBuffer.set(value, buffer.length);
buffer = newBuffer;
}
} while (buffer.length < length);
return buffer;
}
/**
* Skip some bytes from a buffer.
* @param {ReadableStreamReader<Uint8Array>} reader where to read from
* @param {Uint8Array} buffer
* @param {number} length to be skipped
* @returns {Promise<Uint8Array|undefined>} buffer positionend after skipped bytes
*/
export async function skip(reader, buffer, length) {
return (await fill(reader, buffer, length))?.subarray(length);
}
/**
* Reads web stream content into a Uint8Array.
* @param {ReadableStream} stream
* @returns {Promise<Uint8Array>}
*/
async function streamToUint8Array(stream) {
const reader = stream.getReader();
let buffer = new Uint8Array();
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
const newBuffer = new Uint8Array(buffer.length + value.length);
newBuffer.set(buffer);
newBuffer.set(value, buffer.length);
buffer = newBuffer;
}
reader.releaseLock();
return buffer;
}
export const extension2Mime = {
".txt": "text/plain",
".csv": "text/csv",
".json": "application/json",
".xml": "application/xml",
".tar": "application/x-tar"
};
export function typeFromName(name) {
if (name.match(/\._/)) {
return "application/octet-stream";
}
const m = name.match(/(\.\w+)$/);
return extension2Mime[m?.[1]] || "application/octet-stream";
}
export class StreamFile extends File {
#stream;
#lastModified;
#size;
constructor(stream, name, options) {
super([], name, options);
this.#stream = stream;
if (options) {
this.#lastModified = options.lastModified;
this.#size = options.size;
this.mode = options.mode;
this.uid = options.uid;
this.gid = options.gid;
this.uname = options.uname;
this.gname = options.gname;
}
}
stream() {
return this.#stream;
}
async text() {
return new TextDecoder().decode(await streamToUint8Array(this.#stream));
}
get type() {
return typeFromName(this.name);
}
get lastModified() {
return this.#lastModified;
}
get size() {
return this.#size;
}
}