/*
 * node-pkware
 * Version:
 * nodejs implementation of StormLib's pkware compressor/de-compressor
 * 322 lines • 13.8 kB
 * JavaScript
 */
import { Buffer } from 'node:buffer';
import { ChBitsAsc, ChCodeAsc, Compression, DictionarySize, DistBits, DistCode, ExLenBits, LenBits, LenCode, LONGEST_ALLOWED_REPETITION, } from './constants.js';
import { InvalidCompressionTypeError, InvalidDictionarySizeError } from './errors.js';
import { ExpandingBuffer } from './ExpandingBuffer.js';
import { clamp, quotientAndRemainder, getLowestNBitsOf, nBitsOfOnes, repeat, toHex } from './functions.js';
/**
 * Measures how many bytes starting at position `a` repeat at position `b`.
 *
 * The first two bytes are taken for granted as matching (the caller has
 * already located a two byte match), so counting starts at offset 2.
 * The result never exceeds `clamp(2, LONGEST_ALLOWED_REPETITION, b - a)`.
 *
 * @param inputBytes - indexable sequence of bytes; reading past its end yields
 * `undefined`, which ends the match
 * @param a - index where the earlier occurrence starts
 * @param b - index where the later occurrence starts
 * @returns the length of the match in bytes
 */
function getSizeOfMatching(inputBytes, a, b) {
  const longestAllowed = clamp(2, LONGEST_ALLOWED_REPETITION, b - a);
  let matched = 2;
  while (matched <= longestAllowed && inputBytes[a + matched] === inputBytes[b + matched]) {
    matched = matched + 1;
  }
  // the scan may step one past the allowed maximum when every byte matched
  return Math.min(matched, longestAllowed);
}
/**
 * Searches the bytes between `endOfLastMatch` and `cursor` for an earlier
 * occurrence of the two bytes found at `cursor`.
 *
 * TODO: make sure that we find the most recent one,
 * which in turn allows us to store backward length in less amount of bits
 * currently the code goes from the furthest point
 * (`indexOf` reports the first occurrence inside the searched area)
 *
 * @param inputBytes - the bytes to search in; `indexOf` is called with a
 * two byte slice as the needle, so a Buffer is expected here
 * @param endOfLastMatch - index where the searched area begins
 * @param cursor - index of the two bytes to look for, also the end of the searched area
 * @returns `size` is the length of the repetition and `distance` is one less
 * than the number of bytes to step back from `cursor`; both are 0 when
 * nothing was found
 */
function findRepetitions(inputBytes, endOfLastMatch, cursor) {
  const lookbackLength = cursor - endOfLastMatch;
  const bytesAhead = inputBytes.length - cursor;
  // a two byte needle needs two bytes at the cursor and at least two bytes behind it
  if (cursor === endOfLastMatch || lookbackLength < 2 || bytesAhead < 2) {
    return { size: 0, distance: 0 };
  }

  const searchArea = inputBytes.subarray(endOfLastMatch, cursor);
  const pair = inputBytes.subarray(cursor, cursor + 2);
  const offsetInSearchArea = searchArea.indexOf(pair);
  if (offsetInSearchArea === -1) {
    return { size: 0, distance: 0 };
  }

  const stepsBack = lookbackLength - offsetInSearchArea;
  // a match only 2 bytes back cannot be extended, so measuring is skipped for it
  const size = stepsBack > 2 ? getSizeOfMatching(inputBytes, endOfLastMatch + offsetInSearchArea, cursor) : 2;
  return { distance: stepsBack - 1, size };
}
/**
 * Chunk-by-chunk compressor: incoming bytes are collected in `inputBuffer`,
 * encoded as literals and (size, distance) repetitions, and the resulting
 * bit stream is packed into `outputBuffer`.
 *
 * The function returned by `getHandler()` has the `(chunk, encoding, callback)`
 * shape and installs `_flush` on its own `this`, so it is presumably meant to be
 * used as the transform function of a Node.js Transform stream.
 */
export class Implode {
  /** when true, progress information is printed via console.log */
  verbose;
  /** true until the handler has received its first chunk */
  isFirstChunk;
  /** bytes received but not yet compressed */
  inputBuffer;
  /** compressed bytes not yet handed over to the callback */
  outputBuffer;
  /** counters used for verbose logging */
  stats;
  compressionType;
  dictionarySize;
  /** mask for the low bits of a distance; -1 until setup() has run */
  dictionarySizeMask;
  /** set once the input has ended, makes processChunkData() write the termination literal */
  streamEnded;
  distCodes;
  distBits;
  /** read position inside inputBuffer */
  startIndex;
  handledFirstTwoBytes;
  /** number of bits already used in the last byte of outputBuffer */
  outBits;
  /** bit lengths of the codes in nChCodes, indexed the same way */
  nChBits;
  /** codes for literals (0x00-0xff) and for repetition sizes (0x1_00 and above) */
  nChCodes;

  /**
   * @param compressionType - a member of `Compression` other than `Compression.Unknown`
   * @param dictionarySize - a member of `DictionarySize` other than `DictionarySize.Unknown`
   * @param config - optional settings; only `config.verbose` is read
   * @throws {InvalidCompressionTypeError} when compressionType is unknown
   * @throws {InvalidDictionarySizeError} when dictionarySize is unknown
   */
  constructor(compressionType, dictionarySize, config) {
    if (!(compressionType in Compression) || compressionType === Compression.Unknown) {
      throw new InvalidCompressionTypeError();
    }
    if (!(dictionarySize in DictionarySize) || dictionarySize === DictionarySize.Unknown) {
      throw new InvalidDictionarySizeError();
    }
    this.verbose = config?.verbose ?? false;
    this.isFirstChunk = true;
    this.inputBuffer = new ExpandingBuffer(0x1_00_00);
    this.outputBuffer = new ExpandingBuffer(0x1_20_00);
    this.stats = { chunkCounter: 0 };
    this.compressionType = compressionType;
    this.dictionarySize = dictionarySize;
    // -1 marks "setup() has not run yet", see processChunkData()
    this.dictionarySizeMask = -1;
    this.streamEnded = false;
    this.distCodes = structuredClone(DistCode);
    this.distBits = structuredClone(DistBits);
    this.startIndex = 0;
    this.handledFirstTwoBytes = false;
    this.outBits = 0;
    this.nChBits = repeat(0, 0x3_06);
    this.nChCodes = repeat(0, 0x3_06);
  }

  /**
   * Creates the per-chunk handler.
   *
   * The handler appends the chunk to the input, compresses what it can and
   * passes whole 0x8_00 byte blocks of output to `callback`, always keeping
   * at least one block behind. Any error thrown while processing is passed
   * to `callback` instead of being thrown.
   *
   * @returns a `function (chunk, encoding, callback)`; it has to be invoked
   * with the owning stream as `this` because it assigns `this._flush`
   */
  getHandler() {
    // the returned function is deliberately not an arrow function: its own
    // `this` is the object that receives `_flush`, the Implode instance is
    // reachable through this alias
    const instance = this;
    return function (chunk, encoding, callback) {
      try {
        instance.inputBuffer.append(chunk);
        if (instance.isFirstChunk) {
          instance.isFirstChunk = false;
          this._flush = instance.onInputFinished.bind(instance);
        }
        if (instance.verbose) {
          instance.stats.chunkCounter = instance.stats.chunkCounter + 1;
          console.log(`implode: reading ${toHex(chunk.length)} bytes from chunk #${instance.stats.chunkCounter}`);
        }
        instance.processChunkData();
        const blockSize = 0x8_00;
        if (instance.outputBuffer.size() <= blockSize) {
          callback(null, Buffer.from([]));
          return;
        }
        let [numberOfBlocks] = quotientAndRemainder(instance.outputBuffer.size(), blockSize);
        // making sure to leave one block worth of data for lookback when processing chunk data
        numberOfBlocks = numberOfBlocks - 1;
        const numberOfBytes = numberOfBlocks * blockSize;
        // make sure to create a copy of the output buffer slice as it will get flushed in the next line
        const output = Buffer.from(instance.outputBuffer.read(0, numberOfBytes));
        instance.outputBuffer.flushStart(numberOfBytes);
        // NOTE(review): with no pending bits the last byte is reset to 0, presumably
        // because outputBits() ORs new bits into it - confirm against ExpandingBuffer.flushStart
        if (instance.outBits === 0) {
          instance.outputBuffer.setByte(-1, 0);
        }
        callback(null, output);
      }
      catch (error) {
        callback(error);
      }
    };
  }

  /**
   * Installed as `_flush` by the handler: marks the input as ended, compresses
   * whatever is still buffered (which also writes the termination literal) and
   * passes all remaining output to `callback`.
   *
   * @param callback - receives `(null, remainingOutput)` or `(error)`
   */
  onInputFinished(callback) {
    this.streamEnded = true;
    try {
      this.processChunkData();
      if (this.verbose) {
        console.log('---------------');
        console.log('implode: total number of chunks read:', this.stats.chunkCounter);
        console.log('implode: inputBuffer heap size', toHex(this.inputBuffer.heapSize()));
        console.log('implode: outputBuffer heap size', toHex(this.outputBuffer.heapSize()));
      }
      callback(null, this.outputBuffer.read());
    }
    catch (error) {
      callback(error);
    }
  }

  /**
   * Compresses everything currently held in `inputBuffer` into `outputBuffer`.
   *
   * Runs `setup()` on first use. While the stream is still open, processing
   * is postponed until at least 3 bytes are available; once the stream has
   * ended, shorter inputs are written out as literals so that no data is lost
   * and the termination literal is always emitted.
   */
  processChunkData() {
    if (this.dictionarySizeMask === -1) {
      this.setup();
    }
    if (!this.inputBuffer.isEmpty()) {
      this.startIndex = 0;
      if (!this.handledFirstTwoBytes) {
        // wait for more data only while more data can still arrive; returning here
        // after the stream has ended would drop the buffered bytes and skip the
        // termination literal
        if (this.inputBuffer.size() < 3 && !this.streamEnded) {
          return;
        }
        this.handledFirstTwoBytes = true;
        this.handleFirstTwoBytes();
      }
      // work in progress: the search always starts from the beginning of the input buffer
      // TODO: advance this (and drop the bytes before it) once searching for longer repetitions is implemented
      const endOfLastMatch = 0;
      while (this.startIndex < this.inputBuffer.size()) {
        const { size, distance } = findRepetitions(this.inputBuffer.read(endOfLastMatch), endOfLastMatch, this.startIndex);
        const isFlushable = this.isRepetitionFlushable(size, distance);
        if (isFlushable === false) {
          // no usable repetition: the byte is stored as a literal
          const byte = this.inputBuffer.readByte(this.startIndex);
          this.outputBits(this.nChBits[byte], this.nChCodes[byte]);
          this.startIndex = this.startIndex + 1;
        }
        else {
          // TODO: when isFlushable is null, try to find a better repetition 1 byte later
          // (stormlib/implode.c L517); for now the repetition is stored as found
          // sizes start at 2 and their codes start at index 0x1_00, hence the 0xfe offset
          const sizeIndex = size + 0xfe;
          this.outputBits(this.nChBits[sizeIndex], this.nChCodes[sizeIndex]);
          if (size === 2) {
            // 2 byte repetitions always store the lowest 2 bits of the distance as-is
            const distanceIndex = distance >> 2;
            this.outputBits(this.distBits[distanceIndex], this.distCodes[distanceIndex]);
            this.outputBits(2, distance & 3);
          }
          else {
            // longer repetitions store the lowest `dictionarySize` bits of the distance as-is
            const distanceIndex = distance >> this.dictionarySize;
            this.outputBits(this.distBits[distanceIndex], this.distCodes[distanceIndex]);
            this.outputBits(this.dictionarySize, this.dictionarySizeMask & distance);
          }
          this.startIndex = this.startIndex + size;
        }
        // once the cursor has moved past a fixed amount of bytes (which depends on
        // the dictionary size) that many bytes are dropped from the start of the input
        if (this.dictionarySize === DictionarySize.Small && this.startIndex >= 0x4_00) {
          this.inputBuffer.dropStart(0x4_00);
          this.startIndex = this.startIndex - 0x4_00;
        }
        else if (this.dictionarySize === DictionarySize.Medium && this.startIndex >= 0x8_00) {
          this.inputBuffer.dropStart(0x8_00);
          this.startIndex = this.startIndex - 0x8_00;
        }
        else if (this.dictionarySize === DictionarySize.Large && this.startIndex >= 0x10_00) {
          this.inputBuffer.dropStart(0x10_00);
          this.startIndex = this.startIndex - 0x10_00;
        }
      }
      this.inputBuffer.clear();
    }
    if (this.streamEnded) {
      // Write the termination literal
      this.outputBits(this.nChBits.at(-1), this.nChCodes.at(-1));
    }
  }

  /**
   * Decides whether a found repetition should be written to the output.
   *
   * @param size - length of the repetition, 0 when none was found
   * @param distance - distance of the repetition as returned by findRepetitions()
   * @returns false - non flushable
   * @returns true - flushable
   * @returns null - flushable, but there might be a better repetition
   */
  isRepetitionFlushable(size, distance) {
    if (size === 0) {
      return false;
    }
    // If we found repetition of 2 bytes, that is 0x1_00 or further back,
    // don't bother. Storing the distance of 0x1_00 bytes would actually
    // take more space than storing the 2 bytes as-is.
    if (size === 2 && distance >= 0x1_00) {
      return false;
    }
    // long enough, or the cursor is at the last byte of the input
    if (size >= 8 || this.startIndex + 1 >= this.inputBuffer.size()) {
      return true;
    }
    return null;
  }

  /**
   * repetitions are at least 2 bytes long,
   * so the initial 2 bytes can be moved to the output as is
   *
   * When the whole input is shorter than 2 bytes only the bytes that
   * actually exist are written.
   */
  handleFirstTwoBytes() {
    const numberOfBytes = Math.min(2, this.inputBuffer.size());
    for (let i = 0; i < numberOfBytes; i++) {
      const byte = this.inputBuffer.readByte(i);
      this.outputBits(this.nChBits[byte], this.nChCodes[byte]);
    }
    this.startIndex = this.startIndex + numberOfBytes;
  }

  /**
   * One-time initialization: fills the literal and repetition size code
   * tables, sets `dictionarySizeMask` and writes the 3 initial bytes of the
   * output (compression type, dictionary size and an empty byte that the
   * first bits get packed into).
   */
  setup() {
    // codes of literals occupy the indices 0x00-0xff
    switch (this.compressionType) {
      case Compression.Ascii: {
        for (let nCount = 0; nCount < 0x1_00; nCount++) {
          this.nChBits[nCount] = ChBitsAsc[nCount] + 1;
          this.nChCodes[nCount] = ChCodeAsc[nCount] * 2;
        }
        break;
      }
      case Compression.Binary: {
        // every literal takes 9 bits: the lowest bit is 0 and the byte itself sits above it
        let nChCode = 0;
        for (let nCount = 0; nCount < 0x1_00; nCount++) {
          this.nChBits[nCount] = 9;
          this.nChCodes[nCount] = nChCode;
          nChCode = getLowestNBitsOf(nChCode, 16) + 2;
        }
        break;
      }
    }
    switch (this.dictionarySize) {
      case DictionarySize.Small: {
        this.dictionarySizeMask = nBitsOfOnes(4);
        break;
      }
      case DictionarySize.Medium: {
        this.dictionarySizeMask = nBitsOfOnes(5);
        break;
      }
      case DictionarySize.Large: {
        this.dictionarySizeMask = nBitsOfOnes(6);
        break;
      }
    }
    // codes of repetition sizes follow the literals, their lowest bit is always 1
    let nCount = 0x1_00;
    for (let i = 0; i < 0x10; i++) {
      for (let nCount2 = 0; nCount2 < 1 << ExLenBits[i]; nCount2++) {
        this.nChBits[nCount] = ExLenBits[i] + LenBits[i] + 1;
        this.nChCodes[nCount] = (nCount2 << (LenBits[i] + 1)) | (LenCode[i] * 2) | 1;
        nCount = nCount + 1;
      }
    }
    this.outputBuffer.appendByte(this.compressionType);
    this.outputBuffer.appendByte(this.dictionarySize);
    this.outputBuffer.appendByte(0);
    this.outBits = 0;
  }

  /**
   * Appends the lowest `nBits` bits of `bitBuffer` to the output, lowest bit
   * first. The last byte of `outputBuffer` is always the partially filled
   * one, `outBits` tells how many of its bits are already taken.
   *
   * @param nBits - number of bits to write
   * @param bitBuffer - the bits to write
   */
  outputBits(nBits, bitBuffer) {
    // more than 8 bits are written in two steps: the lowest 8 bits first, the rest after
    if (nBits > 8) {
      this.outputBits(8, bitBuffer);
      bitBuffer = bitBuffer >> 8;
      nBits = nBits - 8;
    }
    // snapshot of the fill level from before this write
    const { outBits } = this;
    const lastByte = this.outputBuffer.readByte(this.outputBuffer.size() - 1);
    this.outputBuffer.setByte(-1, lastByte | getLowestNBitsOf(bitBuffer << outBits, 8));
    this.outBits = this.outBits + nBits;
    if (this.outBits > 8) {
      // the bits did not fit into the last byte: the overflow starts a new byte
      this.outBits = getLowestNBitsOf(this.outBits, 3);
      bitBuffer = bitBuffer >> (8 - outBits);
      this.outputBuffer.appendByte(getLowestNBitsOf(bitBuffer, 8));
    }
    else {
      this.outBits = getLowestNBitsOf(this.outBits, 3);
      // the last byte got filled completely: an empty byte is added for the upcoming bits
      if (this.outBits === 0) {
        this.outputBuffer.appendByte(0);
      }
    }
  }
}
//# sourceMappingURL=Implode.js.map