qfs-compression
Version:
A JavaScript implementation of the QFS compression and decompression algorithm
339 lines (289 loc) • 9.99 kB
JavaScript
// # index.js
// A JavaScript implementation of the QFS compression and decompression
// algorithms. Based on wouanagaine's C library found here: https://github.com/
// wouanagaine/SC4Mapper-2013/blob/master/Modules/qfs.c
'use strict';
// # decompress(input)
// JavaScript implementation of the QFS decompression algorithm. Returns a new
// buffer of the same class as `input` holding the uncompressed data.
// IMPORTANT! In some cases, the first 4 bytes indicate the size of the input
// buffer. We **don't** detect this automatically, you need to discard those 4
// bytes yourself!
// Throws an Error if the magic number is wrong, if the header or the stream
// is truncated, if a back-reference points before the start of the output,
// or if the amount of decoded data does not match the size from the header.
function decompress(input) {

  // Check magic number.
  const [magic, marker] = input;
  if (!((magic === 0x10 || magic === 0x11) && marker === 0xfb)) {
    throw new Error(
      'Input is not a valid QFS compressed buffer! Did you forget to truncate the size bytes?'
    );
  }

  // The size is read from bytes 2-4, so a shorter header cannot be decoded.
  if (input.length < 5) {
    throw new Error('Input is truncated: the QFS header is incomplete!');
  }

  // Create a malloc function based on the input buffer class we received.
  const malloc = createMalloc(input);

  // First two bytes are 0x10fb (QFS id), then follows the *uncompressed*
  // size, which allows us to prepare a buffer for it.
  // NOTE(review): for 0x11 headers the payload starts at byte 8, yet the
  // size is still read from bytes 2-4 - confirm against the format spec.
  const size = 0x10000*input[2] + 0x100*input[3] + input[4];
  const out = malloc(size);

  let inpos = magic & 0x01 ? 8 : 5;
  let outpos = 0;

  // Copies `length` raw bytes that follow a control sequence of `skip`
  // bytes from the input to the output and moves the input cursor past
  // them. Refusing to read beyond the input avoids silently decoding a
  // truncated stream into zero bytes.
  const literal = (skip, length) => {
    const start = inpos + skip;
    if (start + length > input.length) {
      throw new Error('Unexpected end of input when decompressing!');
    }
    memcpy(out, outpos, input, start, length);
    inpos = start + length;
    outpos += length;
  };

  // Repeats `length` bytes that are already in the output, starting
  // `offset` bytes back. This is the essence of the compression algorithm.
  // The source and destination may overlap, which is why memcpy is used.
  const repeat = (offset, length) => {
    if (offset > outpos) {
      throw new Error(
        'Invalid back-reference when decompressing: it points before the start of the output!'
      );
    }
    memcpy(out, outpos, out, outpos-offset, length);
    outpos += length;
  };

  // Start decoding now. Note that trailing bytes are handled separately,
  // indicated by a control character >= 0xfc.
  while (inpos < input.length && input[inpos] < 0xfc) {
    const code = input[inpos];
    const a = input[inpos+1];
    const b = input[inpos+2];
    if (!(code & 0x80)) {
      // 2-byte control sequence.
      literal(2, code & 3);
      repeat(((code >> 5) << 8) + a + 1, ((code & 0x1c) >> 2) + 3);
    } else if (!(code & 0x40)) {
      // 3-byte control sequence.
      literal(3, (a >> 6) & 3);
      repeat((a & 0x3f)*256 + b + 1, (code & 0x3f) + 4);
    } else if (!(code & 0x20)) {
      // 4-byte control sequence. `c` has to be read before the input
      // cursor is moved by `literal()`.
      const c = input[inpos+3];
      literal(4, code & 3);
      repeat(
        ((code & 0x10)<<12) + 256*a + b + 1,
        ((code>>2) & 3)*256 + c + 5
      );
    } else {
      // The last case means there's no compression really, we just copy
      // as is.
      literal(1, (code & 0x1f)*4 + 4);
    }
  }

  // Trailing bytes. This is indicated by the control character being
  // greater than or equal to 0xfc; its two lowest bits hold the count.
  if (inpos < input.length && outpos < out.length) {
    literal(1, input[inpos] & 3);
  }

  // Check if everything is correct.
  if (outpos !== out.length) {
    throw new Error('Error when decompressing!');
  }

  // We're done!
  return out;

}
exports.decompress = decompress;
// # createMalloc(buffer)
// Builds an allocator that produces buffers of the same kind as the one we
// were given: hand us an Uint8Array and the allocator yields Uint8Arrays,
// hand us a Node.js buffer - even in the browser - and it yields Node.js
// buffers. Classes exposing a static `allocUnsafe()` are allocated through
// it; anything else is constructed via its `Symbol.species` constructor, or
// via the class itself when no species is defined.
function createMalloc(buffer) {
  const { constructor: BufferClass } = buffer;

  // Node.js style buffers.
  if (BufferClass.allocUnsafe) {
    return (size) => BufferClass.allocUnsafe(size);
  }

  // Everything else, typed arrays for instance.
  const Species = BufferClass[Symbol.species] || BufferClass;
  return (size) => new Species(size);
}
// # memcpy(out, outpos, input, inpos, length)
// LZ-compatible memcopy function. Copies `length` elements from `input`,
// starting at `inpos`, to `out`, starting at `outpos`. We don't use
// buffer.copy here because we might be copying from ourselves as well: the
// elements are copied one by one, front to back, so that an element written
// earlier in the same call can be read again later in that call.
// A `length` that is zero, negative or NaN copies nothing.
function memcpy(out, outpos, input, inpos, length) {
  // A bounded counter is used on purpose: counting down with `while (i--)`
  // never terminates when the length is negative or not an integer.
  for (let i = 0; i < length; i++) {
    out[outpos + i] = input[inpos + i];
  }
}
// # SmartBuffer
// Tiny implementation of a smart buffer that only supports writing raw
// *bytes*. The backing buffer starts at DEFAULT_SIZE bytes and doubles every
// time it is full, up to MAX_SIZE bytes.
const DEFAULT_SIZE = 4096;
const MAX_SIZE = 32*1024*1024;
class SmartBuffer {

  // ## constructor(malloc)
  // `malloc(size)` has to return a new buffer of the given size. The
  // returned buffers need to support `.set()` and `.subarray()`.
  constructor(malloc) {
    this.length = 0;
    this.buffer = malloc(DEFAULT_SIZE);
    this.malloc = malloc;
  }

  // ## push(byte)
  // Appends a single byte, growing the backing buffer first if it is full.
  // Throws a RangeError when the buffer already has its maximum size.
  push(byte) {
    const { buffer } = this;
    if (buffer.length < this.length+1) {

      // Once the maximum size is reached, "growing" would allocate a
      // buffer of the very same size, after which writes would land
      // outside of it. Fail loudly instead of losing data silently.
      if (buffer.length >= MAX_SIZE) {
        throw new RangeError(
          `SmartBuffer cannot grow beyond ${MAX_SIZE} bytes!`
        );
      }
      const grown = this.malloc(Math.min(MAX_SIZE, 2*buffer.length));
      grown.set(buffer);
      this.buffer = grown;

    }
    this.buffer[this.length++] = byte;
  }

  // ## toBuffer()
  // Returns the bytes written so far as a `subarray()` of the backing
  // buffer.
  toBuffer() {
    return this.buffer.subarray(0, this.length);
  }

}
// Performance calibration constants for compression. QFS_MAXITER limits how
// many earlier occurrences are inspected for every input position.
const QFS_MAXITER = 50;

// The largest control sequence stores the back-reference offset, minus one,
// in 17 bits. A search window larger than 2**17 would allow offsets that
// spill into the bits identifying the control sequence.
const QFS_MAX_WINDOW_BITS = 17;

// # compress(input, opts)
// A JavaScript implementation of QFS compression. We use a smart buffer here
// so that we don't have to manage the output size manually. Returns the
// compressed data as a buffer created by the smart buffer.
// Options:
//  - windowBits (default 17): the search window is 2**windowBits bytes.
//    Values above 17 are clamped to 17 because larger offsets cannot be
//    encoded.
//  - includeSize (default false): if true, the output is prefixed with 4
//    bytes holding the length of the rest of the output, lowest byte first.
// Throws an Error if the input is larger than 0xffffff bytes.
function compress(input, opts = {}) {

  // Important! If the input buffer is larger than 16MB, we can't compress
  // because that would cause a bit overflow and the size to be stored as 0!
  const inlen = input.length;
  if (inlen > 0xffffff) {
    throw new Error(`Input size cannot be larger than ${0xffffff} bytes!`);
  }

  // Constants for tuning performance.
  const { windowBits = 17, includeSize = false } = opts;
  const WINDOW_LEN = 2**Math.min(windowBits, QFS_MAX_WINDOW_BITS);
  const WINDOW_MASK = WINDOW_LEN-1;

  // Prepare our buffer to which we'll write the output.
  const malloc = createMalloc(input);
  const out = new SmartBuffer(malloc);
  const push = out.push.bind(out);

  // Initialize our occurrence tables. `rev_similar` stores, for every
  // input position (modulo the window size), the previous position at
  // which the same 2-byte sequence was found, or -1 if there is none.
  const rev_similar = new Int32Array(WINDOW_LEN).fill(-1);

  // `rev_last` tracks the position at which every [a, b] byte sequence was
  // last found. It is a flat array of 256*256 entries, indexed as 256*a + b.
  const rev_last = new Int32Array(256*256).fill(-1);

  // The "fill" method simply writes uncompressed data to the output stream
  // in runs that are a multiple of 4 bytes, leaving at most 3 bytes behind.
  // We always do this right before writing away a "best length" match.
  let inpos = 0;
  let lastwrot = 0;
  const fill = () => {
    while (inpos - lastwrot >= 4) {
      let length = Math.floor((inpos - lastwrot)/4) - 1;
      if (length > 0x1b) length = 0x1b;
      push(0xe0 + length);
      length = 4*length + 4;
      while (length--) push(input[lastwrot++]);
    }
  };

  // If we have to include the size of the compressed buffer as well, we'll
  // reserve 4 bytes to write this away once we know the size.
  if (includeSize) {
    for (let i = 0; i < 4; i++) push(0);
  }

  // Write the header to the output: the QFS id followed by the
  // uncompressed size, highest byte first.
  push(0x10);
  push(0xfb);
  push(inlen >> 16);
  push((inlen >> 8) & 0xff);
  push(inlen & 0xff);

  // Main encoding loop.
  const max = inlen-1;
  for (; inpos < max; inpos++) {

    // Update the occurrence tables: remember where this 2-byte sequence
    // was seen the previous time and register the current position as
    // the most recent one.
    const index = 256*input[inpos] + input[inpos+1];
    let offs = rev_similar[inpos & WINDOW_MASK] = rev_last[index];
    rev_last[index] = inpos;

    // If this part has already been compressed, skip ahead.
    if (inpos < lastwrot) continue;

    // Look for a redundancy now by walking the chain of earlier
    // occurrences that are still inside the window.
    let bestlen = 0;
    let bestoffs = 0;
    let i = 0;
    while (offs >= 0 && inpos-offs < WINDOW_LEN && i++ < QFS_MAXITER) {
      let length = 2;
      let incmp = inpos + 2;
      let inref = offs + 2;
      while (
        incmp < inlen &&
        inref < inlen &&
        input[incmp++] === input[inref++] &&
        length < 1028
      ) {
        length++;
      }
      if (length > bestlen) {
        bestlen = length;
        bestoffs = inpos-offs;
      }
      offs = rev_similar[offs & WINDOW_MASK];
    }

    // Check if redundancy is good enough. The comparison above never
    // reads beyond the input, so a match can never be longer than what
    // is left of the input and no clamping is needed.
    if (
      bestlen <= 2 ||
      (bestlen === 3 && bestoffs > 1024) ||
      (bestlen === 4 && bestoffs > 16384)
    ) {
      continue;
    }

    // Cool, we found a good redundancy. Now write away. After fill() at
    // most 3 raw bytes are pending, which fits the 2 bits reserved for
    // them in every control sequence.
    fill();
    let length = inpos-lastwrot;
    const d = bestoffs-1;
    if (bestlen <= 10 && bestoffs <= 1024) {
      // 2-byte control character.
      push(((d>>8)<<5) + ((bestlen-3)<<2) + length);
      push(d & 0xff);
    } else if (bestlen <= 67 && bestoffs <= 16384) {
      // 3-byte control character.
      push(0x80 + (bestlen-4));
      push((length<<6) + (d>>8));
      push(d & 0xff);
    } else {
      // 4-byte control character. The search above guarantees that
      // bestlen <= 1028 and bestoffs < WINDOW_LEN <= 2**17.
      push(0xC0 + ((d>>16)<<4) + (((bestlen-5)>>8)<<2) + length);
      push((d>>8) & 0xff);
      push(d & 0xff);
      push((bestlen-5) & 0xff);
    }

    // The pending raw bytes follow the control sequence; the matched
    // bytes themselves are skipped.
    while (length--) push(input[lastwrot++]);
    lastwrot += bestlen;

  }

  // Grab the length of what still needs to be processed and write it away
  // as a control character. Then, write the raw contents.
  inpos = inlen;
  fill();
  let length = inpos - lastwrot;
  push(0xfc + length);
  while (length--) push(input[lastwrot++]);

  // If we have to include the size of the *compressed* buffer, do that as
  // well. The 4 reserved bytes themselves are not counted.
  const buffer = out.toBuffer();
  if (includeSize) {
    const size = out.length - 4;
    buffer[0] = size & 0xff;
    buffer[1] = (size >> 8) & 0xff;
    buffer[2] = (size >> 16) & 0xff;
    buffer[3] = (size >> 24) & 0xff;
  }

  // We're done!
  return buffer;

}
exports.compress = compress;