lz77
Version:
A typescript implementation of lz77, usable for node and browsers.
78 lines (70 loc) • 4.31 kB
text/typescript
import { describe, it, expect } from 'vitest';
import { compress, compressRollingHash, decompress, decompressLegacy, compressHybrid } from '../index';
import { compressLegacy, compressHashTable } from '../index_legacy';
const compressVariants = [
{ fn: compress, name: 'compress (default)' },
{ fn: compressHybrid, name: 'compressHybrid (hash+window)' },
{ fn: compressRollingHash, name: 'compressRollingHash' },
{ fn: compressLegacy, name: 'compressLegacy' },
{ fn: compressHashTable, name: 'compressHashTable' },
];
const decompressVariants = [
{ fn: decompress, name: 'decompress (array)' },
{ fn: decompressLegacy, name: 'decompressLegacy (string-concat)' },
];
describe('LZ77', () => {
const source = "Sanskrit: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥ Sanskrit (standard transcription): kācaṃ śaknomyattum; nopahinasti mām. Classical Greek: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει. Greek (monotonic): Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. Greek (polytonic): Μπορῶ νὰ φάω σπασμένα γυαλιὰ χωρὶς νὰ πάθω τίποτα. Etruscan: (NEEDED) Latin: Vitrum edere possum; mihi non nocet. Old French: Je puis mangier del voirre. Ne me nuit. French: Je peux manger du verre, ça ne me fait pas mal. Provençal / Occitan: Pòdi manjar de veire, me nafrariá pas. Québécois: J'peux manger d'la vitre, ça m'fa pas mal. Walloon: Dji pou magnî do vêre, çoula m' freut nén må. Champenois: (NEEDED) Lorrain: (NEEDED) Picard: Ch'peux mingi du verre, cha m'foé mie n'ma. Corsican/Corsu: (NEEDED) Jèrriais: (NEEDED) Kreyòl Ayisyen (Haitï): Mwen kap manje vè, li pa blese'm.";
for (const c of compressVariants) {
for (const d of decompressVariants) {
it(`Round-trip: ${c.name} then ${d.name} returns original`, () => {
const compressed = c.fn(source);
const decompressed = d.fn(compressed as string);
expect(decompressed).toBe(source);
});
}
}
describe('compress', () => {
it('Fails if a string is not provided', () => {
const test = compress(['a', 'b'] as any);
expect(test).toBe(false);
});
});
describe('decompress', () => {
it('Fails if a string is not provided', () => {
const test = decompress({ list: ['a', 'b'] } as any);
expect(test).toBe(false);
});
});
// Known limitation: compressHash (and similar hash-table methods) may fail this test for certain minStringLength and input patterns. This is kept for documentation purposes.
it.skip('Round-trip: minStringLength=6, sample issue (#3) string', () => {
const settings = { minStringLength: 6 };
const to_compress = "can't read my, can't read my, no he can't read my poker face";
for (const c of compressVariants) {
for (const d of decompressVariants) {
const compressed = c.fn(to_compress, settings);
const decompressed = d.fn(compressed as string, settings);
expect(decompressed, `Failed for compress=${c.name}, decompress=${d.name}\nCompressed: ${compressed}\nDecompressed: ${decompressed}`).toBe(to_compress);
}
}
});
describe('Compression effectiveness', () => {
it('compresses repeated substrings and round-trips correctly', () => {
const settings = { minStringLength: 5 };
const to_compress = "hello hello baby you called I can't hear a thing";
for (const c of compressVariants) {
const compressed = c.fn(to_compress, settings);
// Debug output: log lengths
// console.log(`[${c.name}] original length:`, to_compress.length, 'compressed length:', (compressed as string).length);
// Should be a string and not equal to the original (unless incompressible)
expect(typeof compressed).toBe('string');
// Should not be longer than the input (unless incompressible)
expect((compressed as string).length).toBeLessThanOrEqual(to_compress.length);
// Should always round-trip
for (const d of decompressVariants) {
const decompressed = d.fn(compressed as string, settings);
expect(decompressed).toBe(to_compress);
}
}
});
});
});