lz77
Version:
A TypeScript implementation of LZ77, usable in Node.js and browsers.
196 lines (185 loc) • 7.15 kB
text/typescript
// lz77 - BSD 2-Clause License - Copyright (c) 2024 Weston Houghton
// Legacy LZ77 compress implementation for benchmarking
/**
 * Tunable parameters of the LZ77 text encoding.
 *
 * Fields marked "derived" are recomputed by setup() on every call, so a value
 * supplied for them by a caller is overwritten.
 */
interface LZ77Settings {
/** Marker that introduces a back-reference; a literal occurrence of it is written doubled. */
refPrefix: string;
/** Radix used by encodeRefInt; every encoded digit is a single character. */
refIntBase: number;
/** Character code that represents the digit 0. */
refIntFloorCode: number;
/** Derived: refIntFloorCode + refIntBase - 1. */
refIntCeilCode?: number;
/** Derived: refIntBase^2 - 1. */
maxStringDistance?: number;
/** Shortest match that is replaced by a reference; reference lengths are stored relative to it. */
minStringLength: number;
/** Derived: refIntBase - 1 + minStringLength. */
maxStringLength?: number;
/** Look-back window used when windowLength is unset (or falsy). */
defaultWindow: number;
/** Derived: maxStringDistance + minStringLength; the compressors reject larger windows. */
maxWindow?: number;
/** Optional override for the look-back window. */
windowLength?: number;
}
/**
 * Baseline settings merged under caller-supplied parameters by setup().
 * The `undefined` entries are placeholders for values setup() derives
 * (or, for windowLength, for an optional caller override).
 */
const defaultSettings: LZ77Settings = {
  // Reference marker and the digit alphabet: 96 consecutive characters starting at the space.
  refPrefix: '`',
  refIntBase: 96,
  refIntFloorCode: ' '.charCodeAt(0),
  refIntCeilCode: undefined,

  // Match limits.
  maxStringDistance: undefined,
  minStringLength: 5,
  maxStringLength: undefined,

  // Look-back window.
  defaultWindow: 144,
  maxWindow: undefined,
  windowLength: undefined,
};
// Loosely typed string-keyed bag; used by extend(), which copies arbitrary properties between objects.
type AnyObject = Record<string, any>;
/**
 * Invokes `iterator(value, key, collection)` for every element of `obj`.
 *
 * - Real arrays (anything whose `forEach` is `Array.prototype.forEach`) are
 *   walked with the native `forEach`.
 * - Other values with a numeric `length` (array-likes, strings) are walked by
 *   index from 0 to length - 1.
 * - Anything else is walked over its own enumerable properties.
 *
 * @param obj      Collection to walk; `null` and `undefined` are ignored.
 * @param iterator Callback receiving (value, key/index, collection).
 * @param context  Optional `this` value for the callback.
 */
const each = (obj: any, iterator: (val: any, key: any, obj: any) => void, context?: any): void => {
  // `== null` deliberately matches both null and undefined; a strict null
  // check let undefined through and crashed on the property access below.
  if (obj == null) return;

  if (obj.forEach === Array.prototype.forEach) {
    obj.forEach(iterator, context);
    return;
  }

  // A length that equals its own numeric coercion is a number (NaN excluded).
  if (obj.length === +obj.length) {
    for (let i = 0, l = obj.length; i < l; i++) {
      iterator.call(context, obj[i], i, obj);
    }
    return;
  }

  for (const key in obj) {
    // Skip properties inherited through the prototype chain.
    if (Object.prototype.hasOwnProperty.call(obj, key)) {
      iterator.call(context, obj[key], key, obj);
    }
  }
};
/**
 * Copies every enumerable property (inherited ones included) of each source
 * onto `obj`, left to right, so later sources win. Falsy sources are skipped.
 *
 * @param obj     Target object; mutated in place.
 * @param sources Objects whose properties are copied onto the target.
 * @returns The same `obj` instance that was passed in.
 */
const extend = (obj: AnyObject, ...sources: AnyObject[]): AnyObject => {
  const copyInto = (source: AnyObject): void => {
    if (!source) return;
    for (const prop in source) {
      obj[prop] = source[prop];
    }
  };

  each(sources, copyInto);
  return obj;
};
/**
 * Builds the effective settings for one compression run: the defaults
 * overlaid with `params`, plus the limits that follow from the reference
 * encoding (these always replace any caller-supplied value).
 *
 * @param params Optional overrides for the default settings.
 * @returns A fresh settings object with all derived limits filled in.
 */
function setup(params: Partial<LZ77Settings> = {}): LZ77Settings {
  const merged = extend({}, defaultSettings, params) as LZ77Settings;
  const { refIntBase: base, refIntFloorCode: floorCode, minStringLength: minLength } = merged;

  // Largest value two reference digits can hold.
  const maxDistance = base ** 2 - 1;

  merged.refIntCeilCode = floorCode + base - 1;
  merged.maxStringDistance = maxDistance;
  // Largest value one reference digit can hold, shifted by the minimum match length.
  merged.maxStringLength = base - 1 + minLength;
  merged.maxWindow = maxDistance + minLength;
  return merged;
}
/**
 * Encodes a non-negative integer as exactly `width` characters in base
 * `settings.refIntBase`, where digit d is the character with code
 * `settings.refIntFloorCode + d`. Shorter encodings are left-padded with the
 * zero digit.
 *
 * The accepted range is 0 .. refIntBase^width - 1 inclusive. The upper bound
 * used to be excluded, which rejected the very maxima that setup() derives
 * (refIntCeilCode, maxStringDistance, maxStringLength).
 *
 * @param value    Integer to encode.
 * @param width    Number of characters to produce.
 * @param settings Supplies refIntBase and refIntFloorCode.
 * @returns The encoded digits, most significant first.
 * @throws Error if `value` is negative, NaN, or does not fit in `width` digits.
 */
function encodeRefInt(value: number, width: number, settings: LZ77Settings): string {
  const base = settings.refIntBase;
  const zeroCode = settings.refIntFloorCode;

  // Written as a positive test so NaN falls through to the error as well.
  const inRange = value >= 0 && value < Math.pow(base, width);
  if (!inRange) {
    throw new Error('Reference int out of range: ' + value + ' (width = ' + width + ')');
  }

  let encoded = '';
  let remaining = value;
  while (remaining > 0) {
    encoded = String.fromCharCode((remaining % base) + zeroCode) + encoded;
    remaining = Math.floor(remaining / base);
  }

  // Left-pad with the zero digit up to the requested width.
  const missingLength = width - encoded.length;
  for (let i = 0; i < missingLength; i++) {
    encoded = String.fromCharCode(zeroCode) + encoded;
  }
  return encoded;
}
/**
 * Encodes a match length as a single reference digit. The stored value is the
 * length's excess over `settings.minStringLength`.
 *
 * @param length   Length of the match being referenced.
 * @param settings Supplies minStringLength and the digit alphabet.
 * @returns One encoded character.
 */
function encodeRefLength(length: number, settings: LZ77Settings): string {
  const excessOverMinimum = length - settings.minStringLength;
  return encodeRefInt(excessOverMinimum, 1, settings);
}
/**
 * Returns the lookup key for the `len` characters of `str` starting at `pos`.
 *
 * Despite the name, no hashing is done: the key is the substring itself
 * (truncated at the end of `str`), so equal keys mean equal text.
 *
 * @param str Text to take the key from.
 * @param pos Zero-based start index; callers pass a non-negative value.
 * @param len Number of characters in the key.
 */
function hashSubstring(str: string, pos: number, len: number): string {
  // slice() replaces the deprecated substr(); they agree for pos >= 0 and len >= 0.
  return str.slice(pos, pos + len);
}
/**
 * LZ77-compresses `source`, finding earlier occurrences through a table keyed
 * by the minStringLength characters that start at each visited position.
 *
 * Output format:
 * - a literal character is copied as-is, except that a literal refPrefix is
 *   written twice;
 * - a match is written as refPrefix + two-digit distance + one-digit
 *   (length - minStringLength), with digits produced by encodeRefInt.
 *
 * Only positions where the main loop stops are indexed; positions skipped by
 * an emitted match are not.
 *
 * @param source Text to compress.
 * @param params Optional overrides for the default settings.
 * @returns The compressed text, or `false` when `source` is not a string.
 * @throws Error when the window is larger than the derived maxWindow; errors
 *         from encodeRefInt propagate if a distance cannot be encoded.
 */
export function compressHashTable(source: string, params?: Partial<LZ77Settings>): string | false {
  if (Object.prototype.toString.call(source) !== '[object String]') return false;
  const settings = setup(params);
  const windowLength = settings.windowLength || settings.defaultWindow;
  if (windowLength > (settings.maxWindow as number)) throw new Error('Window length too large');

  const prefix = settings.refPrefix;
  const escapedPrefix = prefix + prefix;
  const minLen = settings.minStringLength;
  // One below maxStringLength: this is the longest match compressLegacy ever
  // emits, and it stays inside the range encodeRefLength accepts. Letting a
  // match grow to maxStringLength made long repeats throw
  // "Reference int out of range".
  const maxLen = (settings.maxStringLength as number) - 1;
  const lastPos = source.length - minLen;
  const hashTable = new Map<string, number[]>();

  let compressed = '';
  let pos = 0;

  while (pos < lastPos) {
    const windowStart = Math.max(pos - windowLength, 0);
    let bestDistance = 0;
    let bestLength = 0;

    // pos < lastPos guarantees a full minLen-character key is available here.
    const key = hashSubstring(source, pos, minLen);
    let candidates = hashTable.get(key);
    if (candidates === undefined) {
      candidates = [];
      hashTable.set(key, candidates);
    }

    // Newest first; positions were appended in increasing order, so the first
    // one outside the window ends the scan.
    for (let i = candidates.length - 1; i >= 0; i--) {
      const candidatePos = candidates[i];
      if (candidatePos < windowStart) break;

      const distance = pos - candidatePos;
      // Like compressLegacy, never let the referenced text run into the text
      // being encoded, so the reference can be resolved by copying the span in
      // one piece. NOTE(review): the decoder is not in this file — if it
      // supports overlapping references this limit can be relaxed to maxLen.
      const limit = Math.min(maxLen, distance);
      if (limit < minLen) continue;

      // Equal keys already prove the first minLen characters match.
      let matchLength = minLen;
      while (
        matchLength < limit &&
        source.charAt(candidatePos + matchLength) === source.charAt(pos + matchLength)
      ) {
        matchLength++;
      }

      if (matchLength > bestLength) {
        bestDistance = distance;
        bestLength = matchLength;
      }
    }
    candidates.push(pos);

    if (bestLength > 0) {
      compressed += prefix + encodeRefInt(bestDistance, 2, settings) + encodeRefLength(bestLength, settings);
      pos += bestLength;
    } else {
      const literal = source.charAt(pos);
      compressed += literal !== prefix ? literal : escapedPrefix;
      pos++;
    }
  }

  // The tail is too short to be matched; copy it, escaping with the configured
  // prefix (previously a hard-coded backtick, wrong for a custom refPrefix).
  return compressed + source.slice(pos).split(prefix).join(escapedPrefix);
}
/**
 * Reference LZ77 compressor kept as the benchmarking baseline. For every
 * position it scans the look-back window linearly for the longest earlier
 * occurrence.
 *
 * Output format:
 * - a literal character is copied as-is, except that a literal refPrefix is
 *   written twice;
 * - a match is written as refPrefix + two-digit distance + one-digit
 *   (length - minStringLength), with digits produced by encodeRefInt.
 *
 * Scan behaviour preserved from the original:
 * - a candidate is only recorded when a longer comparison fails, so a match
 *   that is still growing when the scan reaches `pos` is not used;
 * - recorded matches are at most maxStringLength - 1 characters long.
 *
 * @param source Text to compress.
 * @param params Optional overrides for the default settings.
 * @returns The compressed text, or `false` when `source` is not a string.
 * @throws Error when the window is larger than the derived maxWindow; errors
 *         from encodeRefInt propagate if a distance cannot be encoded.
 */
export function compressLegacy(source: string, params?: Partial<LZ77Settings>): string | false {
  if (Object.prototype.toString.call(source) !== '[object String]') return false;
  const settings = setup(params);
  const windowLength = settings.windowLength || settings.defaultWindow;
  if (windowLength > (settings.maxWindow as number)) throw new Error('Window length too large');

  const prefix = settings.refPrefix;
  const escapedPrefix = prefix + prefix;
  const minLen = settings.minStringLength;
  const maxLen = settings.maxStringLength as number;
  const lastPos = source.length - minLen;

  let compressed = '';
  let pos = 0;

  while (pos < lastPos) {
    let searchStart = Math.max(pos - windowLength, 0);
    let matchLength = minLen;
    let foundMatch = false;
    const bestMatch = {
      distance: settings.maxStringDistance as number,
      length: 0
    };

    // The candidate span must end before pos, so references never overlap the
    // text being encoded.
    while ((searchStart + matchLength) < pos) {
      // The matching loop is intentionally left exactly as in the original
      // (including substr) so the baseline's output does not change.
      const isValidMatch =
        (source.substr(searchStart, matchLength) === source.substr(pos, matchLength)) &&
        (matchLength < maxLen);
      if (isValidMatch) {
        // Try one character more at the same start.
        matchLength++;
        foundMatch = true;
      } else {
        // The last length that compared equal is one less than the one that failed.
        const realMatchLength = matchLength - 1;
        if (foundMatch && (realMatchLength > bestMatch.length)) {
          bestMatch.distance = pos - searchStart;
          bestMatch.length = realMatchLength;
        }
        // Move on to the next start position.
        matchLength = minLen;
        searchStart++;
        foundMatch = false;
      }
    }

    if (bestMatch.length) {
      compressed += prefix + encodeRefInt(bestMatch.distance, 2, settings) + encodeRefLength(bestMatch.length, settings);
      pos += bestMatch.length;
    } else {
      const literal = source.charAt(pos);
      compressed += literal !== prefix ? literal : escapedPrefix;
      pos++;
    }
  }

  // The tail is too short to be matched; copy it, escaping with the configured
  // prefix (previously a hard-coded backtick, wrong for a custom refPrefix).
  return compressed + source.slice(pos).split(prefix).join(escapedPrefix);
}