cmpstr
Version:
CmpStr is a lightweight, fast and well performing package for calculating string similarity
174 lines (172 loc) • 5.64 kB
JavaScript
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
/**
* Hash Table Utility
* src/utils/HashTable.ts
*
* @see https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
* @see https://en.wikipedia.org/wiki/Hash_table
*
* This module implements an instantiable hash table/cache using the FNV-1a hash algorithm.
* It allows for multiple independent caches (e.g. for metrics, normalization, etc.) with
* type safety and high performance. The FNV-1a algorithm is factored out into its own
* static utility class to avoid code duplication and memory overhead.
*
* The key() method supports any number of string arguments, enabling flexible cache keys
* for different use cases (e.g. normalization, metrics, etc.).
*
* @module Utils/HashTable
* @author Paul Köhler (komed3)
* @license MIT
*/
/**
* Hasher Utility
* Static class for FNV-1a hash calculation.
*/
class Hasher {
  // 32-bit FNV-1a parameters: multiplication prime and offset basis
  static FNV_PRIME = 0x01000193;
  static HASH_OFFSET = 0x811c9dc5;

  /**
   * Computes a 32-bit hash for a string using FNV-1a style xor/multiply rounds
   * followed by a final avalanche step.
   *
   * The string is consumed two UTF-16 code units at a time; each pair is packed
   * into one 32-bit word without overlapping bits, so code units above 0xFF
   * cannot mask each other. The string length is folded into the initial state
   * so that a trailing unpaired code unit is never confused with a pair whose
   * second unit is 0.
   *
   * All multiplications use Math.imul: a plain `*` on 32-bit operands yields
   * products beyond 2^53, which silently drops low bits before the value is
   * truncated back to 32 bits.
   *
   * @param {string} str - The string to hash
   * @return {number} - The computed hash value as an unsigned 32-bit integer
   */
  static fnv1a(str) {
    const len = str.length;
    // Reference the class explicitly so the method also works when detached
    const prime = Hasher.FNV_PRIME;
    let hash = Hasher.HASH_OFFSET ^ len;

    // Process complete pairs of code units (16 bits each -> one 32-bit word)
    const paired = len - (len % 2);
    for (let i = 0; i < paired; i += 2) {
      const word = str.charCodeAt(i) | (str.charCodeAt(i + 1) << 16);
      hash = Math.imul(hash ^ word, prime);
    }

    // Handle the trailing code unit of an odd-length string
    if (paired < len) {
      hash = Math.imul(hash ^ str.charCodeAt(paired), prime);
    }

    // Final mixing to improve distribution (same shifts and multipliers as the
    // MurmurHash3 32-bit finalizer)
    hash ^= hash >>> 16;
    hash = Math.imul(hash, 0x85ebca6b);
    hash ^= hash >>> 13;
    hash = Math.imul(hash, 0xc2b2ae35);
    hash ^= hash >>> 16;

    // Convert to unsigned 32-bit integer
    return hash >>> 0;
  }
}
/**
* HashTable class implements an instantiable hash table/cache.
* Allows for multiple independent caches with type safety and high performance.
*
* @template K - The type of the label for the key (e.g. string, MetricName, …)
* @template T - The type of value to be stored in the hash table (e.g. MetricCompute, string, …)
*/
class HashTable {
  // The max. length of a string to hash, which is set to 2048 characters.
  static MAX_LEN = 2048;
  // The max. number of entries the table may hold, which is set to 10,000.
  static TABLE_SIZE = 10_000;

  /**
   * The internal map to store entries.
   * The key is a string generated from the label and any number of hashed strings.
   * The value is of type T.
   */
  table = new Map();

  /**
   * Generates a hash key for any number of string arguments.
   * The key is in the format "label-H1-H2-H3-...", where each H is the
   * numeric hash of the corresponding string.
   *
   * @param {K} label - Label for this key (e.g. metric name, normalization flags, …)
   * @param {string[]} strs - Array of strings to hash (e.g. input, params, …)
   * @param {boolean} [sorted=false] - Whether to sort the hashes before creating the key,
   *   making the key independent of the order of `strs`
   * @returns {string|false} - The hash key or false if any string is too long
   */
  key(label, strs, sorted = false) {
    // Return false if any string exceeds the maximum length
    for (const str of strs) {
      if (str.length > HashTable.MAX_LEN) return false;
    }

    // Hash all strings
    const hashes = strs.map((s) => Hasher.fnv1a(s));

    // Sort numerically in ascending order; the default sort() would compare
    // the numbers as strings (e.g. 10 before 9)
    if (sorted) hashes.sort((a, b) => a - b);

    // Build key: label-H1-H2-H3-...
    return [label, ...hashes].join('-');
  }

  /**
   * Checks if a key exists in the hash table.
   *
   * @param {string} key - The key to check
   * @returns {boolean} - True if the key exists, false otherwise
   */
  has(key) {
    return this.table.has(key);
  }

  /**
   * Retrieves the entry from the hash table by its key.
   *
   * @param {string} key - The key to look up
   * @returns {T|undefined} - The entry if found, undefined otherwise
   */
  get(key) {
    return this.table.get(key);
  }

  /**
   * Adds an entry to the hash table or updates an existing one.
   *
   * Overwriting an existing key does not grow the table, so it is allowed
   * even when the table has reached TABLE_SIZE; only new keys are rejected
   * once the table is full.
   *
   * @param {string} key - The hashed key for the entry
   * @param {T} entry - The entry itself to add
   * @param {boolean} [update=true] - Whether to update the entry if it already exists
   * @returns {boolean} - True if the entry was stored; false if the key already
   *   exists and `update` is false, or if the key is new and the table is full
   */
  set(key, entry, update = true) {
    if (this.table.has(key)) {
      // Existing key: overwrite only when updates are requested
      if (!update) return false;
      this.table.set(key, entry);
      return true;
    }

    // New key: reject once the capacity limit is reached
    if (this.table.size >= HashTable.TABLE_SIZE) return false;

    this.table.set(key, entry);
    return true;
  }

  /**
   * Deletes an entry from the hash table by its key.
   *
   * @param {string} key - The key of the entry to delete
   */
  delete(key) {
    this.table.delete(key);
  }

  /**
   * Clears the hash table.
   * This method removes all entries from the hash table.
   */
  clear() {
    this.table.clear();
  }

  /**
   * Returns the current size of the hash table.
   *
   * @returns {number} - The number of entries in the hash table
   */
  size() {
    return this.table.size;
  }
}
export { HashTable };
//# sourceMappingURL=HashTable.js.map