@raven-js/cortex
Version:
Zero-dependency machine learning, AI, and data processing library for modern JavaScript
131 lines (115 loc) • 4.46 kB
JavaScript
/**
* @author Anonyfox <max@anonyfox.com>
* @license MIT
* @see {@link https://github.com/Anonyfox/ravenjs}
* @see {@link https://ravenjs.dev}
* @see {@link https://anonyfox.com}
*/
/**
* @file Hamming distance algorithm implementation.
*
* The Hamming distance is the number of positions at which corresponding
* characters differ between two equal-length strings. It's the simplest
* edit distance metric, requiring only substitutions (no insertions or deletions).
*/
/**
 * Lower-case a string while guaranteeing the result has the same length.
 *
 * Whole-string `toLowerCase()` is used whenever it keeps the length unchanged.
 * Some mappings expand a character (for example U+0130 lower-cases to two
 * UTF-16 code units); an expansion would shift every later character and
 * break position-by-position comparison. In that case each code point is
 * folded on its own, and a code point whose folded form has a different
 * length is kept as-is.
 *
 * @param {string} text - String to fold
 * @returns {string} Folded string with `length === text.length`
 */
function foldCaseKeepingLength(text) {
  const lowered = text.toLowerCase();
  if (lowered.length === text.length) return lowered;

  return Array.from(text, (codePoint) => {
    const folded = codePoint.toLowerCase();
    return folded.length === codePoint.length ? folded : codePoint;
  }).join("");
}

/**
 * Calculate the Hamming distance between two equal-length strings.
 *
 * The Hamming distance is the number of positions at which the two strings
 * hold different characters. Only substitutions are counted; there are no
 * insertions or deletions, which is why both inputs must have the same length.
 *
 * Positions are UTF-16 code units (the strings are compared via `length` and
 * index access), so a character outside the Basic Multilingual Plane occupies
 * two positions.
 *
 * Typical uses:
 * - Error detection and correction codes
 * - DNA sequence analysis
 * - Fixed-length string comparison
 * - Binary data comparison
 *
 * Time complexity: O(n) where n is string length
 *
 * @param {string} source - Source string
 * @param {string} target - Target string (must be same length as source)
 * @param {{caseSensitive?: boolean}} [options={}] - Configuration options.
 *   `caseSensitive` defaults to `true`; when falsy, both strings are
 *   lower-cased (length-preserving) before comparison. `null` is treated
 *   like an omitted options object.
 * @returns {number} Hamming distance between the strings (0 to string length)
 *
 * @example
 * // Basic usage
 * hammingDistance("karolin", "kathrin"); // 3
 * hammingDistance("1011101", "1001001"); // 2
 *
 * // Case insensitive comparison
 * hammingDistance("Hello", "HELLO", { caseSensitive: false }); // 0
 *
 * // DNA sequence comparison
 * hammingDistance("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT"); // 7
 *
 * // Binary strings
 * hammingDistance("101010", "010101"); // 6 (all positions differ)
 *
 * @throws {Error} If strings are not equal length or not strings
 */
export function hammingDistance(source, target, options = {}) {
  if (typeof source !== "string" || typeof target !== "string") {
    throw new Error("Both arguments must be strings");
  }

  // Hamming distance is only defined for equal-length inputs
  if (source.length !== target.length) {
    throw new Error("Hamming distance requires equal-length strings");
  }

  // The default parameter only covers `undefined`; also accept an explicit null
  const { caseSensitive = true } = options ?? {};

  // Folding must not change lengths, otherwise the positions validated above
  // would no longer line up and the result would depend on argument order
  const sourceStr = caseSensitive ? source : foldCaseKeepingLength(source);
  const targetStr = caseSensitive ? target : foldCaseKeepingLength(target);

  // Identical strings need no scan
  if (sourceStr === targetStr) return 0;

  let distance = 0;
  for (let i = 0; i < sourceStr.length; i++) {
    if (sourceStr[i] !== targetStr[i]) {
      distance++;
    }
  }
  return distance;
}
/**
 * Calculate Hamming similarity score (normalized to 0-1 range).
 *
 * Similarity = 1 - (distance / string_length)
 * where distance comes from `hammingDistance` and string_length is the length
 * of the compared strings. Two identical strings (including two empty
 * strings) score 1 without any division taking place.
 *
 * @param {string} source - Source string
 * @param {string} target - Target string (must be same length as source)
 * @param {{caseSensitive?: boolean}} [options={}] - Configuration options
 *   (same as hammingDistance). `caseSensitive` defaults to `true`; `null` is
 *   treated like an omitted options object.
 * @returns {number} Similarity score between 0 (completely different) and 1 (identical)
 *
 * @example
 * hammingSimilarity("karolin", "kathrin"); // 0.571... (1 - 3/7)
 * hammingSimilarity("hello", "hello"); // 1.0 (identical)
 * hammingSimilarity("abc", "xyz"); // 0.0 (completely different)
 * hammingSimilarity("1010", "0101"); // 0.0 (all positions differ)
 *
 * @throws {Error} If strings are not equal length or not strings
 */
export function hammingSimilarity(source, target, options = {}) {
  if (typeof source !== "string" || typeof target !== "string") {
    throw new Error("Both arguments must be strings");
  }

  // Validated here (not only in hammingDistance) so the error names this function
  if (source.length !== target.length) {
    throw new Error("Hamming similarity requires equal-length strings");
  }

  // The default parameter only covers `undefined`; also accept an explicit null
  const normalizedOptions = options ?? {};

  // Same defaulting rule as hammingDistance, so the shortcut below agrees
  // with the distance computed by the delegated call
  const { caseSensitive = true } = normalizedOptions;
  const sourceStr = caseSensitive ? source : source.toLowerCase();
  const targetStr = caseSensitive ? target : target.toLowerCase();

  // Identical inputs score 1; this also keeps empty strings away from 0 / 0
  if (sourceStr === targetStr) return 1.0;

  const distance = hammingDistance(source, target, normalizedOptions);
  return 1 - distance / sourceStr.length;
}