@raven-js/cortex
Version:
Zero-dependency machine learning, AI, and data processing library for modern JavaScript
172 lines (148 loc) • 5.74 kB
JavaScript
/**
* @author Anonyfox <max@anonyfox.com>
* @license MIT
* @see {@link https://github.com/Anonyfox/ravenjs}
* @see {@link https://ravenjs.dev}
* @see {@link https://anonyfox.com}
*/
/**
* @file Optimal String Alignment (OSA) distance algorithm implementation.
*
* The OSA distance allows insertions, deletions, substitutions, and adjacent
* transpositions with the restriction that no substring can be edited more than once.
* This is distinct from the unrestricted Damerau-Levenshtein distance.
*/
/**
 * Calculate the Optimal String Alignment (OSA) distance between two strings.
 *
 * OSA distance allows four edit operations, each with a cost of 1:
 * - Insertion of a character
 * - Deletion of a character
 * - Substitution of a character
 * - Transposition of two adjacent characters
 *
 * The key restriction: no substring can be edited more than once, which means
 * the triangle inequality does not hold (unlike true Damerau-Levenshtein).
 *
 * Strings are compared by UTF-16 code unit (`string.length` / index access).
 *
 * Time complexity: O(m×n) where m and n are string lengths
 * Space complexity: O(n) where n is the target length (three rolling rows)
 *
 * @param {string} source - Source string
 * @param {string} target - Target string
 * @param {{maxDistance?: number, caseSensitive?: boolean}} [options={}] - Configuration options.
 *   `maxDistance` (default `Infinity`) caps the returned value and enables early termination;
 *   `caseSensitive` (default `true`) lower-cases both strings first when falsy.
 *   `null` is treated the same as omitting the argument.
 * @returns {number} OSA distance between the strings, never greater than `maxDistance`
 * @throws {Error} If `source` or `target` is not a string
 * @throws {Error} If `maxDistance` is not a number, is negative, or is NaN
 *
 * @example
 * // Basic usage
 * osaDistance("kitten", "sitting"); // 3
 *
 * // Adjacent transposition (OSA allows this in 1 operation)
 * osaDistance("ab", "ba"); // 1
 *
 * // Case insensitive comparison
 * osaDistance("Hello", "hello", { caseSensitive: false }); // 0
 *
 * // Early termination with maximum distance
 * osaDistance("very long string", "completely different", { maxDistance: 5 }); // 5
 */
export function osaDistance(source, target, options = {}) {
  // Input validation with consistent error patterns
  if (typeof source !== "string" || typeof target !== "string") {
    throw new Error("Both arguments must be strings");
  }

  // The `= {}` default only replaces `undefined`; an explicit `null` would
  // otherwise make the destructuring below throw a TypeError.
  const { maxDistance = Number.POSITIVE_INFINITY, caseSensitive = true } =
    options === null ? {} : options;

  if (
    typeof maxDistance !== "number" ||
    maxDistance < 0 ||
    Number.isNaN(maxDistance)
  ) {
    throw new Error("maxDistance must be a non-negative finite number");
  }

  // Apply case folding if needed
  const sourceStr = caseSensitive ? source : source.toLowerCase();
  const targetStr = caseSensitive ? target : target.toLowerCase();
  const sourceLen = sourceStr.length;
  const targetLen = targetStr.length;

  // Trivial cases: transforming to/from the empty string costs one edit per character
  if (sourceLen === 0) return Math.min(targetLen, maxDistance);
  if (targetLen === 0) return Math.min(sourceLen, maxDistance);

  // The length difference is a lower bound on the distance, so the cap is already reached
  if (Math.abs(sourceLen - targetLen) >= maxDistance) return maxDistance;

  // The recurrence only ever reads rows i, i-1 and i-2, so three reusable rows
  // replace the full (sourceLen+1)×(targetLen+1) matrix.
  const rowWidth = targetLen + 1;
  let twoRowsBack = new Array(rowWidth).fill(0); // row i-2 (not read while i === 1)
  let previousRow = Array.from({ length: rowWidth }, (_, j) => j); // row 0: j insertions
  let currentRow = new Array(rowWidth).fill(0);

  for (let i = 1; i <= sourceLen; i++) {
    const sourceChar = sourceStr[i - 1];

    // Column 0: deleting the first i source characters
    currentRow[0] = i;
    let minRowValue = Number.POSITIVE_INFINITY;

    for (let j = 1; j <= targetLen; j++) {
      // Cost of substitution (0 if characters match, 1 if different)
      const substitutionCost = sourceChar === targetStr[j - 1] ? 0 : 1;

      // Standard edit operations
      let distance = Math.min(
        previousRow[j] + 1, // Deletion
        currentRow[j - 1] + 1, // Insertion
        previousRow[j - 1] + substitutionCost, // Substitution
      );

      // Adjacent transposition (OSA-specific operation). When substitutionCost
      // is 0 here all four characters are equal and the diagonal term above is
      // already at least as small, so this only ever lowers the result at cost 1.
      if (
        i > 1 &&
        j > 1 &&
        sourceChar === targetStr[j - 2] &&
        sourceStr[i - 2] === targetStr[j - 1]
      ) {
        distance = Math.min(distance, twoRowsBack[j - 2] + substitutionCost);
      }

      currentRow[j] = distance;
      minRowValue = Math.min(minRowValue, distance);
    }

    // Early termination: if the entire row reaches maxDistance, no solution below the limit
    if (minRowValue >= maxDistance) {
      return maxDistance;
    }

    // Rotate rows; the oldest row's storage is recycled and fully overwritten next pass
    const recycledRow = twoRowsBack;
    twoRowsBack = previousRow;
    previousRow = currentRow;
    currentRow = recycledRow;
  }

  // After the final rotation the last computed row lives in previousRow
  return Math.min(previousRow[targetLen], maxDistance);
}
/**
 * Convert the OSA distance of two strings into a similarity score in [0, 1].
 *
 * Similarity = 1 - (distance / length_of_longer_string)
 *
 * Shortcuts taken before any distance is computed:
 * - strings that are equal (after optional case folding) score 1
 * - if exactly one of the strings is empty the score is 0
 *
 * The options object is forwarded unchanged to `osaDistance`. Because
 * `osaDistance` never returns more than `maxDistance`, supplying that option
 * can only raise the score: the result is then an upper bound on the uncapped
 * similarity. Option validation happens inside `osaDistance`, so it is only
 * performed when neither shortcut above applies.
 *
 * @param {string} source - Source string
 * @param {string} target - Target string
 * @param {{maxDistance?: number, caseSensitive?: boolean}} [options={}] - Configuration options (same as osaDistance)
 * @returns {number} Similarity score between 0 (completely different) and 1 (identical)
 * @throws {Error} If `source` or `target` is not a string
 *
 * @example
 * osaSimilarity("kitten", "sitting"); // 0.571... (1 - 3/7)
 * osaSimilarity("hello", "hello"); // 1.0 (identical)
 * osaSimilarity("abc", "xyz"); // 0.0 (completely different)
 */
export function osaSimilarity(source, target, options = {}) {
  const bothAreStrings =
    typeof source === "string" && typeof target === "string";
  if (!bothAreStrings) {
    throw new Error("Both arguments must be strings");
  }

  // Only an explicit `caseSensitive: false` enables folding for the shortcut checks
  const foldCase = options.caseSensitive === false;
  const left = foldCase ? source.toLowerCase() : source;
  const right = foldCase ? target.toLowerCase() : target;

  // Equal strings (including two empty strings) are a perfect match
  if (left === right) {
    return 1.0;
  }

  // Exactly one side is empty at this point: nothing in common
  if (left.length === 0 || right.length === 0) {
    return 0.0;
  }

  // Normalise by the longer string, the largest distance two strings of these lengths can have
  const longerLength = left.length > right.length ? left.length : right.length;
  const editDistance = osaDistance(source, target, options);
  const score = 1 - editDistance / longerLength;

  return Math.max(0, score);
}