UNPKG

@orama/orama

Version:

A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.

123 lines 3.86 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.boundedLevenshtein = boundedLevenshtein; exports.syncBoundedLevenshtein = syncBoundedLevenshtein; exports.levenshtein = levenshtein; /** * Inspired by: * https://github.com/Yomguithereal/talisman/blob/86ae55cbd040ff021d05e282e0e6c71f2dde21f8/src/metrics/levenshtein.js#L218-L340 */ function _boundedLevenshtein(term, word, tolerance) { // Handle base cases if (tolerance < 0) return -1; if (term === word) return 0; const m = term.length; const n = word.length; // Special case for empty strings if (m === 0) return n <= tolerance ? n : -1; if (n === 0) return m <= tolerance ? m : -1; // term = term.toLowerCase() // word = word.toLowerCase() const diff = Math.abs(m - n); // Special case for prefixes // If the searching word starts with the indexed word, return early. if (term.startsWith(word)) { // We just check if the remaining characters are within the tolerance return diff <= tolerance ? diff : -1; } // If the indexed word starts with the searching word, return early. if (word.startsWith(term)) { // any prefixed word is within the tolerance return 0; } // If the length difference is greater than the tolerance, return early if (diff > tolerance) return -1; // Initialize the matrix const matrix = []; for (let i = 0; i <= m; i++) { matrix[i] = [i]; for (let j = 1; j <= n; j++) { matrix[i][j] = i === 0 ? j : 0; } } // Fill the matrix for (let i = 1; i <= m; i++) { let rowMin = Infinity; for (let j = 1; j <= n; j++) { if (term[i - 1] === word[j - 1]) { matrix[i][j] = matrix[i - 1][j - 1]; } else { matrix[i][j] = Math.min(matrix[i - 1][j] + 1, // deletion matrix[i][j - 1] + 1, // insertion matrix[i - 1][j - 1] + 1 // substitution ); } rowMin = Math.min(rowMin, matrix[i][j]); } // Early termination if all values in this row exceed tolerance if (rowMin > tolerance) { return -1; } } return matrix[m][n] <= tolerance ? matrix[m][n] : -1; } /** * Computes the Levenshtein distance between two strings (a, b), returning early with -1 if the distance * is greater than the given tolerance. * It assumes that: * - tolerance >= ||a| - |b|| >= 0 */ function boundedLevenshtein(term, w, tolerance) { const distance = _boundedLevenshtein(term, w, tolerance); return { distance, isBounded: distance >= 0 }; } // This is only used internally, keep in sync with the previous one function syncBoundedLevenshtein(term, w, tolerance) { const distance = _boundedLevenshtein(term, w, tolerance); return { distance, isBounded: distance >= 0 }; } function levenshtein(a, b) { /* c8 ignore next 3 */ if (!a.length) { return b.length; } /* c8 ignore next 3 */ if (!b.length) { return a.length; } const swap = a; if (a.length > b.length) { a = b; b = swap; } const row = Array.from({ length: a.length + 1 }, (_, i) => i); let val = 0; for (let i = 1; i <= b.length; i++) { let prev = i; for (let j = 1; j <= a.length; j++) { if (b[i - 1] === a[j - 1]) { val = row[j - 1]; } else { val = Math.min(row[j - 1] + 1, Math.min(prev + 1, row[j] + 1)); } row[j - 1] = prev; prev = val; } row[a.length] = prev; } return row[a.length]; } //# sourceMappingURL=levenshtein.js.map