UNPKG

align-arr

Version:

Find the minimal edit path between two generic arrays.

141 lines (140 loc) 4.69 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.levenshtein = void 0; const defaultCost = () => 1.0; const defaultEquals = (s, t) => s === t; /** * Computes the minimal edit path (Levenshtein distance) between two arrays. * Returns an array of edit operations (alignment) to transform source into target. * * @template S - Source element type * @template T - Target element type * @param {S[]} s - Source array * @param {T[]} t - Target array * @param {AlignerConfig<S, T>} [config] - Optional configuration for custom costs and equality * @returns {Edit<S, T>[]} The alignment (edit script) */ const levenshtein = (s, t, { delCost = defaultCost, insCost = defaultCost, subCost = defaultCost, equals = defaultEquals, } = {}) => { const sLen = s.length; const tLen = t.length; const matrix = initMatrix(sLen, tLen); for (let i = 0; i < sLen; i++) { for (let j = 0; j < tLen; j++) { const si = s[i]; const ti = t[j]; if (equals(si, ti)) { matrix[i + 1][j + 1].cost = matrix[i][j].cost; matrix[i + 1][j + 1].op = 'equal'; } else { const opCostDel = delCost(si); const costDel = matrix[i][j + 1].cost + opCostDel; const opCostIns = insCost(ti); const costIns = matrix[i + 1][j].cost + opCostIns; const opCostSub = subCost(si, ti); const costSub = matrix[i][j].cost + opCostSub; let minCostOpCost = opCostDel; let minCost = costDel; let minCostOp = 'delete'; if (costIns < costDel) { minCostOp = 'insert'; minCost = costIns; minCostOpCost = opCostIns; } else if (costSub < costDel) { minCostOp = 'substitute'; minCost = costSub; minCostOpCost = opCostSub; } matrix[i + 1][j + 1].op = minCostOp; matrix[i + 1][j + 1].cost = minCost; matrix[i + 1][j + 1].opCost = minCostOpCost; } } } return backtrack(matrix, s, t); }; exports.levenshtein = levenshtein; /** * Initializes the dynamic programming matrix for Levenshtein computation. * @param sLen Source array length * @param tLen Target array length * @returns {Cell[][]} Initialized matrix */ const initMatrix = (sLen, tLen) => { const matrix = [...Array(sLen + 1)].map(() => [...Array(tLen + 1)].map(() => ({ cost: 0.0, opCost: 0.0, }))); // Fill in the edges for (let i = 1; i < sLen + 1; i++) { matrix[i][0].cost = matrix[i - 1][0].cost + 1; matrix[i][0].op = 'delete'; } for (let j = 1; j < tLen + 1; j++) { matrix[0][j].cost = matrix[0][j - 1].cost + 1; matrix[0][j].op = 'insert'; } return matrix; }; /** * Backtracks through the matrix to construct the alignment (edit script). * @param matrix The DP matrix * @param s Source array * @param t Target array * @returns {Edit[]} The alignment (edit script) */ const backtrack = (matrix, s, t) => { let i = matrix.length - 1; let j = matrix[0].length - 1; const sequence = []; // Work backwards from bottom right until we hit top left let iPrev = i, jPrev = j; let sData, tData; while (i + j != 0) { const op = matrix[i][j].op; switch (op) { case 'equal': iPrev -= 1; jPrev -= 1; sData = s[iPrev]; tData = t[jPrev]; break; case 'substitute': iPrev -= 1; jPrev -= 1; sData = s[iPrev]; tData = t[jPrev]; break; case 'delete': iPrev -= 1; jPrev = j; sData = s[iPrev]; tData = undefined; break; case 'insert': iPrev = i; jPrev -= 1; sData = undefined; tData = t[jPrev]; break; default: throw Error('No op found'); } sequence.push({ operation: op, source: { position: iPrev, data: sData, }, target: { position: jPrev, data: tData, }, cost: matrix[i][j].opCost, }); i = iPrev; j = jPrev; } return sequence.reverse(); };