UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

99 lines 3.5 kB
import { logToStderr } from '../utilities/Utilities.js';

const log = logToStderr;

/**
 * Aligns two sequences using dynamic time warping (DTW).
 *
 * The accumulated cost matrix has one row per element of `sequence2` and one
 * column per element of `sequence1`.
 *
 * @param {Array} sequence1 - Source sequence (matrix columns).
 * @param {Array} sequence2 - Destination sequence (matrix rows).
 * @param {Function} costFunction - Called as `costFunction(sequence1[column], sequence2[row])`
 *   and expected to return a numeric cost.
 * @returns {{ path: Array<{ source: number, dest: number }>, pathCost: number }}
 *   `path` runs from `{ source: 0, dest: 0 }` to the last element of both
 *   sequences, where `source` indexes `sequence1` and `dest` indexes `sequence2`.
 *   `pathCost` is the bottom-right cell of the accumulated cost matrix. If either
 *   sequence is empty, the result is `{ path: [], pathCost: 0 }`.
 */
export function alignDTW(sequence1, sequence2, costFunction) {
	if (sequence1.length === 0 || sequence2.length === 0) {
		return { path: [], pathCost: 0 };
	}

	const rowCount = sequence2.length;
	const columnCount = sequence1.length;

	// Compute accumulated cost matrix
	const accumulatedCostMatrix = computeAccumulatedCostMatrix(sequence1, sequence2, costFunction);

	// Find best path for the computed matrix
	const path = computeBestPath(accumulatedCostMatrix);

	// Best path cost is the bottom right element of the matrix.
	// NOTE: when exactly one of the sequences has a single element, that cell
	// lies on the Infinity border of the matrix, so `pathCost` is Infinity.
	const pathCost = accumulatedCostMatrix[rowCount - 1][columnCount - 1];

	return { path, pathCost };
}

/**
 * Builds the accumulated cost matrix used by `alignDTW`.
 *
 * Rows are `Float32Array`s, so accumulated costs are stored in single precision.
 * Cell (0, 0) stays at 0 and every other cell of row 0 and column 0 is set to
 * Infinity; `costFunction` is only evaluated for indices >= 1 of both sequences.
 *
 * @param {Array} sequence1 - Source sequence (matrix columns).
 * @param {Array} sequence2 - Destination sequence (matrix rows).
 * @param {Function} costFunction - Cost of pairing `sequence1[column]` with `sequence2[row]`.
 * @param {boolean} [deletionEnabled=true] - When false, the "left" (deletion)
 *   transition is excluded from the minimum.
 * @returns {Float32Array[]} One `Float32Array` of length `sequence1.length` per
 *   element of `sequence2`.
 */
function computeAccumulatedCostMatrix(sequence1, sequence2, costFunction, deletionEnabled = true) {
	const rowCount = sequence2.length;
	const columnCount = sequence1.length;

	const accumulatedCostMatrix = new Array(rowCount);

	// Initialize matrix (typed arrays start out filled with zeros)
	for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
		accumulatedCostMatrix[rowIndex] = new Float32Array(columnCount);
	}

	// Border cells other than (0, 0) are unreachable
	for (let i = 1; i < columnCount; i++) {
		accumulatedCostMatrix[0][i] = Infinity;
	}

	for (let i = 1; i < rowCount; i++) {
		accumulatedCostMatrix[i][0] = Infinity;
	}

	// Fill out the matrix, go row by row
	for (let rowIndex = 1; rowIndex < rowCount; rowIndex++) {
		const previousRow = accumulatedCostMatrix[rowIndex - 1];
		const currentRow = accumulatedCostMatrix[rowIndex];

		for (let columnIndex = 1; columnIndex < columnCount; columnIndex++) {
			const cost = costFunction(sequence1[columnIndex], sequence2[rowIndex]);

			const up = previousRow[columnIndex]; // insertion
			const left = currentRow[columnIndex - 1]; // deletion
			const upAndLeft = previousRow[columnIndex - 1]; // match

			currentRow[columnIndex] = cost + minimumOf3(up, deletionEnabled ? left : Infinity, upAndLeft);
		}
	}

	return accumulatedCostMatrix;
}

/**
 * Backtracks through an accumulated cost matrix from the bottom-right cell to
 * cell (0, 0), at each step moving to the cheapest of the up, left and
 * diagonal neighbours.
 *
 * @param {ArrayLike<number>[]} costMatrix - Non-empty matrix whose rows all have
 *   the length of the first row.
 * @returns {Array<{ source: number, dest: number }>} Visited cells ordered from
 *   (0, 0) to the bottom-right cell; `source` is the column index and `dest`
 *   is the row index.
 */
function computeBestPath(costMatrix) {
	const rowCount = costMatrix.length;
	const columnCount = costMatrix[0].length;

	const bestPath = [];

	let rowIndex = rowCount - 1;
	let columnIndex = columnCount - 1;

	bestPath.push({ source: columnIndex, dest: rowIndex });

	while (rowIndex > 0 || columnIndex > 0) {
		if (rowIndex === 0) {
			// On the top edge there is no row above: the only valid move is left.
			// Without this guard `costMatrix[-1]` is undefined and indexing it throws.
			columnIndex -= 1;
		} else if (columnIndex === 0) {
			// On the left edge there is no column to the left: the only valid move
			// is up. Without this guard the comparisons against `undefined` select
			// the diagonal and produce negative column indices.
			rowIndex -= 1;
		} else {
			const up = costMatrix[rowIndex - 1][columnIndex];
			const left = costMatrix[rowIndex][columnIndex - 1];
			const upAndLeft = costMatrix[rowIndex - 1][columnIndex - 1];

			const smallestCostDirection = argIndexOfMinimumOf3(up, left, upAndLeft);

			if (smallestCostDirection === 1) {
				rowIndex -= 1;
			} else if (smallestCostDirection === 2) {
				columnIndex -= 1;
			} else {
				rowIndex -= 1;
				columnIndex -= 1;
			}
		}

		bestPath.push({ source: columnIndex, dest: rowIndex });
	}

	return bestPath.reverse();
}

/**
 * Returns the smallest of three values. On ties the earlier argument wins.
 *
 * @param {number} x1
 * @param {number} x2
 * @param {number} x3
 * @returns {number}
 */
function minimumOf3(x1, x2, x3) {
	if (x1 <= x2 && x1 <= x3) {
		return x1;
	} else if (x2 <= x3) {
		return x2;
	} else {
		return x3;
	}
}

/**
 * Returns the 1-based position (1, 2 or 3) of the smallest of three values.
 * On ties the earlier argument wins.
 *
 * @param {number} x1
 * @param {number} x2
 * @param {number} x3
 * @returns {number}
 */
function argIndexOfMinimumOf3(x1, x2, x3) {
	if (x1 <= x2 && x1 <= x3) {
		return 1;
	} else if (x2 <= x3) {
		return 2;
	} else {
		return 3;
	}
}
//# sourceMappingURL=DTWSequenceAlignment.js.map