echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
122 lines (93 loc) • 3.33 kB
text/typescript
import { logToStderr } from '../utilities/Utilities.js'
import { AlignmentPath } from './SpeechAlignment.js'
const log = logToStderr
export function alignDTW<T, U>(sequence1: ArrayLike<T>, sequence2: ArrayLike<U>, costFunction: (a: T, b: U) => number) {
if (sequence1.length == 0 || sequence2.length == 0) {
return { path: [] as AlignmentPath, pathCost: 0 }
}
const rowCount = sequence2.length
const columnCount = sequence1.length
// Compute accumulated cost matrix
const accumulatedCostMatrix = computeAccumulatedCostMatrix(sequence1, sequence2, costFunction)
// Find best path for the computed matrix
const path = computeBestPath(accumulatedCostMatrix)
// Best path cost is the bottom right element of the matrix
const pathCost = accumulatedCostMatrix[rowCount - 1][columnCount - 1]
return { path, pathCost }
}
function computeAccumulatedCostMatrix<T, U>(sequence1: ArrayLike<T>, sequence2: ArrayLike<U>, costFunction: (a: T, b: U) => number, deletionEnabled = true) {
const rowCount = sequence2.length
const columnCount = sequence1.length
const accumulatedCostMatrix: Float32Array[] = new Array<Float32Array>(rowCount)
// Initialize matrix
for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
accumulatedCostMatrix[rowIndex] = new Float32Array(columnCount)
}
for (let i = 1; i < columnCount; i++) {
accumulatedCostMatrix[0][i] = Infinity
}
for (let i = 1; i < rowCount; i++) {
accumulatedCostMatrix[i][0] = Infinity
}
// Fill out the matrix, go row by row
for (let rowIndex = 1; rowIndex < rowCount; rowIndex++) {
const previousRow = accumulatedCostMatrix[rowIndex - 1]
const currentRow = accumulatedCostMatrix[rowIndex]
for (let columnIndex = 1; columnIndex < columnCount; columnIndex++) {
const cost = costFunction(sequence1[columnIndex], sequence2[rowIndex])
const up = previousRow[columnIndex] // insertion
const left = currentRow[columnIndex - 1] // deletion
const upAndLeft = previousRow[columnIndex - 1] // match
currentRow[columnIndex] = cost + minimumOf3(up, deletionEnabled ? left : Infinity, upAndLeft)
}
}
return accumulatedCostMatrix
}
function computeBestPath(costMatrix: Float32Array[]) {
const rowCount = costMatrix.length
const columnCount = costMatrix[0].length
const bestPath: AlignmentPath = []
let rowIndex = rowCount - 1
let columnIndex = columnCount - 1
bestPath.push({
source: columnIndex,
dest: rowIndex
})
while (rowIndex > 0 || columnIndex > 0) {
const up = costMatrix[rowIndex - 1][columnIndex]
const left = costMatrix[rowIndex][columnIndex - 1]
const upAndLeft = costMatrix[rowIndex - 1][columnIndex - 1]
const smallestCostDirection = argIndexOfMinimumOf3(up, left, upAndLeft)
if (smallestCostDirection == 1) {
rowIndex -= 1
} else if (smallestCostDirection == 2) {
columnIndex -= 1
} else {
rowIndex -= 1
columnIndex -= 1
}
bestPath.push({
source: columnIndex,
dest: rowIndex
})
}
return bestPath.reverse() as AlignmentPath
}
function minimumOf3(x1: number, x2: number, x3: number) {
if (x1 <= x2 && x1 <= x3) {
return x1
} else if (x2 <= x3) {
return x2
} else {
return x3
}
}
function argIndexOfMinimumOf3(x1: number, x2: number, x3: number) {
if (x1 <= x2 && x1 <= x3) {
return 1
} else if (x2 <= x3) {
return 2
} else {
return 3
}
}