UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

37 lines 1.86 kB
import { getMfccOptionsForGranularity } from "../alignment/SpeechAlignment.js";
import { computeMFCCs, extendDefaultMfccOptions } from "../dsp/MFCC.js";
import { euclideanDistance13Dim } from "../math/VectorMath.js";
import { Logger } from "../utilities/Logger.js";

/**
 * Prepares a search for a query audio segment inside a longer source audio.
 *
 * Computes MFCC features for both inputs using the 'low' granularity options
 * (with `zeroFirstCoefficient` enabled), builds the pairwise cost matrix
 * between the two feature sequences, and then walks over candidate start
 * offsets in the source.
 *
 * NOTE(review): this function is visibly unfinished. The cost matrix is
 * computed but never read, the search loop has no effect, and nothing is
 * returned (the promise resolves to `undefined`).
 *
 * @param sourceRawAudio - Audio to search in; passed as-is to `computeMFCCs`.
 * @param queryRawAudio - Audio to search for; passed as-is to `computeMFCCs`.
 * @returns {Promise<void>}
 */
export async function searchSpeech(sourceRawAudio, queryRawAudio) {
    const logger = new Logger();

    const granularity = 'low';

    const mfccOptions = extendDefaultMfccOptions({
        ...getMfccOptionsForGranularity(granularity),
        zeroFirstCoefficient: true
    });

    logger.start('Compute query MFCC features');
    const queryMfccs = await computeMFCCs(queryRawAudio, mfccOptions);

    logger.start('Compute source MFCC features');
    const sourceMfccs = await computeMFCCs(sourceRawAudio, mfccOptions);

    logger.start('Compute cost matrix');
    // Rows correspond to source entries, columns to query entries.
    const costMatrixColumns = computeCostMatrix(sourceMfccs, queryMfccs, euclideanDistance13Dim);

    logger.start('Search');

    const rowCount = sourceMfccs.length;
    const columnCount = queryMfccs.length;

    // A candidate match may span at most twice the query length in the source.
    const maxSearchWindow = columnCount * 2;

    for (let rowStartOffset = 0; rowStartOffset < rowCount; rowStartOffset++) {
        // Exclusive end of the candidate window, clamped to the source length.
        const rowEndOffset = Math.min(rowStartOffset + maxSearchWindow, rowCount);

        // TODO: evaluate the window [rowStartOffset, rowEndOffset) against
        // `costMatrixColumns` and record the best match.
    }
}

/**
 * Computes the full pairwise cost matrix between two sequences.
 *
 * The matrix is stored column-major: the result holds one `Float32Array` per
 * element of `sequence2`, and each column has one entry per element of
 * `sequence1`, so that
 * `result[columnIndex][rowIndex] === costFunction(sequence1[rowIndex], sequence2[columnIndex])`
 * (up to Float32 rounding).
 *
 * @param {ArrayLike<*>} sequence1 - Sequence indexed by row.
 * @param {ArrayLike<*>} sequence2 - Sequence indexed by column.
 * @param {function(*, *): number} costFunction - Returns the cost between an
 *   element of `sequence1` and an element of `sequence2`.
 * @returns {Float32Array[]} `sequence2.length` columns, each of length
 *   `sequence1.length`.
 */
function computeCostMatrix(sequence1, sequence2, costFunction) {
    const rowCount = sequence1.length;
    const columnCount = sequence2.length;

    const costMatrixColumns = [];

    for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        const column = new Float32Array(rowCount);

        for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
            // Index by row: writing to `column[columnIndex]` here would
            // overwrite a single slot on every iteration and leave the rest
            // of the column zero-filled.
            column[rowIndex] = costFunction(sequence1[rowIndex], sequence2[columnIndex]);
        }

        costMatrixColumns.push(column);
    }

    return costMatrixColumns;
}
//# sourceMappingURL=DTWSpeechSearch.js.map