@bottlenose/rxtranscribe
Version:
👂 Realtime speech-to-text (S2T) transcription with RxJS
128 lines (116 loc) • 3.42 kB
JavaScript
// UMD (Universal Module Definition) wrapper: registers the module via AMD
// when `define` exists, via CommonJS when `exports` exists, and otherwise
// attaches the exports to the host global as `global.toDeepSpeech`,
// reading its dependencies off that same global object.
(function (global, factory) {
  if (typeof define === "function" && define.amd) {
    define(["exports", "deepspeech", "rxjs", "rxjs/operators"], factory);
  } else if (typeof exports !== "undefined") {
    factory(exports, require("deepspeech"), require("rxjs"), require("rxjs/operators"));
  } else {
    var mod = {
      exports: {}
    };
    // NOTE(review): this branch reads the operators from `global.operators`,
    // not `global.rxjs.operators` — confirm that matches the name the
    // browser bundle actually publishes.
    factory(mod.exports, global.deepspeech, global.rxjs, global.operators);
    global.toDeepSpeech = mod.exports;
  }
})(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function (_exports, _deepspeech, _rxjs, _operators) {
  "use strict";

  // Mark the export object as a transpiled ES module and pre-declare the
  // named exports (filled in at the bottom of the factory).
  Object.defineProperty(_exports, "__esModule", {
    value: true
  });
  _exports.default = _exports.testExports = void 0;
// Factory functions for every user-facing error this operator can emit.
const errors = {
  // Emitted when toDeepSpeech is created without a model directory.
  missingModelDir() {
    return new Error('modelDir<String> is required for toDeepSpeech operator');
  }
};
/**
 * Builds and configures a DeepSpeech Model from a model directory prefix.
 *
 * `modelDir` is used as a path prefix: the acoustic model is loaded from
 * `<modelDir>.pbmm` and the external scorer from `<modelDir>.scorer`.
 *
 * @param {Object} opts
 * @param {string} opts.modelDir - path prefix of the .pbmm/.scorer pair
 * @param {number} [opts.beamWidth=1024] - decoder beam width
 * @param {number} [opts.lmAlpha=0.75] - language-model weight (alpha)
 * @param {number} [opts.lmBeta=1.85] - word-insertion weight (beta)
 * @param {Function} [opts._newModel] - injectable Model constructor (tests)
 * @returns {Object} the configured DeepSpeech model
 */
const createModel = ({
  modelDir,
  beamWidth = 1024,
  lmAlpha = 0.75,
  lmBeta = 1.85,
  _newModel = dir => new _deepspeech.Model(dir)
}) => {
  const modelPath = `${modelDir}.pbmm`;
  const scorerPath = `${modelDir}.scorer`;
  const newModel = _newModel(modelPath);
  newModel.setBeamWidth(beamWidth);
  newModel.enableExternalScorer(scorerPath);
  // FIX: lmAlpha/lmBeta were accepted (with documented defaults) but never
  // applied to the model — the scorer silently ran with its built-in
  // hyperparameters. Apply them explicitly via the DeepSpeech API.
  newModel.setScorerAlphaBeta(lmAlpha, lmBeta);
  return newModel;
};
/**
 * Returns a function that concatenates an array of buffered audio chunks
 * and runs one DeepSpeech speech-to-text pass over the combined buffer.
 *
 * NOTE(review): `sampleRate` is accepted for interface symmetry with
 * `transcribe` but is not consumed here — presumably the model itself is
 * configured with the expected rate; confirm.
 *
 * @param {Object} opts
 * @param {Object} opts.model - DeepSpeech model exposing sttWithMetadata
 * @param {number} opts.sampleRate - unused, kept for compatibility
 * @param {number} [opts.candidateCount=1] - number of candidate transcripts
 * @returns {Function} (chunks: Buffer[]) => DeepSpeech metadata
 */
const transcribeChunks = ({
  model,
  sampleRate,
  candidateCount = 1
}) => chunks => model.sttWithMetadata(Buffer.concat(chunks), candidateCount);
/**
 * RxJS operator factory: maps a stream of chunk arrays to a stream of
 * DeepSpeech metadata, skipping empty/missing chunk arrays.
 *
 * @param {Object} opts
 * @param {Object} opts.model - DeepSpeech model
 * @param {number} [opts.sampleRate=16000] - forwarded to transcribeChunks
 * @param {number} [opts.candidateCount=1] - candidate transcripts per pass
 * @returns {Function} operator for an Observable of Buffer[] batches
 */
const transcribe = ({
  model,
  sampleRate = 16000,
  candidateCount = 1
}) => bufferedChunks$ => {
  const hasAudio = chunks => !!chunks && chunks.length;
  const runStt = transcribeChunks({
    model,
    sampleRate,
    candidateCount
  });
  return bufferedChunks$.pipe(
    (0, _operators.filter)(hasAudio),
    (0, _operators.map)(runStt)
  );
};
/**
 * Reducer that folds DeepSpeech character tokens into whole words.
 *
 * Accumulator shape: { words: Word[], nextWord: { text, startTime } }.
 * A ' ' token closes the in-progress word (stamping its endTime with the
 * space token's start_time); any other token is appended to it.
 *
 * @param {Object} acc - { words, nextWord }
 * @param {Object} token - DeepSpeech token { text, start_time }
 * @returns {Object} next accumulator
 */
const transcriptWordReducer = ({
  words,
  nextWord
}, {
  text,
  start_time
}) => {
  if (text === ' ') {
    // Word boundary: flush the in-progress word and start a fresh one.
    return {
      words: [...words, { ...nextWord,
        endTime: start_time
      }],
      nextWord: {
        text: '',
        startTime: null
      }
    };
  }
  return {
    words,
    nextWord: {
      text: `${nextWord.text}${text}`,
      // FIX: was `nextWord.startTime || start_time`, which treated a
      // legitimate startTime of 0 (a word beginning at t=0) as unset and
      // overwrote it with each subsequent token's start_time.
      startTime: nextWord.startTime === null ? start_time : nextWord.startTime
    }
  };
};
/**
 * Returns a mapper that converts raw DeepSpeech metadata into the
 * library's standard shape:
 *   { transcripts: [{ confidence, words: [{ text, startTime, endTime }] }] }
 *
 * @returns {Function} (deepSpeechMetadata) => standardized output
 */
const standardizeOutput = () => deepSpeechMetadata => ({
  transcripts: deepSpeechMetadata.transcripts.map(transcript => {
    const {
      words,
      nextWord
    } = transcript.tokens.reduce(transcriptWordReducer, {
      words: [],
      nextWord: {
        text: '',
        startTime: null
      }
    });
    // FIX: the reducer only flushes a word when it sees a ' ' boundary
    // token, so a transcript whose token stream does not end with a space
    // silently dropped its final word. Flush the in-progress word here;
    // no closing token exists, so its endTime is unknown (null).
    return {
      confidence: transcript.confidence,
      words: nextWord.text === '' ? words : [...words, { ...nextWord,
        endTime: null
      }]
    };
  })
});
/**
 * Operator factory: buffers incoming audio chunks, transcribes each batch
 * with DeepSpeech, and emits either raw metadata or standardized output.
 *
 * @param {Object} [opts]
 * @param {string} [opts.modelDir=process.env.DEEPSPEECH_MODEL_PATH] - model path prefix
 * @param {number} [opts.sampleRate=16000]
 * @param {number} [opts.bufferSize=3] - chunks batched per transcription pass
 * @param {number} [opts.candidateCount=1]
 * @param {boolean} [opts.rawOutput=false] - emit DeepSpeech metadata as-is
 * @param {Function} [opts._createModel] - injectable (tests)
 * @param {Function} [opts._transcribe] - injectable (tests)
 * @returns {Function} RxJS operator; errors the stream if modelDir is missing
 */
const toDeepSpeech = ({
  modelDir = process.env.DEEPSPEECH_MODEL_PATH,
  sampleRate = 16000,
  bufferSize = 3,
  candidateCount = 1,
  rawOutput = false,
  _createModel = createModel,
  _transcribe = transcribe
} = {}) => {
  // Without a model directory there is nothing to load: surface the error
  // through the stream rather than throwing synchronously.
  if (!modelDir) {
    return () => (0, _rxjs.throwError)(errors.missingModelDir());
  }
  const model = _createModel({
    modelDir
  });
  const transcribeOpts = {
    model,
    sampleRate,
    candidateCount
  };
  return fileChunk$ => {
    const formatOutput = rawOutput ? metadata => metadata : standardizeOutput();
    return fileChunk$.pipe(
      (0, _operators.bufferCount)(bufferSize),
      _transcribe(transcribeOpts),
      (0, _operators.map)(formatOutput)
    );
  };
};
// Internal helpers exposed solely so the unit tests can reach them.
const testExports = {
  createModel,
  standardizeOutput,
  transcribe,
  transcribeChunks
};
_exports.testExports = testExports;
// Default export: the toDeepSpeech operator factory.
_exports.default = toDeepSpeech;
});
//# sourceMappingURL=toDeepSpeech.js.map