UNPKG

@bottlenose/rxtranscribe

Version:

👂 Real-time speech-to-text (STT) transcription with RxJS

128 lines (116 loc) • 3.42 kB
(function (global, factory) { if (typeof define === "function" && define.amd) { define(["exports", "deepspeech", "rxjs", "rxjs/operators"], factory); } else if (typeof exports !== "undefined") { factory(exports, require("deepspeech"), require("rxjs"), require("rxjs/operators")); } else { var mod = { exports: {} }; factory(mod.exports, global.deepspeech, global.rxjs, global.operators); global.toDeepSpeech = mod.exports; } })(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function (_exports, _deepspeech, _rxjs, _operators) { "use strict"; Object.defineProperty(_exports, "__esModule", { value: true }); _exports.default = _exports.testExports = void 0; const errors = { missingModelDir: () => new Error('modelDir<String> is required for toDeepSpeech operator') }; const createModel = ({ modelDir, beamWidth = 1024, lmAlpha = 0.75, lmBeta = 1.85, _newModel = dir => new _deepspeech.Model(dir) }) => { const modelPath = `${modelDir}.pbmm`; const scorerPath = `${modelDir}.scorer`; const newModel = _newModel(modelPath); newModel.setBeamWidth(beamWidth); newModel.enableExternalScorer(scorerPath); return newModel; }; const transcribeChunks = ({ model, sampleRate, candidateCount = 1 }) => chunks => { const fullBuffer = Buffer.concat(chunks); const output = model.sttWithMetadata(fullBuffer, candidateCount); return output; }; const transcribe = ({ model, sampleRate = 16000, candidateCount = 1 }) => bufferedChunks$ => bufferedChunks$.pipe((0, _operators.filter)(chunks => !!chunks && chunks.length), (0, _operators.map)(transcribeChunks({ model, sampleRate, candidateCount }))); const transcriptWordReducer = ({ words, nextWord }, { text, start_time }) => text === ' ' ? 
{ words: [...words, { ...nextWord, endTime: start_time }], nextWord: { text: '', startTime: null } } : { words, nextWord: { text: `${nextWord.text}${text}`, startTime: nextWord.startTime || start_time } }; const standardizeOutput = () => deepSpeechMetadata => ({ transcripts: deepSpeechMetadata.transcripts.map(transcript => ({ confidence: transcript.confidence, words: transcript.tokens.reduce(transcriptWordReducer, { words: [], nextWord: { text: '', startTime: null } }).words })) }); const toDeepSpeech = ({ modelDir = process.env.DEEPSPEECH_MODEL_PATH, sampleRate = 16000, bufferSize = 3, candidateCount = 1, rawOutput = false, _createModel = createModel, _transcribe = transcribe } = {}) => { if (!modelDir) return () => (0, _rxjs.throwError)(errors.missingModelDir()); const model = _createModel({ modelDir }); return fileChunk$ => fileChunk$.pipe((0, _operators.bufferCount)(bufferSize), _transcribe({ model, sampleRate, candidateCount }), (0, _operators.map)(rawOutput ? metadata => metadata : standardizeOutput())); }; const testExports = { createModel, standardizeOutput, transcribe, transcribeChunks }; _exports.testExports = testExports; var _default = toDeepSpeech; _exports.default = _default; }); //# sourceMappingURL=toDeepSpeech.js.map