UNPKG

@valoric/whisper-node

Version:

Local audio transcription on CPU. Node.js bindings for OpenAI's Whisper.

24 lines 1.09 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Parse timestamped transcript text into an array of segments.
 *
 * Every segment is recognised by the shape
 * "[00:03:04.000 --> 00:03:13.000] some speech": a bracketed pair of
 * timestamps followed by the spoken text up to the end of that line.
 *
 * @param {string} vtt - Raw transcript text, one segment per line.
 * @returns {{ start: string, end: string, speech: string }[]} Segments in
 *   input order. Segments whose speech is empty (or whitespace only) are
 *   dropped; an input with no recognisable segment yields an empty array.
 * @throws {TypeError} If `vtt` is not a string.
 */
function parseTranscript(vtt) {
    if (typeof vtt !== 'string') {
        throw new TypeError('parseTranscript expects the transcript as a string');
    }
    // Capture groups pull out start, end and speech in one pass, so speech that
    // itself contains "] " is kept whole and a timestamp with nothing after it
    // yields an empty string rather than undefined.
    // The literal is created per call: a shared /g regex would carry lastIndex
    // state from one call into the next.
    const segmentPattern = /\[([0-9:.]+)\s-->\s([0-9:.]+)\](.*)/g;
    const segments = [];
    let match;
    // The pattern always consumes at least the bracketed timestamps, so exec()
    // can never return a zero-length match and the loop always advances.
    while ((match = segmentPattern.exec(vtt)) !== null) {
        const [, start, end, rawSpeech] = match;
        // Strip only the padding in front of the text. Removing "the first
        // space" anywhere would glue words together when the speech starts
        // with punctuation (". Hello" must not become ".Hello").
        const speech = rawSpeech.replace(/^\s+/, '');
        // Skip segments that carry no speech.
        if (speech.length > 0) {
            segments.push({ start, end, speech });
        }
    }
    return segments;
}
exports.default = parseTranscript;
//# sourceMappingURL=tsToArray.js.map