UNPKG

@bbc/react-transcript-editor

Version:

A React component to make transcribing audio and video easier and faster.

98 lines (90 loc) 2.71 kB
/**
 * Convert autoEdit2 Json
 *
 * into
 *
 * ```
 * const blocks = [
 *   {
 *     text: 'Hello',
 *     type: 'paragraph',
 *     data: {
 *       speaker: 'Foo',
 *     },
 *     entityRanges: [],
 *   },
 *   {
 *     text: 'World',
 *     type: 'paragraph',
 *     data: {
 *       speaker: 'Bar',
 *     },
 *     entityRanges: [],
 *   },
 * ];
 * ```
 *
 * See samples folder and test file
 * for reference data structures
 */

import generateEntitiesRanges from '../generate-entities-ranges/index.js';

// A word whose text contains any of these characters (anywhere in the word)
// closes the paragraph currently being accumulated.
const PARAGRAPH_END_PUNCTUATION = /[.?!]/;

/**
 * Groups the words of an autoEdit2 transcript into paragraphs, based on punctuation.
 *
 * The input is walked in order (paragraph -> line -> word). Every word is appended
 * to the current paragraph; a word containing `.`, `?` or `!` ends that paragraph.
 * Words that follow the last punctuated word are kept as a final paragraph rather
 * than being discarded.
 *
 * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
 * @param {array} autoEditText - array of objects, each with a `paragraph` array of
 * objects, each with a `line` array of word objects (`text`, `startTime`, `endTime`)
 * @returns {array} list of `{ words, text }` objects, where `words` is an array of
 * `{ text, start, end }` objects and `text` is the array of the words' text
 */
const groupWordsInParagraphs = autoEditText => {
  const results = [];
  let paragraph = { words: [], text: [] };

  autoEditText.forEach(autoEditParagraph => {
    autoEditParagraph.paragraph.forEach(autoEditLine => {
      autoEditLine.line.forEach(word => {
        // adjusting time reference attributes from
        // `startTime` `endTime` to `start` `end` for the word object
        paragraph.words.push({
          text: word.text,
          start: word.startTime,
          end: word.endTime
        });
        paragraph.text.push(word.text);

        // if word contains punctuation, close the paragraph and start a new one
        if (PARAGRAPH_END_PUNCTUATION.test(word.text)) {
          results.push(paragraph);
          paragraph = { words: [], text: [] };
        }
      });
    });
  });

  // Flush trailing words: without this, a transcript that does not end on a
  // punctuated word would silently lose everything after the last `.`, `?` or `!`.
  if (paragraph.words.length > 0) {
    results.push(paragraph);
  }

  return results;
};

/**
 * Converts an autoEdit2 transcript into a list of draftJs-style content blocks.
 *
 * @param {object} autoEdit2Json - autoEdit2 transcript; only its `text` attribute is read
 * @returns {array} one block per paragraph, with the paragraph's words joined by a
 * single space as `text`, `type` set to 'paragraph', and `data.speaker` set to 'TBC'
 */
const autoEdit2ToDraft = autoEdit2Json =>
  groupWordsInParagraphs(autoEdit2Json.text).map(paragraph => ({
    text: paragraph.text.join(' '),
    type: 'paragraph',
    data: {
      speaker: 'TBC'
    },
    // the entities as ranges are each word in the space-joined text,
    // so the offset from the beginning of the paragraph and the length
    // need to be computed for each word (delegated to generateEntitiesRanges)
    entityRanges: generateEntitiesRanges(paragraph.words, 'text')
  }));

export default autoEdit2ToDraft;