@bbc/react-transcript-editor
Version:
A React component to make transcribing audio and video easier and faster.
98 lines (90 loc) • 2.71 kB
JavaScript
/**
* Converts autoEdit2 JSON into an array of Draft.js content blocks,
* for example:
*
```
const blocks = [
{
text: 'Hello',
type: 'paragraph',
data: {
speaker: 'Foo',
},
entityRanges: [],
},
{
text: 'World',
type: 'paragraph',
data: {
speaker: 'Bar',
},
entityRanges: [],
},
];
```
*
* See samples folder and test file
* for reference data structures
*/
import generateEntitiesRanges from '../generate-entities-ranges/index.js';
/**
 * Groups the words of an autoEdit2 transcript into paragraphs, closing a
 * paragraph after every word that contains `.`, `?` or `!`.
 *
 * Words that come after the last punctuated word are returned as a final
 * paragraph rather than being discarded.
 *
 * @todo To be more accurate, should introduce an honorifics library to do the splitting of the words.
 * @param {array} autoEditText - array of autoEdit2 paragraph objects; each has a
 *   `paragraph` array of line objects, and each line has a `line` array of word
 *   objects with `text`, `startTime` and `endTime` attributes.
 * @returns {array} array of `{ words, text }` objects, where `words` is a list of
 *   `{ text, start, end }` word objects and `text` is the list of the word strings.
 */
const groupWordsInParagraphs = autoEditText => {
  const punctuation = /[.?!]/;
  const results = [];
  let paragraph = {
    words: [],
    text: []
  };

  autoEditText.forEach(autoEditparagraph => {
    autoEditparagraph.paragraph.forEach(autoEditLine => {
      autoEditLine.line.forEach(word => {
        // adjusting time reference attributes from
        // `startTime` `endTime` to `start` `end`
        // for word object
        paragraph.words.push({
          text: word.text,
          start: word.startTime,
          end: word.endTime
        });
        paragraph.text.push(word.text);

        // a word containing punctuation closes the current paragraph
        if (punctuation.test(word.text)) {
          results.push(paragraph);
          // reset paragraph
          paragraph = {
            words: [],
            text: []
          };
        }
      });
    });
  });

  // keep trailing words that were not followed by any punctuation,
  // otherwise the end of the transcript would be lost
  if (paragraph.words.length > 0) {
    results.push(paragraph);
  }

  return results;
};
/**
 * Converts an autoEdit2 transcript into a list of Draft.js content blocks.
 *
 * The words found under `autoEdit2Json.text` are grouped into paragraphs by
 * `groupWordsInParagraphs`, and every paragraph becomes one block of type
 * `paragraph`. The speaker is always set to the placeholder `'TBC'`.
 *
 * @param {object} autoEdit2Json - autoEdit2 transcript, with the words under its `text` attribute
 * @returns {array} one block per paragraph, each with `text`, `type`, `data` and `entityRanges`
 */
const autoEdit2ToDraft = autoEdit2Json => {
  const paragraphs = groupWordsInParagraphs(autoEdit2Json.text);

  return paragraphs.map(({ words, text }) => ({
    text: text.join(' '),
    type: 'paragraph',
    data: {
      speaker: 'TBC'
    },
    // each entity range is one word in the space-joined text, so its offset
    // from the beginning of the paragraph and its length are computed per word
    entityRanges: generateEntitiesRanges(words, 'text')
  }));
};
export default autoEdit2ToDraft;