@bbc/react-transcript-editor
Version:
A React component to make transcribing audio and video easier and faster.
14 lines • 2.51 kB
JavaScript
var _index = _interopRequireDefault(require("../generate-entities-ranges/index.js"));

Object.defineProperty(exports, "__esModule", { value: true });
exports.default = void 0;

/**
 * Interop helper for compiled ES modules: returns `obj` unchanged when it is
 * flagged with `__esModule`, otherwise wraps it as `{ default: obj }` so the
 * caller can always read `.default`.
 *
 * @param {*} obj - value returned by `require`.
 * @returns {Object} `obj` itself, or `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

/**
 * Convert IBM json to draftJS
 * see `sample` folder for example of input and output as well as `example-usage.js`
 *
 * Reads `ibmJson.results[0].results` (one entry per IBM "line", each with
 * `alternatives[0].timestamps` as a list of `[text, start, end]` triples) and
 * `ibmJson.results[0].speaker_labels` (segments with `from`, `to`, `speaker`).
 *
 * @param {Object} ibmJson - IBM speech-to-text JSON.
 * @returns {Array<Object>} one draftJs content block per non-empty IBM line:
 *   `{ text, type: "paragraph", data: { speaker, words, start }, entityRanges }`.
 */
var ibmToDraft = function (ibmJson) {
  var transcript = ibmJson.results[0];

  // A transcript may carry no `speaker_labels` at all (presumably when speaker
  // labelling was not requested - confirm against the IBM API reference).
  // Fall back to an empty list so every word gets the "UKN" speaker instead
  // of throwing on `.find` of undefined.
  var speakerSegments = transcript.speaker_labels || [];

  // Turn IBM `[text, start, end]` triples into word objects.
  var normalizeTimeStampsToWords = function (timestamps) {
    return timestamps.map(function (ibmWord) {
      return { text: ibmWord[0], start: ibmWord[1], end: ibmWord[2] };
    });
  };

  // A word belongs to the first segment whose `from`/`to` match the word's
  // `start`/`end` exactly; words without a matching segment are "UKN".
  var findSpeakerSegmentForWord = function (word, segments) {
    var matchingSegment = segments.find(function (segment) {
      return word.start === segment.from && word.end === segment.to;
    });

    if (matchingSegment === undefined) {
      return "UKN";
    }

    return "S_".concat(matchingSegment.speaker);
  };

  var draftJsParagraphs = [];

  transcript.results.forEach(function (result) {
    // Each IBM result stays its own paragraph, preserving IBM's line segmentation.
    var paragraphWords = normalizeTimeStampsToWords(result.alternatives[0].timestamps);

    // A line without any timed word has no first word to take the speaker and
    // start time from, so it cannot form a paragraph: skip it rather than crash.
    if (paragraphWords.length === 0) {
      return;
    }

    // The word objects were created just above, so annotating them in place
    // does not touch the caller's data.
    paragraphWords.forEach(function (word) {
      word.speaker = findSpeakerSegmentForWord(word, speakerSegments);
    });

    var firstWord = paragraphWords[0];

    draftJsParagraphs.push({
      text: paragraphWords
        .map(function (word) {
          return word.text;
        })
        .join(" "),
      type: "paragraph",
      data: {
        // Speakers are resolved per word, but the paragraph takes the first
        // word's speaker, assuming every word of an IBM line shares it.
        speaker: firstWord.speaker,
        words: paragraphWords,
        start: firstWord.start
      },
      // Entity ranges are computed by the imported generate-entities-ranges
      // helper from the word list; "text" is the word attribute name.
      entityRanges: (0, _index.default)(paragraphWords, "text")
    });
  });

  return draftJsParagraphs;
};

var _default = ibmToDraft;
exports.default = _default;