UNPKG

@bbc/react-transcript-editor

Version:

A React component to make transcribing audio and video easier and faster.

bbc/react-transcript-editor

14 lines • 2.51 kB

JavaScript

"use strict";var _index=_interopRequireDefault(require("../generate-entities-ranges/index.js"));Object.defineProperty(exports,"__esModule",{value:!0}),exports.default=void 0;function _interopRequireDefault(obj){return obj&&obj.__esModule?obj:{default:obj}}/** * Convert IBM json to draftJS * see `sample` folder for example of input and output as well as `example-usage.js` * */var ibmToDraft=function(ibmJson){// helper function to normalise IBM words at line level var normalizeTimeStampsToWords=function(timestamps){return timestamps.map(function(ibmWord){return{text:ibmWord[0],start:ibmWord[1],end:ibmWord[2]}})},findSpeakerSegmentForWord=function(word,speakerSegments){var tmpSegment=speakerSegments.find(function(seg){var segStart=seg.from,segEnd=seg.to;return word.start===segStart&&word.end===segEnd});// if find doesn't find any matches it returns an undefined return void 0===tmpSegment?"UKN":"S_".concat(tmpSegment.speaker)},normalisedWords=function normalizeIBMWordsList(ibmResults){var normalisedResults=[];return ibmResults.forEach(function(result){normalisedResults.push(normalizeTimeStampsToWords(result.alternatives[0].timestamps))}),normalisedResults}(ibmJson.results[0].results),ibmNormalisedWordsWithSpeakers=function addSpeakersToWords(ibmWords,ibmSpeakers){return ibmWords.map(function(lines){return lines.map(function(word){return word.speaker=findSpeakerSegmentForWord(word,ibmSpeakers),word})})}(normalisedWords,ibmJson.results[0].speaker_labels),ibmDratJs=function ibmNormalisedWordsToDraftJs(ibmNormalisedWordsWithSpeakers){var draftJsParagraphsResults=[];return ibmNormalisedWordsWithSpeakers.forEach(function(ibmParagraph){var draftJsContentBlockParagraph={text:ibmParagraph.map(function(word){return word.text}).join(" "),type:"paragraph",data:{// Assuming each paragraph in IBM line is the same // for context it just seems like the IBM data structure gives you word level speakers, // but also gives you "lines" so assuming each word in a line has the same speaker. speaker:ibmParagraph[0].speaker,words:ibmParagraph,start:ibmParagraph[0].start},// the entities as ranges are each word in the space-joined text, // so it needs to be compute for each the offset from the beginning of the paragraph and the length entityRanges:(0,_index.default)(ibmParagraph,"text")// wordAttributeName };draftJsParagraphsResults.push(draftJsContentBlockParagraph)}),draftJsParagraphsResults}(ibmNormalisedWordsWithSpeakers);// return ibmDratJs},_default=ibmToDraft;exports.default=_default;