UNPKG

string-punctuation-tokenizer

Version:

Small library that provides functions to tokenize a string into an array of words with or without punctuation

117 lines (87 loc) 14 kB
"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.selectionArray = exports.selectionsToRanges = exports.spliceStringOnRanges = void 0;

var _occurrences = require("./occurrences");

/**
 * Splice a string into consecutive segments, flagging which ones are selected.
 *
 * The `text` of the returned segments, concatenated in order, always equals
 * `string`. Ranges may be passed in any order; they are processed by ascending
 * start position on a copy, so the caller's array is not mutated. A range that
 * overlaps an earlier one is clipped to the part not yet emitted, and a range
 * that lies outside the string (or is empty after clipping) is ignored.
 *
 * Selected segments also carry `occurrence` and `occurrences`, computed with
 * `occurrencesInString` from `./occurrences` (not visible here; presumably it
 * counts how often the second argument appears in the first - confirm there).
 *
 * @param {String} string - text to splice.
 * @param {Array} ranges - array of inclusive ranges [[start, end], ...]
 * @return {Array} - array of segment objects:
 *   `{text, selected: false}` or `{text, selected: true, occurrence, occurrences}`
 */
var spliceStringOnRanges = function spliceStringOnRanges(string, ranges) {
  var selectionArray = []; // response

  if (!string) {
    // nothing to splice; also avoids calling slice() on null/undefined
    return selectionArray;
  }

  // sort a copy by start (then end) so out-of-order ranges do not produce
  // negative slice offsets
  var orderedRanges = (ranges || []).slice().sort(function (a, b) {
    return a[0] - b[0] || a[1] - b[1];
  });
  var cursor = 0; // index in `string` of the first character not yet emitted

  orderedRanges.forEach(function (range) {
    var start = Math.max(range[0], cursor); // clip away anything already emitted
    var end = Math.min(range[1] + 1, string.length); // exclusive end; +1 includes the last character

    if (start >= end) {
      return; // range is empty, out of bounds, or fully covered by an earlier range
    }

    if (start > cursor) {
      // unselected text between the previous segment and this selection
      selectionArray.push({
        text: string.slice(cursor, start),
        selected: false
      });
    }

    var selection = string.slice(start, end); // the selected text
    var stringBeforeRange = string.slice(0, start);
    var occurrence = (0, _occurrences.occurrencesInString)(stringBeforeRange, selection) + 1;
    var occurrences = (0, _occurrences.occurrencesInString)(string, selection);
    selectionArray.push({
      text: selection,
      selected: true,
      occurrence: occurrence,
      occurrences: occurrences
    });
    cursor = end; // continue right after this selection
  });

  if (cursor < string.length) {
    // trailing unselected text, only added when it is not empty
    selectionArray.push({
      text: string.slice(cursor),
      selected: false
    });
  }

  return selectionArray;
};

exports.spliceStringOnRanges = spliceStringOnRanges;

/**
 * Converts selections to an array of inclusive character ranges.
 *
 * Each selection is located by splitting `string` on `selection.text` and
 * joining the pieces that precede the requested occurrence, so matches are
 * counted left to right without overlapping. Selections are skipped when the
 * string is empty, when `text` is empty or not a string, when `text` does not
 * appear in `string`, or when `occurrence` does not resolve to an existing
 * match (for example 0, or more than the number of matches).
 *
 * @param {String} string - text used to get the ranges of
 * @param {Array} selections - array of selections [{text, occurrence}, ...]
 *   where `occurrence` is the 1-based index of the match to select
 * @return {Array} - array of ranges [[start, end], ...] in selection order,
 *   `end` being the index of the last selected character
 */
var selectionsToRanges = function selectionsToRanges(string, selections) {
  var ranges = []; // response

  if (!string) {
    return ranges;
  }

  (selections || []).forEach(function (selection) {
    var text = selection && selection.text;

    if (typeof text !== "string" || text === "" || !string.includes(text)) {
      return; // nothing that can be located in the string
    }

    var splitArray = string.split(text); // the text between occurrences

    var piecesBefore = splitArray.slice(0, selection.occurrence);

    if (piecesBefore.length === 0 || piecesBefore.length === splitArray.length) {
      // no piece precedes the match, or every piece was taken: the requested
      // occurrence does not exist, so there is no valid range to report
      return;
    }

    // re-joining the pieces restores the text before the selection, whose
    // length is the start position
    var start = piecesBefore.join(text).length;
    var end = start + text.length - 1; // inclusive, hence the -1

    ranges.push([start, end]);
  });
  return ranges;
};

exports.selectionsToRanges = selectionsToRanges;

/**
 * Splice a string into selected / unselected segments based on selections.
 *
 * Convenience wrapper: converts `selections` to ranges with
 * `selectionsToRanges` and splices `string` with `spliceStringOnRanges`.
 *
 * @param {String} string - text to splice.
 * @param {Array} selections - array of selections [{text, occurrence}, ...]
 * @return {Array} - array of segment objects as returned by `spliceStringOnRanges`
 */
var selectionArray = function selectionArray(string, selections) {
  return spliceStringOnRanges(string, selectionsToRanges(string, selections));
};

exports.selectionArray = selectionArray;