UNPKG

string-segmenter

Version:

Split a string into sentences. Supports multiple languages.

3 lines (2 loc) 1.19 kB
"use strict";var f=Object.defineProperty;var r=(e,t)=>f(e,"name",{value:t,configurable:!0});var v=require("module"),y=typeof document<"u"?document.currentScript:null,g=require;function h(e){const t=new Map;function c(n,...s){if(t.has(n))return t.get(n);const i=e(n,...s);return t.set(n,i),i}return r(c,"cachedFn"),c.clear=()=>t.clear(),c.delete=n=>t.delete(n),c}r(h,"cached");function p(e){try{return g(`./abbreviations/${e}.json`)}catch{return[]}}r(p,"fetchAbbreviationsSync");const d=h(e=>({abbreviations:new Set(p(e)),segmenter:new Intl.Segmenter(e,{granularity:"sentence"})}));function*S(e,t="en"){if(!e)return;if(typeof e!="string")throw new TypeError("input must be a string");const{abbreviations:c,segmenter:n}=d(t.toString()),s=/(?<=\s|^)\S+(?=\s+$)/,i=e.replaceAll(/(?<=\.\s+)\S/g,o=>o.toLocaleUpperCase());let a=0;for(const{segment:o,index:m}of n.segment(i)){const u=o.match(s);if(u&&(c.has(u[0].toLocaleLowerCase(t))||u[0].endsWith(")")))continue;const l=m+o.length;yield{segment:e.slice(a,l),index:a,input:e},a=l}}r(S,"splitBySentence");const b=r(()=>void d.clear(),"clearSegmenterCache");exports.clearSegmenterCache=b,exports.splitBySentence=S; //# sourceMappingURL=index.cjs.map