UNPKG

string-segmenter

Version:

Split a string into sentences. Supports multiple languages.

3 lines (2 loc) 1.16 kB
import { createRequire } from "module";

// CommonJS-style loader anchored at this module, used to read JSON data files synchronously.
const requireFromHere = createRequire(import.meta.url);

/**
 * Memoizes `fn` on its first argument only; any further arguments are passed
 * through on a cache miss but do not take part in the cache key.
 *
 * @param {Function} fn - Function whose results should be cached.
 * @returns {Function} Wrapper with extra `clear()` and `delete(key)` methods
 *   that drop all cached entries or a single one.
 */
function cached(fn) {
  const cache = new Map();

  function cachedFn(key, ...rest) {
    if (cache.has(key)) {
      return cache.get(key);
    }
    const value = fn(key, ...rest);
    cache.set(key, value);
    return value;
  }

  cachedFn.clear = () => cache.clear();
  cachedFn.delete = (key) => cache.delete(key);
  return cachedFn;
}

/**
 * Loads `./abbreviations/<locale>.json` relative to this module.
 *
 * @param {string} locale - Locale tag used verbatim as the file name.
 * @returns {*} The parsed JSON content (presumably an array of abbreviation
 *   strings — it is handed to `new Set(...)` by the caller), or an empty array
 *   when loading fails.
 */
function fetchAbbreviationsSync(locale) {
  try {
    return requireFromHere(`./abbreviations/${locale}.json`);
  } catch {
    // Deliberate best effort: a locale without an abbreviation file (or with an
    // unreadable one) simply gets no abbreviation handling.
    return [];
  }
}

// Per-locale bundle of the abbreviation set and the sentence segmenter, built once per locale string.
const getSegmenter = cached((locale) => ({
  abbreviations: new Set(fetchAbbreviationsSync(locale)),
  segmenter: new Intl.Segmenter(locale, { granularity: "sentence" }),
}));

/**
 * Lazily splits `input` into sentences.
 *
 * Boundaries come from `Intl.Segmenter`, with two adjustments:
 *  - the first non-space character after ". " is upper-cased in a scratch copy
 *    before segmenting (the yielded text is always sliced from the original);
 *  - a segment whose last word is a known abbreviation for the locale, or ends
 *    with ")", is merged into the following segment.
 *
 * The yielded segments, concatenated in order, always reproduce `input`.
 *
 * @param {string} input - Text to split. Any falsy value yields nothing.
 * @param {string|Intl.Locale} [locale="en"] - Locale for segmentation and
 *   abbreviation lookup; its `toString()` value is the cache key.
 * @yields {{segment: string, index: number, input: string}} The sentence text,
 *   its start offset in `input`, and the original input.
 * @throws {TypeError} On first iteration, if `input` is truthy but not a string.
 */
function* splitBySentence(input, locale = "en") {
  if (!input) {
    return;
  }
  if (typeof input !== "string") {
    throw new TypeError("input must be a string");
  }

  const { abbreviations, segmenter } = getSegmenter(locale.toString());
  const lastWordPattern = /(?<=\s|^)\S+(?=\s+$)/;

  // Segment offsets are computed on this scratch copy but applied to `input`,
  // so the copy must keep the exact length. Some upper-case mappings expand
  // ("ß" -> "SS"); truncating to the original length keeps offsets aligned
  // while still presenting an upper-case letter to the segmenter.
  const scratch = input.replaceAll(/(?<=\.\s+)\S/g, (ch) =>
    ch.toLocaleUpperCase().slice(0, ch.length),
  );

  let start = 0;
  for (const { segment, index } of segmenter.segment(scratch)) {
    const lastWord = segment.match(lastWordPattern)?.[0];
    const isFalseBoundary =
      lastWord !== undefined &&
      (abbreviations.has(lastWord.toLocaleLowerCase(locale)) || lastWord.endsWith(")"));
    if (isFalseBoundary) {
      // Not a real sentence end: keep accumulating into the next segment.
      continue;
    }

    const end = index + segment.length;
    yield { segment: input.slice(start, end), index: start, input };
    start = end;
  }

  // If the final segment was skipped above there is still unreported text;
  // emit it so no part of the input is silently dropped.
  if (start < input.length) {
    yield { segment: input.slice(start), index: start, input };
  }
}

/**
 * Discards every cached per-locale segmenter/abbreviation bundle.
 *
 * @returns {undefined}
 */
const clearSegmenterCache = () => void getSegmenter.clear();

export { clearSegmenterCache, splitBySentence };