UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well performing package for calculating string similarity

9 lines (8 loc) 36.7 kB
/** * CmpStr v3.0.1 dev-052fa0c-250614 * This is a lightweight, fast and well performing library for calculating string similarity. * (c) 2023-2025 Paul Köhler @komed3 / MIT License * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr */ !function(t,e){"object"==typeof exports&&"undefined"!=typeof module?e(exports):"function"==typeof define&&define.amd?define(["exports"],e):e((t="undefined"!=typeof globalThis?globalThis:t||self).CmpStr={})}(this,(function(t){"use strict";const e=t=>t.replace(/\[(\d+)]/g,".$1").split(".").map((t=>/^\d+$/.test(t)?+t:t));function s(t,r,i){if(""===r)return i;const[n,...o]=e(r);if(void 0!==t&&("object"!=typeof t||null===t))throw Error(`cannot set property <${n}> of <${JSON.stringify(t)}>`);return Object.assign(t??("number"==typeof n?[]:Object.create(null)),{[n]:s(t?.[n],o.join("."),i)})}function r(t=Object.create(null),e=Object.create(null),s=!1){return Object.keys(e).forEach((i=>{const n=e[i];(s||void 0!==n)&&"__proto__"!==i&&"constructor"!==i&&(t[i]="object"!=typeof n||Array.isArray(n)?n:r("object"!=typeof t[i]||Array.isArray(t[i])?Object.create(null):t[i],n))})),t}class i{static ENV;static instance;store=new Set;totalTime=0;totalMem=0;active;static detectEnv(){"undefined"!=typeof process?i.ENV="nodejs":"undefined"!=typeof performance?i.ENV="browser":i.ENV="unknown"}static getInstance(t){return i.ENV||i.detectEnv(),i.instance||(i.instance=new i(t)),i.instance}constructor(t){this.active=t??!1}now(){switch(i.ENV){case"nodejs":return Number(process.hrtime.bigint())/1e6;case"browser":return performance.now();default:return Date.now()}}mem(){switch(i.ENV){case"nodejs":return process.memoryUsage().heapUsed;case"browser":return performance.memory?.usedJSHeapSize??0;default:return 0}}enable(){this.active=!0}disable(){this.active=!1}clear(){this.store.clear(),this.totalTime=0,this.totalMem=0}run(t,e={}){if(!this.active)return t();const s=this.now(),r=this.mem(),i=t(),n=this.now()-s,o=this.mem()-r;return this.store.add({time:n,mem:o,res:i,meta:e}),this.totalTime+=n,this.totalMem+=o,i}async runAsync(t,e={}){if(!this.active)return await t();const s=this.now(),r=this.mem(),i=await t(),n=this.now()-s,o=this.mem()-r;return this.store.add({time:n,mem:o,res:i,meta:e}),this.totalTime+=n,this.totalMem+=o,i}getAll(){return[...this.store]}getLast(){return this.getAll().pop()}getTotal(){return{time:this.totalTime,mem:this.totalMem}}services={enable:this.enable.bind(this),disable:this.disable.bind(this),clear:this.clear.bind(this),report:this.getAll.bind(this),last:this.getLast.bind(this),total:this.getTotal.bind(this)}}class n{text;words=[];sentences=[];charFrequency=new Map;wordHistogram=new Map;syllableCache=new Map;constructor(t){this.text=t.trim(),this.tokenize(),this.computeFrequencies()}tokenize(){this.words=[],this.sentences=[];const t=this.text,e=/\p{L}+/gu;let s;for(;null!==(s=e.exec(t));)this.words.push(s[0].toLowerCase());this.sentences=t.split(/(?<=[.!?])\s+/).filter(Boolean)}computeFrequencies(){for(const t of this.text)this.charFrequency.set(t,(this.charFrequency.get(t)??0)+1);for(const t of this.words)this.wordHistogram.set(t,(this.wordHistogram.get(t)??0)+1)}estimateSyllables(t){if(this.syllableCache.has(t))return this.syllableCache.get(t);const e=t.toLowerCase().replace(/[^a-zäöüß]/g,"").match(/[aeiouyäöü]+/g),s=e?e.length:1;return this.syllableCache.set(t,s),s}getLength(){return this.text.length}getWordCount(){return this.words.length}getSentenceCount(){return this.sentences.length}getAvgWordLength(){let t=0;for(const e of this.words)t+=e.length;return this.words.length?t/this.words.length:0}getAvgSentenceLength(){return this.sentences.length?this.words.length/this.sentences.length:0}getWordHistogram(){return Object.fromEntries(this.wordHistogram)}getMostCommonWords(t=5){return[...this.wordHistogram.entries()].sort(((t,e)=>e[1]-t[1])).slice(0,t).map((t=>t[0]))}getHapaxLegomena(){return[...this.wordHistogram.entries()].filter((([,t])=>1===t)).map((t=>t[0]))}hasNumbers(){return/\d/.test(this.text)}getUpperCaseRatio(){let t=0,e=0;for(let s=0,r=this.text.length;s<r;s++){const r=this.text[s];/[A-Za-zÄÖÜäöüß]/.test(r)&&(e++,/[A-ZÄÖÜ]/.test(r)&&t++)}return e?t/e:0}getCharFrequency(){return Object.fromEntries(this.charFrequency)}getUnicodeStats(){const t={};for(const[e,s]of this.charFrequency){const r=e.charCodeAt(0).toString(16).padStart(4,"0").toUpperCase();t[r]=(t[r]??0)+s}return t}getLongWordRatio(t=7){let e=0;for(const s of this.words)s.length>=t&&e++;return this.words.length?e/this.words.length:0}getShortWordRatio(t=3){let e=0;for(const s of this.words)s.length<=t&&e++;return this.words.length?e/this.words.length:0}getSyllablesCount(){let t=0;for(const e of this.words)t+=this.estimateSyllables(e);return t}getMonosyllabicWordCount(){let t=0;for(const e of this.words)1===this.estimateSyllables(e)&&t++;return t}getMinSyllablesWordCount(t){let e=0;for(const s of this.words)this.estimateSyllables(s)>=t&&e++;return e}getMaxSyllablesWordCount(t){let e=0;for(const s of this.words)this.estimateSyllables(s)<=t&&e++;return e}getHonoresR(){return 100*Math.log(this.words.length)/(1-this.getHapaxLegomena().length/(this.wordHistogram.size??1))}getReadingTime(t=200){return Math.max(1,this.words.length/(t??1))}getReadabilityScore(t="flesch"){const e=this.words.length||1,s=e/(this.sentences.length||1),r=(this.getSyllablesCount()||1)/e;switch(t){case"flesch":return 206.835-1.015*s-84.6*r;case"fleschde":return 180-s-58.5*r;case"kincaid":return.39*s+11.8*r-15.59}}getLIXScore(){const t=this.words.length||1;return t/(this.sentences.length||1)+this.getLongWordRatio()*t/t*100}getWSTFScore(){const t=this.words.length||1,e=this.getMinSyllablesWordCount(3)/t*100,s=this.getAvgSentenceLength(),r=100*this.getLongWordRatio();return[.1935*e+.1672*s+.1297*r-this.getMonosyllabicWordCount()/t*100*.0327-.875,.2007*e+.1682*s+.1373*r-2.779,.2963*e+.1905*s-1.1144,.2744*e+.2656*s-1.693]}}class o{a;b;options;entries=[];grouped=[];diffRun=!1;constructor(t,e,s={}){this.a=t,this.b=e,this.options={mode:"word",caseInsensitive:!1,contextLines:1,groupedLines:!0,expandLines:!1,showChangeMagnitude:!0,maxMagnitudeSymbols:5,lineBreak:"\n",...s},this.computeDiff()}text2lines(){const t=this.a.trim().split(/\r?\n/),e=this.b.trim().split(/\r?\n/);return{linesA:t,linesB:e,maxLen:Math.max(t.length,e.length)}}tokenize(t){const{mode:e}=this.options;switch(e){case"line":return[t];case"word":return t.split(/\s+/)}}concat(t){const{mode:e}=this.options;return t.join("word"===e?" ":"")}computeDiff(){if(!this.diffRun){const{linesA:t,linesB:e,maxLen:s}=this.text2lines();for(let r=0;r<s;r++){const s=t[r]||"",i=e[r]||"";this.lineDiff(s,i,r)}this.findGroups(),this.diffRun=!0}}lineDiff(t,e,s){const{mode:r,caseInsensitive:i}=this.options,n=Math.max(t.length,e.length);let o=t,a=e;i&&(o=t.toLowerCase(),a=e.toLowerCase());let c=[],h=0,l=0;if("line"===r)o!==a&&(c.push({posA:0,posB:0,del:t,ins:e,size:e.length-t.length}),h=t.length,l=e.length);else{c=this.preciseDiff(t,o,e,a);for(const t of c)h+=t.del.length,l+=t.ins.length}c.length&&this.entries.push({line:s,diffs:c,delSize:h,insSize:l,baseLen:n,totalSize:l-h,magnitude:this.magnitude(h,l,n)})}preciseDiff(t,e,s,r){const i=t=>t.reduce(((e,s,r)=>(e.push(r?e[r-1]+t[r-1].length+1:0),e)),[]),n=this.tokenize(t),o=this.tokenize(s),a=this.tokenize(e),c=this.tokenize(r),h=a.length,l=c.length,u=i(n),p=i(o),d=[];let m=0,f=0;for(;m<h&&f<l;)if(a[m]===c[f]){let t=1;for(;m+t<h&&f+t<l&&a[m+t]===c[f+t];)t++;d.push({ai:m,bi:f,len:t}),m+=t,f+=t}else{let t=!1;for(let e=1;e<=3&&!t;e++)m+e<h&&a[m+e]===c[f]?(d.push({ai:m+e,bi:f,len:1}),m+=e+1,f+=1,t=!0):f+e<l&&a[m]===c[f+e]&&(d.push({ai:m,bi:f+e,len:1}),m+=1,f+=e+1,t=!0);t||(m++,f++)}const g=[];let y=0,w=0;for(const t of d){if(y<t.ai||w<t.bi){const e=n.slice(y,t.ai),s=o.slice(w,t.bi);g.push({posA:u[y]??0,posB:p[w]??0,del:this.concat(e),ins:this.concat(s),size:s.join("").length-e.join("").length})}y=t.ai+t.len,w=t.bi+t.len}if(y<h||w<l){const t=n.slice(y),e=o.slice(w);g.push({posA:u[y]??0,posB:p[w]??0,del:this.concat(t),ins:this.concat(e),size:e.join("").length-t.join("").length})}return g.filter((t=>t.del.length>0||t.ins.length>0))}findGroups(){const{contextLines:t}=this.options,e=(t,e,s)=>{const[r,i,n,o]=["delSize","insSize","totalSize","baseLen"].map((e=>t.reduce(((t,s)=>t+s[e]),0)));this.grouped.push({start:e,end:s,delSize:r,insSize:i,totalSize:n,line:t[0].line,entries:t,magnitude:this.magnitude(r,i,o)})};let s=[],r=0,i=0;for(const n of this.entries){const o=Math.max(0,n.line-t),a=n.line+t;!s.length||o<=i+1?(s.length||(r=o),i=Math.max(i,a),s.push(n)):(e(s,r,i),s=[n],r=o,i=a)}s.length&&e(s,r,i)}magnitude(t,e,s){const{maxMagnitudeSymbols:r}=this.options,i=t+e;if(0===i||0===s)return"";const n=Math.min(r,Math.max(Math.round(i/s*r),1)),o=Math.round(e/i*n),a=n-o;return"+".repeat(o)+"-".repeat(a)}output(t){const{mode:e,contextLines:s,groupedLines:r,expandLines:i,showChangeMagnitude:n,lineBreak:o}=this.options,{linesA:a,linesB:c,maxLen:h}=this.text2lines(),l=Math.max(4,h.toString().length),u=(e,s)=>t?`[${s}m${e}`:e,p=e=>t?`${e}`:`+[${e}]`,d=(t,e,s,r)=>{r&&m(r);for(let r=t;r<=e;r++)f(r,s??r);y.push("")},m=t=>{var e;y.push(`${" ".repeat(l)} ${e=`@@ -${t.line+1},${t.delSize} +${t.line+1},${t.insSize} @@`,u(e,"36")} ${n?(t=>u(t,"33"))(t.magnitude):""}`)},f=(t,e)=>{if(a[t]||c[t]){const r=this.entries.find((e=>e.line===t)),i=(t+1).toString().padStart(l," ");r&&e===t?(y.push(`${i} ${s=`- ${g(a[t],r.diffs,"del")}`,u(s,"31")}`),y.push(`${" ".repeat(l)} ${(t=>u(t,"32"))(`+ ${g(c[t],r.diffs,"ins")}`)}`)):y.push(`${i} ${(t=>u(t,"90"))(a[t])}`)}var s},g=(s,r,i)=>{if(!r.length||"line"===e)return s;let n="",o=0;for(const e of r){const r="del"===i?e.posA:e.posB,c="del"===i?e.del:e.ins;c&&(r>o&&(n+=s.slice(o,r)),n+="del"===i?(a=c,t?`${a}`:`-[${a}]`):p(c),o=r+c.length)}var a;return n+s.slice(o)};let y=[""];switch(!0){case i:d(0,h);break;case r:for(const t of this.grouped)d(t.start,t.end,void 0,t);break;default:for(const t of this.entries)d(t.line-s,t.line+s,t.line,t)}return y.join(o)}getStructuredDiff(){return this.entries}getGroupedDiff(){return this.grouped}getASCIIDiff(){return this.output(!1)}getCLIDiff(){return this.output(!0)}}class a{static FNV_PRIME=16777619;static HASH_OFFSET=2166136261;static fnv1a(t){const e=t.length;let s=this.HASH_OFFSET;const r=Math.floor(e/4);for(let e=0;e<r;e++){const r=4*e;s^=t.charCodeAt(r)|t.charCodeAt(r+1)<<8|t.charCodeAt(r+2)<<16|t.charCodeAt(r+3)<<24,s*=this.FNV_PRIME}const i=e%4;if(i>0){const e=4*r;for(let r=0;r<i;r++)s^=t.charCodeAt(e+r),s*=this.FNV_PRIME}return s^=s>>>16,s*=2246822507,s^=s>>>13,s*=3266489909,s^=s>>>16,s>>>0}}class c{static MAX_LEN=2048;static TABLE_SIZE=1e4;table=new Map;key(t,e,s=!1){for(const t of e)if(t.length>c.MAX_LEN)return!1;const r=e.map((t=>a.fnv1a(t)));return s&&r.sort(),[t,...r].join("-")}has(t){return this.table.has(t)}get(t){return this.table.get(t)}set(t,e,s=!0){return!(!(this.table.size<c.TABLE_SIZE)||!s&&this.table.has(t)||(this.table.set(t,e),0))}delete(t){this.table.delete(t)}clear(){this.table.clear()}size(){return this.table.size}}class h{static pipeline=new Map;static cache=new c;static getPipeline(t){if(h.pipeline.has(t))return h.pipeline.get(t);const e=[];t.includes("d")&&e.push((t=>t.normalize("NFD"))),t.includes("u")&&e.push((t=>t.normalize("NFC"))),t.includes("x")&&e.push((t=>t.normalize("NFKC"))),t.includes("w")&&e.push((t=>t.replace(/\s+/g," "))),t.includes("t")&&e.push((t=>t.trim())),t.includes("r")&&e.push((t=>t.replace(/(.)\1+/g,"$1"))),t.includes("s")&&e.push((t=>t.replace(/[^\p{L}\p{N}\s]/gu,""))),t.includes("k")&&e.push((t=>t.replace(/[^\p{L}]/gu,""))),t.includes("n")&&e.push((t=>t.replace(/\p{N}/gu,""))),t.includes("i")&&e.push((t=>t.toLowerCase()));const s=t=>{let s=t;for(const t of e)s=t(s);return s};return h.pipeline.set(t,s),s}static normalize(t,e){if(Array.isArray(t))return t.map((t=>h.normalize(t,e)));if(!e||"string"!=typeof e||!t)return t;const s=h.cache.key(e,[t]);if(s&&h.cache.has(s))return h.cache.get(s);const r=h.getPipeline(e)(t);return s&&h.cache.set(s,r),r}static async normalizeAsync(t,e){return await(Array.isArray(t)?Promise.all(t.map((t=>h.normalize(t,e)))):Promise.resolve(h.normalize(t,e)))}static clear(){h.pipeline.clear(),h.cache.clear()}}class l{static filters=new Map;static find(t,e){return l.filters.get(t)?.find((t=>t.id===e))}static add(t,e,s,r={}){const{priority:i=10,active:n=!0,overrideable:o=!0}=r,a=l.filters.get(t)??[],c=a.findIndex((t=>t.id===e));if(c>=0){if(!a[c].overrideable)return!1;a.splice(c,1)}return a.push({id:e,fn:s,priority:i,active:n,overrideable:o}),a.sort(((t,e)=>t.priority-e.priority)),l.filters.set(t,a),!0}static remove(t,e){const s=l.filters.get(t);if(!s)return!1;const r=s.findIndex((t=>t.id===e));return r>=0&&(s.splice(r,1),!0)}static pause(t,e){const s=l.find(t,e);return!!s&&(s.active=!1,!0)}static resume(t,e){const s=l.find(t,e);return!!s&&(s.active=!0,!0)}static list(t,e=!1){const s=l.filters.get(t)??[],r=[];for(const t of s)e&&!t.active||r.push(t.id);return r}static apply(t,e){const s=l.filters.get(t);if(!s||s.every((t=>!t.active)))return e;const r=t=>{for(const e of s)e.active&&(t=e.fn(t));return t};return Array.isArray(e)?e.map(r):r(e)}static async applyAsync(t,e){const s=l.filters.get(t);if(!s||s.every((t=>!t.active)))return e;const r=async t=>{for(const e of s)e.active&&(t=await Promise.resolve(e.fn(t)));return t};return Array.isArray(e)?Promise.all(e.map(r)):r(e)}static clear(t){t?l.filters.delete(t):l.filters.clear()}}const u=Object.create(null),p=Object.create(null);function d(t,e){if(t in u||t in p)throw new Error(`registry <${t}> already exists / overwriting is forbidden`);const s=Object.create(null),r={add(r,i,n=!1){if(!(i.prototype instanceof e))throw new TypeError(`class must extend <${t}>`);if(!n&&r in s)throw new Error(`entry <${r}> already exists / use <update=true> to overwrite`);s[r]=i},remove(t){delete s[t]},has:t=>t in s,list:()=>Object.keys(s),get(e){if(!(e in s))throw new Error(`class <${e}> not registered for <${t}>`);return s[e]}};return u[t]=r,p[t]=(e,...s)=>function(t,e,...s){e=function(t,e){if(!(t in u))throw new ReferenceError(`registry <${t}> does not exist`);return"string"==typeof e?u[t]?.get(e):e}(t,e);try{return new e(...s)}catch(t){throw new Error(`cannot instantiate class <${e}>`)}}(t,e,...s),r}const m=i.getInstance();class f{static cache=new c;metric;a;b;origA=[];origB=[];options;symmetric;results;static clear(){this.cache.clear()}static swap(t,e,s,r){return s>r?[e,t,r,s]:[t,e,s,r]}static clamp(t){return Math.max(0,Math.min(1,t))}constructor(t,e,s,r={},i=!1){if(this.metric=t,this.a=Array.isArray(e)?e:[e],this.b=Array.isArray(s)?s:[s],0===this.a.length||0===this.b.length)throw new Error("inputs <a> and <b> must not be empty");this.options=r,this.symmetric=i}preCompute(t,e,s,r){return t===e?{res:1}:0==s||0==r||s<2&&r<2?{res:0}:void 0}compute(t,e,s,r,i){throw new Error("method compute() must be overridden in a subclass")}runSingle(t,e){let s=String(this.a[t]),r=s,i=String(this.b[e]),n=i,o=r.length,a=n.length,c=this.preCompute(r,n,o,a);return c||(c=m.run((()=>{const t=f.cache.key(this.metric,[r,n],this.symmetric);return f.cache.get(t||"")??(()=>{this.symmetric&&([r,n,o,a]=f.swap(r,n,o,a));const e=this.compute(r,n,o,a,Math.max(o,a));return t&&f.cache.set(t,e),e})()}))),{metric:this.metric,a:this.origA[t]??s,b:this.origB[e]??i,...c}}async runSingleAsync(t,e){return Promise.resolve(this.runSingle(t,e))}runBatch(){const t=[];for(let e=0;e<this.a.length;e++)for(let s=0;s<this.b.length;s++)t.push(this.runSingle(e,s));this.results=t}async runBatchAsync(){const t=[];for(let e=0;e<this.a.length;e++)for(let s=0;s<this.b.length;s++)t.push(await this.runSingleAsync(e,s));this.results=t}runPairwise(){const t=[];for(let e=0;e<this.a.length;e++)t.push(this.runSingle(e,e));this.results=t}async runPairwiseAsync(){const t=[];for(let e=0;e<this.a.length;e++)t.push(await this.runSingleAsync(e,e));this.results=t}setOriginal(t,e){return t&&(this.origA=Array.isArray(t)?t:[t]),e&&(this.origB=Array.isArray(e)?e:[e]),this}isBatch(){return this.a.length>1||this.b.length>1}isSingle(){return!this.isBatch()}isPairwise(t=!1){return!(!this.isBatch()||this.a.length!==this.b.length)||!t&&(()=>{throw new Error("mode <pairwise> requires arrays of equal length")})()}isSymmetrical(){return this.symmetric}whichMode(t){return t??this.options?.mode??"default"}clear(){this.results=void 0}run(t,e=!0){switch(e&&this.clear(),this.whichMode(t)){case"default":if(this.isSingle()){this.results=this.runSingle(0,0);break}case"batch":this.runBatch();break;case"single":this.results=this.runSingle(0,0);break;case"pairwise":this.isPairwise()&&this.runPairwise();break;default:throw new Error(`unsupported mode <${t}>`)}}async runAsync(t,e=!0){switch(e&&this.clear(),this.whichMode(t)){case"default":if(this.isSingle()){this.results=await this.runSingleAsync(0,0);break}case"batch":await this.runBatchAsync();break;case"single":this.results=await this.runSingleAsync(0,0);break;case"pairwise":this.isPairwise()&&await this.runPairwiseAsync();break;default:throw new Error(`unsupported async mode <${t}>`)}}getMetricName(){return this.metric}getResults(){if(void 0===this.results)throw new Error("run() must be called before getResult()");return this.results}}const g=d("metric",f);class y{maxSize;buffers=[];pointer=0;constructor(t){this.maxSize=t}acquire(t,e){const s=this.buffers.length;for(let r=0;r<s;r++){const i=(this.pointer+r)%s,n=this.buffers[i];if(n.size>=t)return this.pointer=(i+1)%s,e||n.size===t?n:null}return null}release(t){this.buffers.length<this.maxSize?this.buffers.push(t):(this.buffers[this.pointer]=t,this.pointer=(this.pointer+1)%this.maxSize)}clear(){this.buffers=[],this.pointer=0}}class w{static CONFIG={uint16:{type:"uint16",maxSize:32,maxItemSize:2048,allowOversize:!0},"number[]":{type:"number[]",maxSize:16,maxItemSize:1024,allowOversize:!1},set:{type:"set",maxSize:8,maxItemSize:0,allowOversize:!1},map:{type:"map",maxSize:8,maxItemSize:0,allowOversize:!1}};static POOLS={uint16:new y(32),"number[]":new y(16),set:new y(8),map:new y(8)};static allocate(t,e){switch(t){case"uint16":return new Uint16Array(e);case"number[]":return new Array(e).fill(0);case"set":return new Set;case"map":return new Map}}static acquire(t,e){const s=this.CONFIG[t];if(e>s.maxItemSize)return this.allocate(t,e);const r=this.POOLS[t].acquire(e,s.allowOversize);return r?"uint16"===t?r.buffer.subarray(0,e):r.buffer:this.allocate(t,e)}static acquireMany(t,e){return e.map((e=>this.acquire(t,e)))}static release(t,e,s){s<=this.CONFIG[t].maxItemSize&&this.POOLS[t].release({buffer:e,size:s})}}g.add("cosine",class extends f{constructor(t,e,s={}){super("cosine",t,e,s,!0)}_termFreq(t,e){const s=t.split(e),r=w.acquire("map",s.length);for(const t of s)r.set(t,(r.get(t)||0)+1);return r}compute(t,e){const{delimiter:s=" "}=this.options,r=this._termFreq(t,s),i=this._termFreq(e,s);let n=0,o=0,a=0;for(const[t,e]of r)n+=e*(i.get(t)||0),o+=e*e;for(const t of i.values())a+=t*t;return o=Math.sqrt(o),a=Math.sqrt(a),w.release("map",r,r.size),w.release("map",i,i.size),{res:o&&a?f.clamp(n/(o*a)):0,raw:{dotProduct:n,magnitudeA:o,magnitudeB:a}}}}),g.add("damerau",class extends f{constructor(t,e,s={}){super("damerau",t,e,s,!0)}compute(t,e,s,r,i){const n=s+1,[o,a,c]=w.acquireMany("uint16",[n,n,n]);for(let t=0;t<=s;t++)a[t]=t;for(let i=1;i<=r;i++){c[0]=i;const r=e.charCodeAt(i-1);for(let n=1;n<=s;n++){const s=t.charCodeAt(n-1),h=s===r?0:1;let l=Math.min(c[n-1]+1,a[n]+1,a[n-1]+h);n>1&&i>1&&s===e.charCodeAt(i-2)&&r===t.charCodeAt(n-2)&&(l=Math.min(l,o[n-2]+h)),c[n]=l}o.set(a),a.set(c)}const h=a[s];return w.release("uint16",o,n),w.release("uint16",a,n),w.release("uint16",c,n),{res:0===i?1:f.clamp(1-h/i),raw:{dist:h,maxLen:i}}}}),g.add("dice",class extends f{constructor(t,e,s={}){super("dice",t,e,s,!0)}_bigrams(t){const e=t.length-1,s=w.acquire("set",e);for(let r=0;r<e;r++)s.add(t.substring(r,r+2));return s}compute(t,e){const s=this._bigrams(t),r=this._bigrams(e);let i=0;for(const t of s)r.has(t)&&i++;const n=s.size,o=r.size,a=n+o;return w.release("set",s,n),w.release("set",r,o),{res:0===a?1:f.clamp(2*i/a),raw:{intersection:i,size:a}}}}),g.add("hamming",class extends f{constructor(t,e,s={}){super("hamming",t,e,s,!0)}compute(t,e,s,r,i){if(s!==r){if(void 0===this.options.pad)throw new Error(`strings must be of equal length for Hamming Distance, a=${s} and b=${r} given, use option.pad for automatic adjustment`);s<i&&(t=t.padEnd(i,this.options.pad)),r<i&&(e=e.padEnd(i,this.options.pad)),s=r=i}let n=0;for(let s=0;s<t.length;s++)t[s]!==e[s]&&n++;return{res:0===s?1:f.clamp(1-n/s),raw:{dist:n}}}}),g.add("jaccard",class extends f{constructor(t,e,s={}){super("jaccard",t,e,s,!0)}compute(t,e,s,r){const[i,n]=w.acquireMany("set",[s,r]);for(const e of t)i.add(e);for(const t of e)n.add(t);let o=0;for(const t of i)n.has(t)&&o++;const a=i.size+n.size-o;return w.release("set",i,s),w.release("set",n,r),{res:0===a?1:f.clamp(o/a),raw:{intersection:o,union:a}}}}),g.add("jaroWinkler",class extends f{constructor(t,e,s={}){super("jaro-winkler",t,e,s,!0)}compute(t,e,s,r){const i=Math.max(0,Math.floor(r/2)-1),n=w.acquire("uint16",s),o=w.acquire("uint16",r);for(let t=0;t<s;t++)n[t]=0;for(let t=0;t<r;t++)o[t]=0;let a=0;for(let c=0;c<s;c++){const s=Math.max(0,c-i),h=Math.min(c+i+1,r);for(let r=s;r<h;r++)if(!o[r]&&t[c]===e[r]){n[c]=1,o[r]=1,a++;break}}let c=0,h=0,l=0,u=0;if(a>0){let i=0;for(let r=0;r<s;r++)if(n[r]){for(;!o[i];)i++;t[r]!==e[i]&&c++,i++}c/=2,h=(a/s+a/r+(a-c)/a)/3;for(let i=0;i<Math.min(4,s,r)&&t[i]===e[i];i++)l++;u=h+.1*l*(1-h)}return w.release("uint16",n,s),w.release("uint16",o,r),{res:f.clamp(u),raw:{matchWindow:i,matches:a,transpos:c,jaro:h,prefix:l}}}}),g.add("lcs",class extends f{constructor(t,e,s={}){super("lcs",t,e,s,!0)}compute(t,e,s,r,i){const n=s+1,[o,a]=w.acquireMany("uint16",[n,n]);for(let t=0;t<=s;t++)o[t]=0;for(let i=1;i<=r;i++){a[0]=0;const r=e.charCodeAt(i-1);for(let e=1;e<=s;e++)t.charCodeAt(e-1)===r?a[e]=o[e-1]+1:a[e]=Math.max(o[e],a[e-1]);o.set(a)}const c=o[s];return w.release("uint16",o,n),w.release("uint16",a,n),{res:0===i?1:f.clamp(c/i),raw:{lcs:c,maxLen:i}}}}),g.add("levenshtein",class extends f{constructor(t,e,s={}){super("levenshtein",t,e,s,!0)}compute(t,e,s,r,i){const n=s+1,[o,a]=w.acquireMany("uint16",[n,n]);for(let t=0;t<=s;t++)o[t]=t;for(let i=1;i<=r;i++){a[0]=i;const r=e.charCodeAt(i-1);for(let e=1;e<=s;e++){const s=t.charCodeAt(e-1)===r?0:1;a[e]=Math.min(a[e-1]+1,o[e]+1,o[e-1]+s)}o.set(a)}const c=o[s];return w.release("uint16",o,n),w.release("uint16",a,n),{res:0===i?1:f.clamp(1-c/i),raw:{dist:c,maxLen:i}}}}),g.add("needlemanWunsch",class extends f{constructor(t,e,s={}){super("needlemanWunsch",t,e,s,!0)}compute(t,e,s,r,i){const{match:n=1,mismatch:o=-1,gap:a=-1}=this.options,c=s+1,[h,l]=w.acquireMany("uint16",[c,c]);h[0]=0;for(let t=1;t<=s;t++)h[t]=h[t-1]+a;for(let i=1;i<=r;i++){l[0]=h[0]+a;const r=e.charCodeAt(i-1);for(let e=1;e<=s;e++){const s=t.charCodeAt(e-1)===r?n:o;l[e]=Math.max(h[e-1]+s,h[e]+a,l[e-1]+a)}h.set(l)}const u=h[s];w.release("uint16",h,c),w.release("uint16",l,c);const p=i*n;return{res:0===p?0:f.clamp(u/p),raw:{score:u,denum:p}}}}),g.add("qGram",class extends f{constructor(t,e,s={}){super("qgram",t,e,s,!0)}_qGrams(t,e){const s=Math.max(0,t.length-e+1),r=w.acquire("set",s);for(let i=0;i<s;i++)r.add(t.slice(i,i+e));return r}compute(t,e){const{q:s=2}=this.options,r=this._qGrams(t,s),i=this._qGrams(e,s);let n=0;for(const t of r)i.has(t)&&n++;const o=r.size,a=i.size,c=Math.max(o,a);return w.release("set",r,o),w.release("set",i,a),{res:0===c?1:f.clamp(n/c),raw:{intersection:n,size:c}}}}),g.add("smithWaterman",class extends f{constructor(t,e,s={}){super("smithWaterman",t,e,s,!0)}compute(t,e,s,r){const{match:i=2,mismatch:n=-1,gap:o=-2}=this.options,a=s+1,[c,h]=w.acquireMany("uint16",[a,a]);for(let t=0;t<=s;t++)c[t]=0;let l=0;for(let a=1;a<=r;a++){h[0]=0;const r=e.charCodeAt(a-1);for(let e=1;e<=s;e++){const s=t.charCodeAt(e-1)===r?i:n;h[e]=Math.max(0,c[e-1]+s,c[e]+o,h[e-1]+o),h[e]>l&&(l=h[e])}c.set(h)}w.release("uint16",c,a),w.release("uint16",h,a);const u=Math.min(s*i,r*i);return{res:0===u?0:f.clamp(l/u),raw:{score:l,denum:u}}}});const b=i.getInstance();class x{static cache=new c;static default;algo;options;map;static clear(){this.cache.clear()}constructor(t,e={}){this.options=r(this.constructor.default??{},e);const s=v.get(t,this.options.map);if(void 0===s)throw new Error(`requested mapping <${this.options.map}> is not declared`);this.algo=t,this.map=s}applyRules(t,e,s,r){const{ruleset:i=[]}=this.map;if(!i||!i.length)return;const n=s[e-1]||"",o=s[e-2]||"",a=s[e+1]||"",c=s[e+2]||"";for(const h of i)if((!h.char||h.char===t)&&("start"!==h.position||0===e)&&!("middle"===h.position&&e>0&&e<r)&&("end"!==h.position||e===r)&&(!h.prev||h.prev.includes(n))&&(!h.prevNot||!h.prevNot.includes(n))&&(!h.prev2||h.prev2.includes(o))&&(!h.prev2Not||!h.prev2Not.includes(o))&&(!h.next||h.next.includes(a))&&(!h.nextNot||!h.nextNot.includes(a))&&(!h.next2||h.next2.includes(c))&&(!h.next2Not||!h.next2Not.includes(c))&&(!h.leading||h.leading.includes(s.slice(0,h.leading.length).join("")))&&(!h.trailing||h.trailing.includes(s.slice(-h.trailing.length).join("")))&&(!h.match||h.match.every(((t,r)=>s[e+r]===t))))return h.code}encode(t){const{map:e={},ignore:s=[]}=this.map,r=this.word2Chars(t),i=r.length;let n="",o=null;for(let t=0;t<i;t++){const a=r[t];if(s.includes(a))continue;const c=this.mapChar(a,t,r,i,o,e);if(void 0!==c&&(n+=c,o=c,this.exitEarly(n,t)))break}return this.adjustCode(n,r)}mapChar(t,e,s,r,i,n){const{dedupe:o=!0}=this.options,a=this.applyRules(t,e,s,r)??n[t]??void 0;return o&&a===i?void 0:a}equalLen(t){const{length:e=-1,pad:s="0"}=this.options;return-1===e?t:(t+s.repeat(e)).slice(0,e)}word2Chars(t){return t.toLowerCase().split("")}exitEarly(t,e){const{length:s=-1}=this.options;return s>0&&t.length>=s}adjustCode(t,e){return t}loop(t){const e=[];for(const s of t){const t=x.cache.key(this.algo,[s]),r=x.cache.get(t||"")??(()=>{const e=this.encode(s);return t&&x.cache.set(t,e),e})();r&&r.length&&e.push(this.equalLen(r))}return e}async loopAsync(t){const e=[];for(const s of t){const t=await Promise.resolve(this.encode(s));t&&t.length&&e.push(this.equalLen(t))}return e}getAlgoName(){return this.algo}getIndex(t){const{delimiter:e=" "}=this.options;return b.run((()=>this.loop(t.split(e).filter(Boolean)).filter(Boolean)))}async getIndexAsync(t){const{delimiter:e=" "}=this.options;return(await b.runAsync((async()=>await this.loopAsync(t.split(e).filter(Boolean))))).filter(Boolean)}}const A=d("phonetic",x),v=(()=>{const t=Object.create(null),e=e=>t[e]||=Object.create(null);return{add(t,s,r,i=!1){const n=e(t);if(!i&&s in n)throw new Error(`entry <${s}> already exists / use <update=true> to overwrite`);n[s]=r},remove(t,s){delete e(t)[s]},has:(t,s)=>s in e(t),get:(t,s)=>e(t)[s],list:t=>Object.keys(e(t))}})();A.add("cologne",class extends x{static default={map:"default",delimiter:" ",length:-1,dedupe:!0};constructor(t={}){super("cologne",t)}adjustCode(t){return t.slice(0,1)+t.slice(1).replaceAll("0","")}}),v.add("cologne","default",{map:{a:"0","ä":"0",e:"0",i:"0",j:"0",o:"0","ö":"0",u:"0","ü":"0",y:"0",b:"1",p:"1",d:"2",t:"2",f:"3",v:"3",w:"3",g:"4",k:"4",q:"4",l:"5",m:"6",n:"6",r:"7",c:"8",s:"8","ß":"8",z:"8",x:"48"},ignore:["h"],ruleset:[{char:"p",next:["h"],code:"3"},{char:"c",position:"start",next:["a","h","k","l","o","q","r","u","x"],code:"4"},{char:"c",next:["a","h","k","o","q","u","x"],prevNot:["s","z"],code:"4"},{char:"d",next:["c","s","z"],code:"8"},{char:"t",next:["c","s","z"],code:"8"},{char:"x",prev:["c","k","q"],code:"8"}]}),A.add("metaphone",class extends x{static default={map:"en90",delimiter:" ",length:-1,pad:"",dedupe:!1};constructor(t={}){super("metaphone",t)}encode(t){return t=t.replace(/([A-BD-Z])\1+/gi,((t,e)=>"C"===e?t:e)),super.encode(t)}adjustCode(t){return t.slice(0,1)+t.slice(1).replace(/[AEIOU]/g,"")}}),v.add("metaphone","en90",{map:{a:"A",b:"B",c:"K",d:"T",e:"E",f:"F",g:"K",h:"H",i:"I",j:"J",k:"K",l:"L",m:"M",n:"N",o:"O",p:"P",q:"K",r:"R",s:"S",t:"T",u:"U",v:"F",w:"W",x:"KS",y:"Y",z:"S"},ruleset:[{char:"a",position:"start",next:["e"],code:""},{char:"g",position:"start",next:["n"],code:""},{char:"k",position:"start",next:["n"],code:""},{char:"p",position:"start",next:["n"],code:""},{char:"w",position:"start",next:["r"],code:""},{char:"b",position:"end",prev:["m"],code:""},{char:"c",next:["h"],prevNot:["s"],code:"X"},{char:"c",next:["i"],next2:["a"],code:"X"},{char:"c",next:["e","i","y"],code:"S"},{char:"d",next:["g"],next2:["e","i","y"],code:"J"},{char:"g",next:["h"],next2Not:["","a","e","i","o","u"],code:""},{char:"g",trailing:"n",code:""},{char:"g",trailing:"ned",code:""},{char:"g",next:["e","i","y"],prevNot:["g"],code:"J"},{char:"h",prev:["a","e","i","o","u"],nextNot:["a","e","i","o","u"],code:""},{char:"h",prev:["c","g","p","s","t"],code:""},{char:"k",prev:["c"],code:""},{char:"p",next:["h"],code:"F"},{char:"s",next:["h"],code:"X"},{char:"s",next:["i"],next2:["a","o"],code:"X"},{char:"t",next:["i"],next2:["a","o"],code:"X"},{char:"t",next:["h"],code:"0"},{char:"t",next:["c"],next2:["h"],code:""},{char:"w",nextNot:["a","e","i","o","u"],code:""},{char:"h",leading:"w",code:""},{char:"x",position:"start",code:"S"},{char:"y",nextNot:["a","e","i","o","u"],code:""}]}),A.add("soundex",class extends x{static default={map:"en",delimiter:" ",length:4,pad:"0",dedupe:!0};constructor(t={}){super("soundex",t)}adjustCode(t,e){return e[0].toUpperCase()+t.slice(1).replaceAll("0","")}}),v.add("soundex","en",{map:{a:"0",e:"0",h:"0",i:"0",o:"0",u:"0",w:"0",y:"0",b:"1",f:"1",p:"1",v:"1",c:"2",g:"2",j:"2",k:"2",q:"2",s:"2",x:"2",z:"2",d:"3",t:"3",l:"4",m:"5",n:"5",r:"6"}}),v.add("soundex","de",{map:{a:"0","ä":"0",e:"0",h:"0",i:"0",j:"0",o:"0","ö":"0",u:"0","ü":"0",y:"0",b:"1",f:"1",p:"1",v:"1",w:"1",c:"2",g:"2",k:"2",q:"2",s:"2","ß":"2",x:"2",z:"2",d:"3",t:"3",l:"4",m:"5",n:"5",r:"6"},ruleset:[{char:"c",next:["h"],code:"7"}]});const S=i.getInstance();class z{static filter={add:l.add,remove:l.remove,pause:l.pause,resume:l.resume,list:l.list,clear:l.clear};static metric={add:g.add,remove:g.remove,has:g.has,list:g.list};static phonetic={add:A.add,remove:A.remove,has:A.has,list:A.list,map:{add:v.add,remove:v.remove,has:v.has,list:v.list}};static profiler=S.services;static clearCache={normalizer:h.clear,metric:f.clear,phonetic:x.clear};static analyze(t){return new n(t)}static diff(t,e,s){return new o(t,e,s)}static create(t){return new z(t)}options=Object.create(null);constructor(t){t&&("string"==typeof t?this.setSerializedOptions(t):this.setOptions(t))}assert(t,e){switch(t){case"metric":if(!z.metric.has(e))throw new Error("CmpStr <metric> must be set, call .setMetric(), use CmpStr.metric.list() for available metrics");break;case"phonetic":if(!z.phonetic.has(e))throw new Error("CmpStr <phonetic> must be set, call .setPhonetic(), use CmpStr.phonetic.list() for available phonetic algorithms");break;default:throw new Error(`Cmpstr condition <${t}> unknown`)}}assertMany(...t){for(const[e,s]of t)this.assert(e,s)}resolveOptions(t){return r({...this.options??Object.create(null)},t)}normalize(t,e){return h.normalize(t,e??this.options.flags??"")}filter(t,e){return l.apply(e,t)}prepare(t,e){const{flags:s,processors:r}=e??this.options;return s?.length&&(t=this.normalize(t,s)),t=this.filter(t,"input"),r?.phonetic&&(t=this.index(t,r.phonetic)),t}postProcess(t,e){return e?.removeZero&&Array.isArray(t)&&(t=t.filter((t=>t.res>0))),t}index(t,{algo:e,opt:s}){this.assert("phonetic",e);const r=p.phonetic(e,s),i=s?.delimiter??" ";return Array.isArray(t)?t.map((t=>r.getIndex(t).join(i))):r.getIndex(t).join(i)}compute(t,e,s,r,i,n){const o=this.resolveOptions(s);this.assert("metric",o.metric);const a=n?t:this.prepare(t,o),c=n?e:this.prepare(e,o),h=p.metric(o.metric,a,c,o.opt);"prep"!==o.output&&h.setOriginal(t,e),h.run(r);const l=this.postProcess(h.getResults(),o);return this.output(l,i??o.raw)}output(t,e){return e??this.options.raw?t:Array.isArray(t)?t.map((t=>({source:t.a,target:t.b,match:t.res}))):{source:t.a,target:t.b,match:t.res}}clone(){return Object.assign(Object.create(Object.getPrototypeOf(this)),this)}reset(){for(const t in this.options)delete this.options[t];return this}setOptions(t){return this.options=t,this}mergeOptions(t){return r(this.options,t),this}setSerializedOptions(t){return this.options=JSON.parse(t),this}setOption(t,e){return s(this.options,t,e),this}rmvOption(t){return function(t,s,r=!1){const i=(t,e,s=0)=>{const n=e[s];if(!t||"object"!=typeof t)return!1;if(s===e.length-1)return delete t[n];if(!i(t[n],e,s+1))return!1;if(!r){const e=t[n];"object"==typeof e&&(Array.isArray(e)&&e.every((t=>null==t))||!Array.isArray(e)&&0===Object.keys(e).length)&&delete t[n]}return!0};i(t,e(s))}(this.options,t),this}setRaw(t){return this.setOption("raw",t)}setMetric(t){return this.setOption("metric",t)}setFlags(t){return this.setOption("flags",t)}rmvFlags(){return this.rmvOption("flags")}setProcessors(t){return this.setOption("processors",t)}rmvProcessors(){return this.rmvOption("processors")}getOptions(){return this.options}getSerializedOptions(){return JSON.stringify(this.options)}getOption(t){return function(t,s){return e(s).reduce(((t,e)=>t?.[e]??void 0),t)}(this.options,t)}test(t,e,s){return this.compute(t,e,s,"single")}compare(t,e,s){return this.compute(t,e,s,"single",!0).res}batchTest(t,e,s){return this.compute(t,e,s,"batch")}batchSorted(t,e,s="desc",r){return this.output(this.compute(t,e,r,"batch",!0).sort(((t,e)=>"asc"===s?t.res-e.res:e.res-t.res)),r?.raw??this.options.raw)}pairs(t,e,s){return this.compute(t,e,s,"pairwise")}match(t,e,s,r){return this.output(this.compute(t,e,r,"batch",!0).filter((t=>t.res>=s)).sort(((t,e)=>e.res-t.res)),r?.raw??this.options.raw)}closest(t,e,s=1,r){return this.batchSorted(t,e,"desc",r).slice(0,s)}furthest(t,e,s=1,r){return this.batchSorted(t,e,"asc",r).slice(0,s)}search(t,e,s,r){const i=this.resolveOptions({flags:s,processors:r}),n=this.prepare(t,i),o=this.prepare(e,i);return e.filter(((t,e)=>o[e].includes(n)))}matrix(t,e){return(t=this.prepare(t,this.resolveOptions(e))).map((e=>this.compute(e,t,void 0,"batch",!0,!0).map((t=>t.res??0))))}phoneticIndex(t,e,s){const{algo:r,opt:i}=this.options.processors?.phonetic??{};return this.index(t,{algo:e??r,opt:s??i})}}class C extends z{static create(t){return new C(t)}constructor(t){super(t)}async normalizeAsync(t,e){return h.normalizeAsync(t,e??this.options.flags??"")}async filterAsync(t,e){return l.applyAsync(e,t)}async prepareAsync(t,e){const{flags:s,processors:r}=e??this.options;return s?.length&&(t=await this.normalizeAsync(t,s)),t=await this.filterAsync(t,"input"),r?.phonetic&&(t=await this.indexAsync(t,r.phonetic)),t}async indexAsync(t,{algo:e,opt:s}){this.assert("phonetic",e);const r=p.phonetic(e,s),i=s?.delimiter??" ";return Array.isArray(t)?Promise.all(t.map((t=>r.getIndexAsync(t).then((t=>t.join(i)))))):r.getIndexAsync(t).then((t=>t.join(i)))}async computeAsync(t,e,s,r,i,n){const o=this.resolveOptions(s);this.assert("metric",o.metric);const a=n?t:await this.prepareAsync(t,o),c=n?e:await this.prepareAsync(e,o),h=p.metric(o.metric,a,c,o.opt);"prep"!==o.output&&h.setOriginal(t,e),await h.runAsync(r);const l=this.postProcess(h.getResults(),o);return this.output(l,i??o.raw)}async testAsync(t,e,s){return this.computeAsync(t,e,s,"single")}async compareAsync(t,e,s){return(await this.computeAsync(t,e,s,"single",!0)).res}async batchTestAsync(t,e,s){return this.computeAsync(t,e,s,"batch")}async batchSortedAsync(t,e,s="desc",r){const i=await this.computeAsync(t,e,r,"batch",!0);return this.output(i.sort(((t,e)=>"asc"===s?t.res-e.res:e.res-t.res)),r?.raw??this.options.raw)}async pairsAsync(t,e,s){return this.computeAsync(t,e,s,"pairwise")}async matchAsync(t,e,s,r){const i=await this.computeAsync(t,e,r,"batch",!0);return this.output(i.filter((t=>t.res>=s)).sort(((t,e)=>e.res-t.res)),r?.raw??this.options.raw)}async closestAsync(t,e,s=1,r){return(await this.batchSortedAsync(t,e,"desc",r)).slice(0,s)}async furthestAsync(t,e,s=1,r){return(await this.batchSortedAsync(t,e,"asc",r)).slice(0,s)}async searchAsync(t,e,s,r){const i=this.resolveOptions({flags:s,processors:r}),n=await this.prepareAsync(t,i),o=await this.prepareAsync(e,i);return e.filter(((t,e)=>o[e].includes(n)))}async matrixAsync(t,e){return t=await this.prepareAsync(t,this.resolveOptions(e)),Promise.all(t.map((async e=>await this.computeAsync(e,t,void 0,"batch",!0,!0).then((t=>t.map((t=>t.res??0)))))))}async phoneticIndexAsync(t,e,s){const{algo:r,opt:i}=this.options.processors?.phonetic??{};return this.indexAsync(t,{algo:e??r,opt:s??i})}}t.CmpStr=z,t.CmpStrAsync=C,t.DiffChecker=o,t.Normalizer=h,t.TextAnalyzer=n})); //# sourceMappingURL=CmpStr.umd.min.js.map