UNPKG

@aiquants/fuzzy-search

Version:

Advanced fuzzy search library with Levenshtein distance, n-gram indexing, and Web Worker support

2 lines 16.8 kB
/**
 * Library-wide default search options. Per-request `options` override these;
 * the index worker reads `ngramSize`, `minNgramOverlap`, `sortBy`, `sortOrder`,
 * `multiTermOperator` and the `indexWorkerOptions` group.
 */
const u = {
  threshold: 0.4,
  caseSensitive: false,
  learningWeight: 1.2,
  debounceMs: 300,
  multiTermOperator: "and",
  autoSearchOnIndexRebuild: true,
  customWeights: {},
  ngramSize: 2,
  minNgramOverlap: 1,
  sortBy: "relevance",
  sortOrder: "desc",
  enableIndexFiltering: true,
  enableLevenshtein: true,
  parallelSearchStrategy: "balanced",
  indexWorkerOptions: {
    strategy: "hybrid",
    threshold: 0.4,
    ngramOverlapThreshold: 0.3,
    minCandidatesRatio: 0.1,
    maxCandidatesRatio: 0.5,
    jaroWinklerPrefix: 0.1,
    maxResults: 1e3,
    relevanceFieldWeight: 0.2,
    relevancePerfectMatchBonus: 0.1,
  },
  levenshteinWorkerOptions: {
    threshold: 0.3,
    lengthSimilarityThreshold: 0.1,
    partialMatchBonus: 0.1,
    lengthDiffPenalty: 1,
    maxResults: 1e3,
    relevanceFieldWeight: 0.15,
  },
};

/**
 * Resolves a numeric `indexWorkerOptions` entry: the request's value, else the
 * library default, else `fallback`. Uses `??` (not `||`) so that an explicit
 * `0` supplied by the caller is honoured instead of being replaced.
 *
 * @param {object|undefined} options - Per-request search options.
 * @param {string} key - Option name inside `indexWorkerOptions`.
 * @param {number} fallback - Value used when neither source defines the option.
 * @returns {number}
 */
function resolveIndexOption(options, key, fallback) {
  return options?.indexWorkerOptions?.[key] ?? u.indexWorkerOptions?.[key] ?? fallback;
}

/** Builds a zeroed basic-statistics record stamped with the current time. */
function createEmptyStats() {
  return {
    totalSearches: 0,
    totalProcessingTime: 0,
    averageSearchTime: 0,
    lastSearchTime: 0,
    errorCount: 0,
    timestamp: Date.now(),
  };
}

/** Builds a zeroed metrics record (timings / throughput / quality). */
function createEmptyMetrics() {
  return {
    timings: {
      averageProcessingTime: 0,
      fastestProcessing: Infinity,
      slowestProcessing: 0,
      lastProcessingTime: 0,
    },
    throughput: { operationsPerSecond: 0, totalOperations: 0 },
    quality: { successRate: 1, errorRate: 0, timeoutRate: 0 },
  };
}

/** Small hiragana folded to their full-size forms when building phonetic keys. */
const SMALL_KANA_TO_FULL = {
  "\u3063": "\u3064",
  "\u3083": "\u3084",
  "\u3085": "\u3086",
  "\u3087": "\u3088",
};

const P = "[fuzzy-search]";

/**
 * Base class holding the statistics bookkeeping for a worker.
 *
 * Subclasses are expected to provide `getWorkerType()` and
 * `getSpecificMetrics()` (both are called here but not defined) and to
 * override `handleMessage()`.
 */
const z = class {
  constructor() {
    this.stats = createEmptyStats();
    this.detailedStats = new Map();
    this.performanceHistory = [];
    this.operationDistribution = new Map();
    this.fieldStats = new Map();
    this.metrics = createEmptyMetrics();
  }

  /**
   * Records one completed operation in the basic stats and derived metrics.
   *
   * @param {number} processingTime - Duration of the operation in milliseconds.
   * @param {boolean} [success=true] - `false` counts the operation as an error.
   */
  updateStats(processingTime, success = true) {
    const { stats, metrics } = this;
    const { timings, throughput, quality } = metrics;

    stats.totalSearches++;
    stats.totalProcessingTime += processingTime;
    stats.averageSearchTime = stats.totalProcessingTime / stats.totalSearches;
    stats.lastSearchTime = processingTime;

    timings.lastProcessingTime = processingTime;
    timings.averageProcessingTime = stats.averageSearchTime;
    timings.fastestProcessing = Math.min(timings.fastestProcessing, processingTime);
    timings.slowestProcessing = Math.max(timings.slowestProcessing, processingTime);

    throughput.totalOperations++;
    if (timings.averageProcessingTime > 0) {
      throughput.operationsPerSecond = 1e3 / timings.averageProcessingTime;
    }

    if (!success) {
      stats.errorCount++;
    }

    const total = throughput.totalOperations;
    quality.successRate = (total - stats.errorCount) / total;
    quality.errorRate = stats.errorCount / total;
  }

  /** @returns {object} Shallow copy of the basic statistics. */
  getStats() {
    return { ...this.stats };
  }

  /** @returns {object} Shallow copy of the metrics (nested groups are shared). */
  getMetrics() {
    return { ...this.metrics };
  }

  /** Clears every counter, history entry and per-field statistic. */
  resetStats() {
    this.stats = createEmptyStats();
    this.metrics = createEmptyMetrics();
    this.detailedStats.clear();
    this.performanceHistory = [];
    this.operationDistribution.clear();
    this.fieldStats.clear();
    console.log(`${P} \u{1F4CA} ${this.getWorkerType()} Worker: Statistics reset completed`);
  }

  /**
   * Records an operation together with its type and free-form metadata.
   *
   * @param {string} operationType - e.g. "buildIndex", "indexSearch", "error".
   * @param {number} processingTime - Duration in milliseconds.
   * @param {object} [metadata={}] - Extra metrics; `hasError: true` marks a failure.
   */
  updateWorkerStats(operationType, processingTime, metadata = {}) {
    // Previously every operation was recorded as a success, so errorCount and
    // errorRate never moved even for operations flagged with `hasError`.
    this.updateStats(processingTime, !metadata.hasError);

    const previousCount = this.operationDistribution.get(operationType) || 0;
    this.operationDistribution.set(operationType, previousCount + 1);

    for (const [key, value] of Object.entries(metadata)) {
      this.detailedStats.set(key, value);
    }

    this.addPerformanceEntry({
      timestamp: Date.now(),
      operationType,
      processingTime,
      queryLength: metadata.queryLength || 0,
      itemCount: metadata.itemCount || 0,
      resultCount: metadata.resultCount || 0,
      additionalMetrics: metadata,
    });
  }

  /**
   * Folds one match score into the running statistics of a field.
   *
   * @param {string} field - Field name.
   * @param {number} score - Score of the match on that field.
   */
  updateFieldStats(field, score) {
    const entry = this.fieldStats.get(field) || {
      matchCount: 0,
      averageScore: 0,
      totalScore: 0,
      bestScore: 0,
      worstScore: 1,
    };
    entry.matchCount += 1;
    entry.totalScore += score;
    entry.averageScore = entry.totalScore / entry.matchCount;
    entry.bestScore = Math.max(entry.bestScore, score);
    entry.worstScore = Math.min(entry.worstScore, score);
    this.fieldStats.set(field, entry);
  }

  /** Appends a history entry, keeping only the 100 most recent ones. */
  addPerformanceEntry(entry) {
    this.performanceHistory.push(entry);
    if (this.performanceHistory.length > 100) {
      this.performanceHistory = this.performanceHistory.slice(-100);
    }
  }

  /** @returns {object} Combined basic, worker-specific and analysis statistics. */
  getWorkerStats() {
    return {
      basic: { ...this.stats, timestamp: Date.now() },
      workerSpecific: {
        workerType: this.getWorkerType(),
        operationCount: this.getTotalOperations(),
        operationDistribution: Object.fromEntries(this.operationDistribution),
        specificMetrics: this.getSpecificMetrics(),
      },
      analysis: {
        fieldPerformance: Object.fromEntries(this.fieldStats),
        performanceInsights: this.generatePerformanceInsights(),
        recentHistory: this.performanceHistory.slice(-10),
      },
    };
  }

  /** Posts an unsolicited "stats" message to the owner of this worker. */
  sendStatsUpdate() {
    const message = {
      type: "stats",
      id: `stats-${Date.now()}`,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: 0,
      workerType: this.getWorkerType(),
      operationMeta: {},
    };
    console.log(`${P} \u{1F4CA} ${this.getWorkerType()} Worker: Sending unified stats update`);
    self.postMessage(message);
  }

  /** @returns {number} Sum of all per-operation-type counters. */
  getTotalOperations() {
    let total = 0;
    for (const count of this.operationDistribution.values()) {
      total += count;
    }
    return total;
  }

  /**
   * Summarises the performance history: averages over the last 10 entries,
   * the most frequent query length over the whole history, and the field with
   * the highest average score. `similarityDistribution` is all zeros here.
   *
   * @returns {object}
   */
  generatePerformanceInsights() {
    const emptyDistribution = {
      highSimilarity: 0,
      mediumSimilarity: 0,
      lowSimilarity: 0,
      veryLowSimilarity: 0,
    };

    if (this.performanceHistory.length === 0) {
      return {
        recentAverageProcessingTime: 0,
        recentAverageResultCount: 0,
        throughputPerSecond: 0,
        totalHistoryEntries: 0,
        mostCommonQueryLength: 0,
        bestPerformingField: "",
        similarityDistribution: emptyDistribution,
      };
    }

    const recent = this.performanceHistory.slice(-10);
    const averageTime = recent.reduce((sum, entry) => sum + entry.processingTime, 0) / recent.length;
    const averageResults = recent.reduce((sum, entry) => sum + entry.resultCount, 0) / recent.length;

    const lengthCounts = new Map();
    for (const entry of this.performanceHistory) {
      lengthCounts.set(entry.queryLength, (lengthCounts.get(entry.queryLength) || 0) + 1);
    }
    let mostCommonQueryLength = 0;
    let highestCount = 0;
    for (const [length, count] of lengthCounts) {
      if (count > highestCount) {
        highestCount = count;
        mostCommonQueryLength = length;
      }
    }

    let bestPerformingField = "";
    let bestAverage = 0;
    for (const [field, entry] of this.fieldStats) {
      if (entry.averageScore > bestAverage) {
        bestAverage = entry.averageScore;
        bestPerformingField = field;
      }
    }

    return {
      recentAverageProcessingTime: averageTime,
      recentAverageResultCount: averageResults,
      throughputPerSecond: averageTime > 0 ? 1e3 / averageTime : 0,
      totalHistoryEntries: this.performanceHistory.length,
      mostCommonQueryLength,
      bestPerformingField,
      similarityDistribution: emptyDistribution,
    };
  }

  /** Placeholder; subclasses override this to process incoming messages. */
  handleMessage(event) {
    console.warn(`${P} \u26A0\uFE0F BaseWorker.handleMessage: No implementation provided`);
  }
};

const k = "[fuzzy-search]";

/**
 * Index worker: builds n-gram, word and phonetic inverted indexes over a list
 * of items and answers "indexSearch" requests with Jaro-Winkler scored results.
 *
 * Handled request types: "ping", "buildIndex", "indexSearch", "getStats",
 * "resetStats". Anything else is answered with an "error" message.
 */
const I = class extends z {
  constructor() {
    super();
    this.ngramCache = new Map();
    this.wordCache = new Map();
    this.phoneticCache = new Map();
    this.currentItems = [];
    this.currentSearchFields = [];
    this.indexMetadata = null;
    this.indexStats = { totalIndexBuilds: 0, currentDataSize: 0, lastIndexTime: 0 };
    this.similarityThresholdStats = { above80: 0, above60: 0, above40: 0, below40: 0 };
    this.setupMessageHandler();
  }

  getWorkerType() {
    return "index";
  }

  /** @returns {object} Index build counters, cache sizes and similarity buckets. */
  getSpecificMetrics() {
    return {
      totalIndexBuilds: this.indexStats.totalIndexBuilds,
      currentDataSize: this.indexStats.currentDataSize,
      lastIndexTime: this.indexStats.lastIndexTime,
      cacheSize: {
        ngram: this.ngramCache.size,
        word: this.wordCache.size,
        phonetic: this.phoneticCache.size,
      },
      indexMetadata: this.indexMetadata || {},
      similarityThresholds: { ...this.similarityThresholdStats },
    };
  }

  /** Resets base statistics plus the index-specific counters (not the index). */
  resetStats() {
    super.resetStats();
    this.indexStats = { totalIndexBuilds: 0, currentDataSize: 0, lastIndexTime: 0 };
    this.similarityThresholdStats = { above80: 0, above60: 0, above40: 0, below40: 0 };
    console.log(`${k} \u{1F527} Index Worker: All statistics reset completed`);
  }

  /** Buckets a result score into the >=0.8 / >=0.6 / >=0.4 / <0.4 counters. */
  updateSimilarityStats(score) {
    const buckets = this.similarityThresholdStats;
    if (score >= 0.8) {
      buckets.above80++;
    } else if (score >= 0.6) {
      buckets.above60++;
    } else if (score >= 0.4) {
      buckets.above40++;
    } else {
      buckets.below40++;
    }
  }

  /** Extends the base insights with the similarity buckets as percentages. */
  generatePerformanceInsights() {
    const base = super.generatePerformanceInsights();
    const { above80, above60, above40, below40 } = this.similarityThresholdStats;
    const total = above80 + above60 + above40 + below40;
    const toPercent = (count) => (total > 0 ? Math.round((count / total) * 100) : 0);

    return {
      ...base,
      similarityDistribution: {
        highSimilarity: toPercent(above80),
        mediumSimilarity: toPercent(above60),
        lowSimilarity: toPercent(above40),
        veryLowSimilarity: toPercent(below40),
      },
    };
  }

  /** Routes messages received on the worker global scope to `handleMessage`. */
  setupMessageHandler() {
    self.onmessage = (event) => {
      this.handleMessage(event);
    };
  }

  /**
   * Dispatches one incoming message by `data.type`. Any exception raised while
   * handling it is reported back as an "error" message.
   *
   * @param {{data: object}} event - Message event; `data` carries `type` and `id`.
   */
  handleMessage(event) {
    const startTime = performance.now();
    const request = event?.data;
    try {
      switch (request.type) {
        case "ping":
          self.postMessage({ type: "pong", id: request.id });
          break;
        case "buildIndex":
          this.handleBuildIndex(request, startTime);
          break;
        case "indexSearch":
          this.handleIndexSearch(request, startTime);
          break;
        case "getStats":
          this.handleGetStats(request, startTime);
          break;
        case "resetStats":
          this.handleResetStats(request, startTime);
          break;
        default:
          this.sendError(request.id, `Unknown request type: ${request.type}`, startTime);
      }
    } catch (error) {
      // `request` may be undefined (message without data); do not throw again here.
      this.sendError(request?.id, error instanceof Error ? error.message : String(error), startTime);
    }
  }

  /**
   * Rebuilds all three indexes from `request.items` / `request.searchFields`
   * and replies with "indexReady".
   *
   * @param {object} request - buildIndex request (`id`, `items`, `searchFields`, `options`).
   * @param {number} startTime - `performance.now()` taken when the message arrived.
   */
  handleBuildIndex(request, startTime) {
    if (!(request.items && request.searchFields)) {
      this.sendError(request.id, "Items and searchFields are required for buildIndex", startTime);
      return;
    }

    const { items, searchFields, options } = request;
    const ngramSize = options?.ngramSize || u.ngramSize;

    this.ngramCache.clear();
    this.wordCache.clear();
    this.phoneticCache.clear();
    this.currentItems = items;
    this.currentSearchFields = searchFields;

    items.forEach((item, itemIndex) => {
      for (const field of searchFields) {
        const text = String(item[field] ?? "");
        if (!text) continue;
        // The indexes are always built from lowercased text.
        const lowered = text.toLowerCase();
        this.buildNgramIndex(lowered, itemIndex, ngramSize);
        this.buildWordIndex(lowered, itemIndex);
        this.buildPhoneticIndex(lowered, itemIndex);
      }
    });

    const buildTime = performance.now() - startTime;
    const ngram = this.ngramCache.size;
    const word = this.wordCache.size;
    const phonetic = this.phoneticCache.size;
    const total = ngram + word + phonetic;

    this.indexMetadata = {
      itemCount: items.length,
      buildTime,
      createdAt: Date.now(),
      indexedFields: [...searchFields],
      ngramSize,
      strategy: options?.indexWorkerOptions?.strategy || "hybrid",
      totalIndexSize: total,
      indexSizes: { ngram, word, phonetic },
    };
    this.indexStats.totalIndexBuilds++;
    this.indexStats.currentDataSize = items.length;
    this.indexStats.lastIndexTime = buildTime;

    this.updateWorkerStats("buildIndex", buildTime, {
      itemCount: items.length,
      fieldCount: searchFields.length,
      ngramSize,
      indexSize: total,
      strategy: "hybrid",
    });

    const response = {
      type: "indexReady",
      id: request.id,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: buildTime,
      workerType: this.getWorkerType(),
      operationMeta: {
        itemCount: items.length,
        indexSize: { total, ngram, word, phonetic },
        strategy: "hybrid",
        ngramSize,
      },
    };
    console.log(`${k} \u{1F527} Index Worker: Built composite index - N-gram: ${this.ngramCache.size}, Word: ${this.wordCache.size}, Phonetic: ${this.phoneticCache.size}`);
    self.postMessage(response);
  }

  /** Replies to "getStats" with the current statistics and index sizes. */
  handleGetStats(request, startTime) {
    const ngram = this.ngramCache.size;
    const word = this.wordCache.size;
    const phonetic = this.phoneticCache.size;
    const response = {
      type: "stats",
      id: request.id,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: performance.now() - startTime,
      workerType: this.getWorkerType(),
      operationMeta: {
        totalIndexBuilds: this.indexStats.totalIndexBuilds,
        currentDataSize: this.indexStats.currentDataSize,
        indexSizes: { total: ngram + word + phonetic, ngram, word, phonetic },
      },
    };
    console.log(`${k} \u{1F4CA} Index Worker: Sending stats update`, response.stats);
    self.postMessage(response);
  }

  /** Resets all statistics and replies with "resetStats". */
  handleResetStats(request, startTime) {
    this.resetStats();
    const elapsed = performance.now() - startTime;
    const response = {
      type: "resetStats",
      id: request.id,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: elapsed,
      workerType: this.getWorkerType(),
      operationMeta: { resetCompleted: true, timestamp: Date.now() },
    };
    console.log(`${k} \u{1F504} Index Worker: Statistics reset completed`);
    self.postMessage(response);
  }

  /**
   * Runs a search against the built index: picks candidates with the requested
   * strategy ("fast", "accurate", anything else = hybrid), scores them, posts
   * "indexSearchResults" and then an unsolicited stats update.
   *
   * @param {object} request - indexSearch request (`id`, `query`, `options`).
   * @param {number} startTime - `performance.now()` taken when the message arrived.
   */
  handleIndexSearch(request, startTime) {
    if (!request.query) {
      this.sendError(request.id, "Query is required for indexSearch", startTime);
      return;
    }
    if (this.currentItems.length === 0) {
      this.sendError(request.id, "Index is not built. Send buildIndex request first.", startTime);
      return;
    }

    const { query, options } = request;
    const searchOptions = options || {};
    const strategy = options?.indexWorkerOptions?.strategy || "hybrid";

    let candidates;
    if (strategy === "fast") {
      candidates = this.getFastCandidates(query, searchOptions);
    } else if (strategy === "accurate") {
      candidates = this.getAccurateCandidates(query, searchOptions);
    } else {
      candidates = this.getHybridCandidates(query, searchOptions);
    }

    const results = this.createSearchResults(Array.from(candidates), query, searchOptions);
    const elapsed = performance.now() - startTime;

    this.updateWorkerStats("indexSearch", elapsed, {
      queryLength: query.length,
      itemCount: this.currentItems.length,
      resultCount: results.length,
      candidateCount: candidates.size,
      strategy,
    });
    for (const result of results) {
      for (const field of result.matchedFields) {
        this.updateFieldStats(field, result.score);
      }
    }

    const response = {
      type: "indexSearchResults",
      id: request.id,
      results,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: elapsed,
      workerType: this.getWorkerType(),
      operationMeta: {
        candidateCount: candidates.size,
        strategy,
        queryLength: query.length,
        threshold: resolveIndexOption(options, "threshold", 0.4),
      },
    };
    console.log(`${k} \u{1F50D} Index Worker: Found ${results.length} results for "${query}" (strategy: ${strategy})`);
    self.postMessage(response);
    this.sendStatsUpdate();
  }

  /** Adds `itemIndex` to the posting set of `key` in the given index map. */
  addPosting(index, key, itemIndex) {
    let postings = index.get(key);
    if (!postings) {
      postings = new Set();
      index.set(key, postings);
    }
    postings.add(itemIndex);
  }

  /** Indexes every n-gram of `text` for the given item. */
  buildNgramIndex(text, itemIndex, ngramSize) {
    for (const gram of this.generateNgrams(text, ngramSize)) {
      this.addPosting(this.ngramCache, gram, itemIndex);
    }
  }

  /** Indexes every whitespace-separated word of `text` for the given item. */
  buildWordIndex(text, itemIndex) {
    for (const word of text.split(/\s+/)) {
      if (word.length > 0) {
        this.addPosting(this.wordCache, word, itemIndex);
      }
    }
  }

  /** Indexes the phonetic key of the whole `text` for the given item. */
  buildPhoneticIndex(text, itemIndex) {
    this.addPosting(this.phoneticCache, this.generatePhoneticKey(text), itemIndex);
  }

  /**
   * @param {string} text
   * @param {number} size - N-gram length in UTF-16 code units.
   * @returns {Set<string>} Distinct n-grams of the lowercased, trimmed text
   *   (empty when the text is shorter than `size`).
   */
  generateNgrams(text, size) {
    const grams = new Set();
    const normalized = text.toLowerCase().trim();
    for (let start = 0; start <= normalized.length - size; start++) {
      grams.add(normalized.substring(start, start + size));
    }
    return grams;
  }

  /**
   * Normalises text into a phonetic key: katakana is shifted to hiragana,
   * small っゃゅょ become full-size, the long-vowel mark and all whitespace are
   * removed, and the result is lowercased.
   *
   * @param {string} text
   * @returns {string}
   */
  generatePhoneticKey(text) {
    return text
      // The katakana block sits 96 (0x60) code points above the hiragana block.
      .replace(/[ァ-ヶ]/g, (char) => String.fromCharCode(char.charCodeAt(0) - 96))
      .replace(/[っゃゅょ]/g, (char) => SMALL_KANA_TO_FULL[char] ?? char)
      .replace(/ー/g, "")
      .toLowerCase()
      .replace(/\s+/g, "");
  }

  /**
   * Exact word lookups; falls back to "shares any n-gram" only when no word hit.
   *
   * @returns {Set<number>} Candidate item indexes.
   */
  getFastCandidates(query, options) {
    const candidates = new Set();
    const normalized = options.caseSensitive ? query : query.toLowerCase();

    for (const word of normalized.split(/\s+/)) {
      if (word.length === 0) continue;
      const postings = this.wordCache.get(word);
      if (postings) {
        for (const itemIndex of postings) candidates.add(itemIndex);
      }
    }

    if (candidates.size === 0) {
      for (const gram of this.generateNgrams(normalized, options.ngramSize || u.ngramSize)) {
        const postings = this.ngramCache.get(gram);
        if (postings) {
          for (const itemIndex of postings) candidates.add(itemIndex);
        }
      }
    }
    return candidates;
  }

  /**
   * Union of word hits, phonetic-key hits, and items sharing at least
   * `max(minNgramOverlap, ngramCount * ngramOverlapThreshold)` query n-grams.
   *
   * @returns {Set<number>} Candidate item indexes.
   */
  getAccurateCandidates(query, options) {
    const candidates = new Set();
    const normalized = options.caseSensitive ? query : query.toLowerCase();
    const ngramSize = options.ngramSize || u.ngramSize;
    const minOverlap = options.minNgramOverlap || u.minNgramOverlap;

    const overlapCounts = new Map();
    const queryGrams = this.generateNgrams(normalized, ngramSize);
    for (const gram of queryGrams) {
      const postings = this.ngramCache.get(gram);
      if (!postings) continue;
      for (const itemIndex of postings) {
        overlapCounts.set(itemIndex, (overlapCounts.get(itemIndex) || 0) + 1);
      }
    }

    for (const word of normalized.split(/\s+/)) {
      if (word.length === 0) continue;
      const postings = this.wordCache.get(word);
      if (postings) {
        for (const itemIndex of postings) candidates.add(itemIndex);
      }
    }

    const phoneticPostings = this.phoneticCache.get(this.generatePhoneticKey(normalized));
    if (phoneticPostings) {
      for (const itemIndex of phoneticPostings) candidates.add(itemIndex);
    }

    const overlapRatio = resolveIndexOption(options, "ngramOverlapThreshold", 0.3);
    const requiredOverlap = Math.max(minOverlap, queryGrams.size * overlapRatio);
    for (const [itemIndex, count] of overlapCounts) {
      if (count >= requiredOverlap) candidates.add(itemIndex);
    }
    return candidates;
  }

  /**
   * Starts from the fast candidates, widens with the accurate ones when too
   * few were found, and narrows by n-gram overlap when too many remain.
   *
   * @returns {Set<number>} Candidate item indexes.
   */
  getHybridCandidates(query, options) {
    let candidates = this.getFastCandidates(query, options);
    const itemCount = this.currentItems.length;

    const minRatio = resolveIndexOption(options, "minCandidatesRatio", 0.1);
    if (candidates.size < Math.min(itemCount * minRatio, 10)) {
      for (const itemIndex of this.getAccurateCandidates(query, options)) {
        candidates.add(itemIndex);
      }
    }

    const maxRatio = resolveIndexOption(options, "maxCandidatesRatio", 0.5);
    if (candidates.size > itemCount * maxRatio) {
      candidates = this.filterByNgramOverlap(query, candidates, options.ngramSize || u.ngramSize, options);
    }
    return candidates;
  }

  /**
   * Keeps only candidates that share at least
   * `max(1, ngramCount * ngramOverlapThreshold)` of the query's n-grams.
   *
   * @returns {Set<number>} Filtered candidate item indexes.
   */
  filterByNgramOverlap(query, candidates, ngramSize, options) {
    const kept = new Set();
    const queryGrams = this.generateNgrams(query.toLowerCase(), ngramSize);
    const overlapRatio = resolveIndexOption(options, "ngramOverlapThreshold", 0.3);
    const requiredOverlap = Math.max(1, queryGrams.size * overlapRatio);

    for (const itemIndex of candidates) {
      let shared = 0;
      for (const gram of queryGrams) {
        if (this.ngramCache.get(gram)?.has(itemIndex)) shared++;
      }
      if (shared >= requiredOverlap) kept.add(itemIndex);
    }
    return kept;
  }

  /**
   * Scores candidate items against the query and returns the sorted results.
   *
   * The base score is the best Jaro-Winkler similarity over the search fields.
   * With `sortBy === "relevance"` the final score adds `relevanceFieldWeight`
   * per field whose similarity is above zero, plus `relevancePerfectMatchBonus`
   * when the base score is >= 0.99. Items below `threshold` are dropped.
   *
   * @param {number[]} candidateIndexes - Indexes into the indexed items.
   * @param {string} query
   * @param {object} options - Per-request search options.
   * @returns {Array<{item: object, score: number, baseScore: number, matchedFields: string[], originalIndex: number}>}
   */
  createSearchResults(candidateIndexes, query, options) {
    const results = [];
    const normalizedQuery = options.caseSensitive ? query : query.toLowerCase();
    const tokens = this.getQueryTokens(query, options);
    const requireAllTokens = this.getMultiTermOperator(options) === "and" && tokens.length > 1;
    const sortBy = options.sortBy || u.sortBy;
    const byRelevance = sortBy === "relevance";
    const fieldWeight = byRelevance ? resolveIndexOption(options, "relevanceFieldWeight", 0.2) : 0;
    const perfectBonus = byRelevance ? resolveIndexOption(options, "relevancePerfectMatchBonus", 0.1) : 0;
    const threshold = resolveIndexOption(options, "threshold", 0.4);

    for (const itemIndex of candidateIndexes) {
      if (itemIndex >= this.currentItems.length) continue;
      const item = this.currentItems[itemIndex];
      if (requireAllTokens && !this.itemMatchesTokens(item, tokens, options)) continue;

      let baseScore = 0;
      const matchedFields = [];
      for (const field of this.currentSearchFields) {
        const raw = String(item[field] ?? "");
        if (!raw) continue;
        const value = options.caseSensitive ? raw : raw.toLowerCase();
        const similarity = this.calculateJaroWinkler(normalizedQuery, value, options);
        if (similarity > 0) matchedFields.push(field);
        if (similarity > baseScore) baseScore = similarity;
      }

      let score = baseScore;
      if (byRelevance && matchedFields.length > 0) {
        const bonus = baseScore >= 0.99 ? perfectBonus : 0;
        score = baseScore + matchedFields.length * fieldWeight + bonus;
      }

      if (score >= threshold) {
        this.updateSimilarityStats(score);
        results.push({ item, score, baseScore, matchedFields, originalIndex: itemIndex });
      }
    }

    const ascending = (options.sortOrder || u.sortOrder) === "asc";
    // "original" sorts by position in the indexed list; every other value sorts by score.
    const sortKey =
      sortBy === "original" ? (result) => result.originalIndex ?? 0 : (result) => result.score;
    results.sort((left, right) =>
      ascending ? sortKey(left) - sortKey(right) : sortKey(right) - sortKey(left),
    );

    const maxResults = options.indexWorkerOptions?.maxResults;
    return typeof maxResults == "number" ? results.slice(0, maxResults) : results;
  }

  /** @returns {string} Multi-term operator from the options, the defaults, or "or". */
  getMultiTermOperator(options) {
    return options.multiTermOperator ?? u.multiTermOperator ?? "or";
  }

  /** @returns {string[]} Non-empty whitespace-separated tokens of the query. */
  getQueryTokens(query, options) {
    const normalized = options.caseSensitive ? query : query.toLowerCase();
    return normalized.split(/\s+/).filter((token) => token.length > 0);
  }

  /**
   * @returns {boolean} `true` when every token occurs as a substring of at
   *   least one search field of `item` (tokens may match different fields).
   */
  itemMatchesTokens(item, tokens, options) {
    if (tokens.length === 0) return true;

    const remaining = new Set(tokens);
    for (const field of this.currentSearchFields) {
      if (remaining.size === 0) break;
      const raw = String(item[field] ?? "");
      if (!raw) continue;
      const value = options.caseSensitive ? raw : raw.toLowerCase();
      for (const token of tokens) {
        if (remaining.has(token) && value.includes(token)) {
          remaining.delete(token);
          if (remaining.size === 0) break;
        }
      }
    }
    return remaining.size === 0;
  }

  /**
   * Jaro-Winkler similarity of two strings, compared by UTF-16 code unit.
   *
   * @param {string} source
   * @param {string} target
   * @param {object} [options] - `indexWorkerOptions.jaroWinklerPrefix` sets the
   *   prefix scaling factor (default 0.1; an explicit 0 disables the bonus).
   * @returns {number} 0 when no characters match, otherwise the similarity.
   */
  calculateJaroWinkler(source, target, options) {
    // Clamp the window: for two one-character strings floor(1/2)-1 is -1, which
    // used to skip every comparison and score identical strings as 0.
    const matchWindow = Math.max(0, Math.floor(Math.max(source.length, target.length) / 2) - 1);
    const sourceMatched = new Array(source.length).fill(false);
    const targetMatched = new Array(target.length).fill(false);

    let matches = 0;
    for (let i = 0; i < source.length; i++) {
      const from = Math.max(0, i - matchWindow);
      const to = Math.min(i + matchWindow + 1, target.length);
      for (let j = from; j < to; j++) {
        if (!targetMatched[j] && source[i] === target[j]) {
          sourceMatched[i] = true;
          targetMatched[j] = true;
          matches++;
          break;
        }
      }
    }
    if (matches === 0) return 0;

    // Count matched characters that appear in a different order (halved below).
    let outOfOrder = 0;
    let cursor = 0;
    for (let i = 0; i < source.length; i++) {
      if (!sourceMatched[i]) continue;
      while (!targetMatched[cursor]) cursor++;
      if (source[i] !== target[cursor]) outOfOrder++;
      cursor++;
    }

    const jaro =
      (matches / source.length + matches / target.length + (matches - outOfOrder / 2) / matches) / 3;

    const prefixScale = resolveIndexOption(options, "jaroWinklerPrefix", 0.1);
    const maxPrefix = Math.min(4, source.length, target.length);
    let prefix = 0;
    while (prefix < maxPrefix && source[prefix] === target[prefix]) prefix++;

    return jaro + prefix * prefixScale * (1 - jaro);
  }

  /**
   * Records a failed operation and posts an "error" message.
   *
   * @param {*} id - Request id to echo back (may be undefined).
   * @param {string} message - Error description.
   * @param {number} startTime - `performance.now()` taken when the message arrived.
   */
  sendError(id, message, startTime) {
    const elapsed = performance.now() - startTime;
    this.updateWorkerStats("error", elapsed, { errorMessage: message, hasError: true });
    const response = {
      type: "error",
      id,
      error: message,
      stats: this.getWorkerStats(),
      metrics: this.getMetrics(),
      processingTime: elapsed,
      workerType: this.getWorkerType(),
    };
    self.postMessage(response);
  }
};

// Start the worker. The guard only matters outside a worker scope, where
// `self` does not exist and instantiating would throw while loading the module.
if (typeof self !== "undefined") {
  new I();
}
//# sourceMappingURL=indexWorker.mjs.map