UNPKG

slimsearch

Version:

Tiny but powerful full-text search engine for browser and Node

3 lines (2 loc) 17.1 kB
"use strict";const xt="ENTRIES",B="KEYS",G="VALUES",g="";class V{set;_type;_path;constructor(e,n){const o=e._tree,s=Array.from(o.keys());this.set=e,this._type=n,this._path=s.length>0?[{node:o,keys:s}]:[]}next(){const e=this.dive();return this.backtrack(),e}dive(){if(this._path.length===0)return{done:!0,value:void 0};const{node:e,keys:n}=S(this._path);if(S(n)===g)return{done:!1,value:this.result()};const o=e.get(S(n));return this._path.push({node:o,keys:Array.from(o.keys())}),this.dive()}backtrack(){if(this._path.length===0)return;const e=S(this._path).keys;e.pop(),!(e.length>0)&&(this._path.pop(),this.backtrack())}key(){return this.set._prefix+this._path.map(({keys:e})=>S(e)).filter(e=>e!==g).join("")}value(){return S(this._path).node.get(g)}result(){switch(this._type){case G:return this.value();case B:return this.key();default:return[this.key(),this.value()]}}[Symbol.iterator](){return this}}const S=t=>t[t.length-1],St=(t,e,n)=>{const o=new Map;if(typeof e!="string")return o;const s=e.length+1,r=s+n,i=new Uint8Array(r*s).fill(n+1);for(let c=0;c<s;++c)i[c]=c;for(let c=1;c<r;++c)i[c*s]=c;return K(t,e,n,o,i,1,s,""),o},K=(t,e,n,o,s,r,i,c)=>{const d=r*i;t:for(const u of t.keys())if(u===g){const a=s[d-1];a<=n&&o.set(c,[t.get(u),a])}else{let a=r;for(let h=0;h<u.length;++h,++a){const f=u[h],_=i*a,p=_-i;let l=s[_];const m=Math.max(0,a-n-1),y=Math.min(i-1,a+n);for(let w=m;w<y;++w){const C=f!==e[w],O=s[p+w]+ +C,v=s[p+w+1]+1,x=s[_+w]+1,z=s[_+w+1]=Math.min(O,v,x);z<l&&(l=z)}if(l>n)continue t}K(t.get(u),e,n,o,s,a,i,c+u)}};class I{_tree;_prefix;_size=void 0;constructor(e=new Map,n=""){this._tree=e,this._prefix=n}atPrefix(e){if(!e.startsWith(this._prefix))throw new Error("Mismatched prefix");const[n,o]=b(this._tree,e.slice(this._prefix.length));if(n===void 0){const[s,r]=D(o);for(const i of s.keys())if(i!==g&&i.startsWith(r)){const c=new Map;return c.set(i.slice(r.length),s.get(i)),new I(c,e)}}return new I(n,e)}clear(){this._size=void 0,this._tree.clear()}delete(e){return this._size=void 0,zt(this._tree,e)}entries(){return new V(this,xt)}forEach(e){for(const[n,o]of this)e(n,o,this)}fuzzyGet(e,n){return St(this._tree,e,n)}get(e){const n=T(this._tree,e);return n!==void 0?n.get(g):void 0}has(e){return T(this._tree,e)?.has(g)??!1}keys(){return new V(this,B)}set(e,n){if(typeof e!="string")throw new Error("key must be a string");return this._size=void 0,M(this._tree,e).set(g,n),this}get size(){if(this._size)return this._size;this._size=0;const e=this.entries();for(;!e.next().done;)this._size+=1;return this._size}update(e,n){if(typeof e!="string")throw new Error("key must be a string");this._size=void 0;const o=M(this._tree,e);return o.set(g,n(o.get(g))),this}fetch(e,n){if(typeof e!="string")throw new Error("key must be a string");this._size=void 0;const o=M(this._tree,e);let s=o.get(g);return s===void 0&&o.set(g,s=n()),s}values(){return new V(this,G)}[Symbol.iterator](){return this.entries()}static from(e){const n=new I;for(const[o,s]of e)n.set(o,s);return n}static fromObject(e){return I.from(Object.entries(e))}}const b=(t,e,n=[])=>{if(e.length===0||t==null)return[t,n];for(const o of t.keys())if(o!==g&&e.startsWith(o))return n.push([t,o]),b(t.get(o),e.slice(o.length),n);return n.push([t,e]),b(void 0,"",n)},T=(t,e)=>{if(e.length===0||!t)return t;for(const n of t.keys())if(n!==g&&e.startsWith(n))return T(t.get(n),e.slice(n.length))},M=(t,e)=>{const n=e.length;t:for(let o=0;t&&o<n;){for(const r of t.keys())if(r!==g&&e[o]===r[0]){const i=Math.min(n-o,r.length);let c=1;for(;c<i&&e[o+c]===r[c];)++c;const d=t.get(r);if(c===r.length)t=d;else{const u=new Map;u.set(r.slice(c),d),t.set(e.slice(o,o+c),u),t.delete(r),t=u}o+=c;continue t}const s=new Map;return t.set(e.slice(o),s),s}return t},zt=(t,e)=>{const[n,o]=b(t,e);if(n!==void 0){if(n.delete(g),n.size===0)Q(o);else if(n.size===1){const[s,r]=n.entries().next().value;Y(o,s,r)}}},Q=t=>{if(t.length===0)return;const[e,n]=D(t);if(e.delete(n),e.size===0)Q(t.slice(0,-1));else if(e.size===1){const[o,s]=e.entries().next().value;o!==g&&Y(t.slice(0,-1),o,s)}},Y=(t,e,n)=>{if(t.length===0)return;const[o,s]=D(t);o.set(s+e,n),o.delete(s)},D=t=>t[t.length-1],Z=(t,e)=>t._idToShortId.has(e),vt=(t,e)=>{const n=t._idToShortId.get(e);if(n!=null)return t._storedFields.get(n)},bt=/[\n\r\p{Z}\p{P}]+/u,L="or",H="and",Ft="and_not",X=t=>new Promise(e=>setTimeout(e,t)),kt=(t,e)=>{t.includes(e)||t.push(e)},tt=(t,e)=>{for(const n of e)t.includes(n)||t.push(n)},et=({score:t},{score:e})=>e-t,nt=()=>new Map,F=t=>{const e=new Map;for(const n of Object.keys(t))e.set(parseInt(n,10),t[n]);return e},k=async t=>{const e=new Map;let n=0;for(const o of Object.keys(t))e.set(parseInt(o,10),t[o]),++n%1e3===0&&await X(0);return e},A=(t,e)=>Object.prototype.hasOwnProperty.call(t,e)?t[e]:void 0,ot={[L]:(t,e)=>{for(const n of e.keys()){const o=t.get(n);if(o==null)t.set(n,e.get(n));else{const{score:s,terms:r,match:i}=e.get(n);o.score=o.score+s,o.match=Object.assign(o.match,i),tt(o.terms,r)}}return t},[H]:(t,e)=>{const n=new Map;for(const o of e.keys()){const s=t.get(o);if(s==null)continue;const{score:r,terms:i,match:c}=e.get(o);tt(s.terms,i),n.set(o,{score:s.score+r,terms:s.terms,match:Object.assign(s.match,c)})}return n},[Ft]:(t,e)=>{for(const n of e.keys())t.delete(n);return t}},Ct=(t,e,n,o,s,r)=>{const{k:i,b:c,d}=r;return Math.log(1+(n-e+.5)/(e+.5))*(d+t*(i+1)/(t+i*(1-c+c*o/s)))},Ot=t=>(e,n,o)=>({term:e,fuzzy:typeof t.fuzzy=="function"?t.fuzzy(e,n,o):t.fuzzy??!1,prefix:typeof t.prefix=="function"?t.prefix(e,n,o):t.prefix===!0,termBoost:typeof t.boostTerm=="function"?t.boostTerm(e,n,o):1}),st=(t,e,n,o)=>{for(const s of Object.keys(t._fieldIds))if(t._fieldIds[s]===n){t._options.logger("warn",`SlimSearch: document with ID ${t._documentIds.get(e)} has changed before removal: term "${o}" was not present in field "${s}". Removing a document after it has changed can corrupt the index!`,"version_conflict");return}},it=(t,e,n,o)=>{const s=t._index.fetch(o,nt);let r=s.get(e);if(r==null)r=new Map,r.set(n,1),s.set(e,r);else{const i=r.get(n);r.set(n,(i??0)+1)}},E=(t,e,n,o)=>{if(!t._index.has(o)){st(t,n,e,o);return}const s=t._index.fetch(o,nt),r=s.get(e),i=r?.get(n);!r||typeof i>"u"?st(t,n,e,o):i<=1?r.size<=1?s.delete(e):r.delete(n):r.set(n,i-1),t._index.get(o).size===0&&t._index.delete(o)},Vt=(t,e,n,o,s)=>{let r=t._fieldLength.get(e);r==null&&t._fieldLength.set(e,r=[]),r[n]=s;const i=(t._avgFieldLength[n]||0)*o+s;t._avgFieldLength[n]=i/(o+1)},Tt=(t,e)=>{const n=t._nextId;return t._idToShortId.set(e,n),t._documentIds.set(n,e),t._documentCount+=1,t._nextId+=1,n},Mt=(t,e,n)=>{const{storeFields:o,extractField:s}=t._options;if(o?.length===0)return;let r=t._storedFields.get(e);r===void 0&&t._storedFields.set(e,r={});for(const i of o){const c=s(n,i);c!=null&&(r[i]=c)}},j=(t,e)=>{const{extractField:n,tokenize:o,processTerm:s,fields:r,idField:i}=t._options,c=n(e,i);if(c==null)throw new Error(`SlimSearch: document does not have ID field "${i}"`);if(Z(t,c))throw new Error(`SlimSearch: duplicate ID ${c}`);const d=Tt(t,c);Mt(t,d,e);for(const u of r){const a=n(e,u);if(a==null)continue;const h=o(a.toString(),u),f=t._fieldIds[u],_=new Set(h).size;Vt(t,d,f,t._documentCount-1,_);for(const p of h){const l=s(p,u);if(Array.isArray(l))for(const m of l)it(t,f,d,m);else l&&it(t,f,d,l)}}},q=(t,e)=>{for(const n of e)j(t,n)},Dt=(t,e,n={})=>{const{chunkSize:o=10}=n,s={chunk:[],promise:Promise.resolve()},{chunk:r,promise:i}=e.reduce(({chunk:c,promise:d},u,a)=>(c.push(u),(a+1)%o===0?{chunk:[],promise:d.then(()=>new Promise(h=>setTimeout(h,0))).then(()=>q(t,c))}:{chunk:c,promise:d}),s);return i.then(()=>q(t,r))},Lt={k:1.2,b:.7,d:.5},N={idField:"id",extractField:(t,e)=>t[e],tokenize:t=>t.split(bt),processTerm:t=>t.toLowerCase(),fields:void 0,searchOptions:void 0,storeFields:[],logger:(t,e)=>{console?.[t]?.(e)},autoVacuum:!0},rt={combineWith:L,prefix:!1,fuzzy:!1,maxFuzzy:6,boost:{},weights:{fuzzy:.45,prefix:.375},bm25:Lt},At={combineWith:H,prefix:(t,e,n)=>e===n.length-1},$={batchSize:1e3,batchWait:10},W={minDirtFactor:.1,minDirtCount:20},P={...$,...W},Et=t=>{if(N.hasOwnProperty(t))return A(N,t);throw new Error(`SlimSearch: unknown option "${t}"`)},J=Symbol("*"),jt=(t,e)=>{const n=new Map,o={...t._options.searchOptions,...e};for(const[s,r]of t._documentIds){const i=o.boostDocument?o.boostDocument(r,"",t._storedFields.get(s)):1;n.set(s,{score:i,terms:[],match:{}})}return n},ct=(t,e=L)=>{if(t.length===0)return new Map;const n=e.toLowerCase();if(!(n in ot))throw new Error(`Invalid combination operator: ${e}`);return t.reduce(ot[n])},R=(t,e,n,o,s,r,i,c,d,u=new Map)=>{if(r==null)return u;for(const a of Object.keys(i)){const h=i[a],f=t._fieldIds[a],_=r.get(f);if(_==null)continue;let p=_.size;const l=t._avgFieldLength[f];for(const m of _.keys()){if(!t._documentIds.has(m)){E(t,f,m,n),p-=1;continue}const y=c?c(t._documentIds.get(m),n,t._storedFields.get(m)):1;if(!y)continue;const w=_.get(m),C=t._fieldLength.get(m)[f],O=Ct(w,p,t._documentCount,C,l,d),v=o*s*h*y*O,x=u.get(m);if(x){x.score+=v,kt(x.terms,e);const z=A(x.match,n);z?z.push(a):x.match[n]=[a]}else u.set(m,{score:v,terms:[e],match:{[n]:[a]}})}}return u},qt=(t,e,n)=>{const o={...t._options.searchOptions,...n},s=(o.fields??t._options.fields).reduce((l,m)=>({...l,[m]:A(o.boost,m)||1}),{}),{boostDocument:r,weights:i,maxFuzzy:c,bm25:d}=o,{fuzzy:u,prefix:a}={...rt.weights,...i},h=t._index.get(e.term),f=R(t,e.term,e.term,1,e.termBoost,h,s,r,d);let _,p;if(e.prefix&&(_=t._index.atPrefix(e.term)),e.fuzzy){const l=e.fuzzy===!0?.2:e.fuzzy,m=l<1?Math.min(c,Math.round(e.term.length*l)):l;m&&(p=t._index.fuzzyGet(e.term,m))}if(_)for(const[l,m]of _){const y=l.length-e.term.length;if(!y)continue;p?.delete(l);const w=a*l.length/(l.length+.3*y);R(t,e.term,l,w,e.termBoost,m,s,r,d,f)}if(p)for(const l of p.keys()){const[m,y]=p.get(l);if(!y)continue;const w=u*l.length/(l.length+y);R(t,e.term,l,w,e.termBoost,m,s,r,d,f)}return f},dt=(t,e,n={})=>{if(e===J)return jt(t,n);if(typeof e!="string"){const a={...n,...e,queries:void 0},h=e.queries.map(f=>dt(t,f,a));return ct(h,a.combineWith)}const{tokenize:o,processTerm:s,searchOptions:r}=t._options,i={tokenize:o,processTerm:s,...r,...n},{tokenize:c,processTerm:d}=i,u=c(e).flatMap(a=>d(a)).filter(a=>!!a).map(Ot(i)).map(a=>qt(t,a,i));return ct(u,i.combineWith)},ut=(t,e,n={})=>{const{searchOptions:o}=t._options,s={...o,...n},r=dt(t,e,n),i=[];for(const[c,{score:d,terms:u,match:a}]of r){const h=u.length||1,f={id:t._documentIds.get(c),score:d*h,terms:Object.keys(a),queryTerms:u,match:a};Object.assign(f,t._storedFields.get(c)),(s.filter==null||s.filter(f))&&i.push(f)}return e===J&&s.boostDocument==null||i.sort(et),i},Nt=(t,e,n={})=>{n={...t._options.autoSuggestOptions,...n};const o=new Map;for(const{score:r,terms:i}of ut(t,e,n)){const c=i.join(" "),d=o.get(c);d!=null?(d.score+=r,d.count+=1):o.set(c,{score:r,terms:i,count:1})}const s=[];for(const[r,{score:i,terms:c,count:d}]of o)s.push({suggestion:r,terms:c,score:i/d});return s.sort(et),s};class $t{_options;_index;_documentCount;_documentIds;_idToShortId;_fieldIds;_fieldLength;_avgFieldLength;_nextId;_storedFields;_dirtCount;_currentVacuum;_enqueuedVacuum;_enqueuedVacuumConditions;constructor(e){if(!e?.fields)throw new Error('SlimSearch: option "fields" must be provided');const n=e.autoVacuum==null||e.autoVacuum===!0?P:e.autoVacuum;this._options={...N,...e,autoVacuum:n,searchOptions:{...rt,...e.searchOptions},autoSuggestOptions:{...At,...e.autoSuggestOptions}},this._index=new I,this._documentCount=0,this._documentIds=new Map,this._idToShortId=new Map,this._fieldIds={},this._fieldLength=new Map,this._avgFieldLength=[],this._nextId=0,this._storedFields=new Map,this._dirtCount=0,this._currentVacuum=null,this._enqueuedVacuum=null,this._enqueuedVacuumConditions=W,this.addFields(this._options.fields)}get isVacuuming(){return this._currentVacuum!=null}get dirtCount(){return this._dirtCount}get dirtFactor(){return this._dirtCount/(1+this._documentCount+this._dirtCount)}get documentCount(){return this._documentCount}get termCount(){return this._index.size}toJSON(){const e=[];for(const[n,o]of this._index){const s={};for(const[r,i]of o)s[r]=Object.fromEntries(i);e.push([n,s])}return{documentCount:this._documentCount,nextId:this._nextId,documentIds:Object.fromEntries(this._documentIds),fieldIds:this._fieldIds,fieldLength:Object.fromEntries(this._fieldLength),averageFieldLength:this._avgFieldLength,storedFields:Object.fromEntries(this._storedFields),dirtCount:this._dirtCount,index:e,version:2}}addFields(e){for(let n=0;n<e.length;n++)this._fieldIds[e[n]]=n}}const at=t=>`SlimSearch: ${t} should be given the same options used when serializing the index`,lt=t=>new $t(t),ht=({documentCount:t,nextId:e,fieldIds:n,averageFieldLength:o,dirtCount:s,version:r},i)=>{if(r!==2)throw new Error("SlimSearch: cannot deserialize an index created with an incompatible version");const c=lt(i);return c._documentCount=t,c._nextId=e,c._idToShortId=new Map,c._fieldIds=n,c._avgFieldLength=o,c._dirtCount=s??0,c._index=new I,c},ft=(t,e)=>{const{index:n,documentIds:o,fieldLength:s,storedFields:r}=t,i=ht(t,e);i._documentIds=F(o),i._fieldLength=F(s),i._storedFields=F(r);for(const[c,d]of i._documentIds)i._idToShortId.set(d,c);for(const[c,d]of n){const u=new Map;for(const a of Object.keys(d))u.set(parseInt(a,10),F(d[a]));i._index.set(c,u)}return i},mt=async(t,e)=>{const{index:n,documentIds:o,fieldLength:s,storedFields:r}=t,i=ht(t,e);i._documentIds=await k(o),i._fieldLength=await k(s),i._storedFields=await k(r);for(const[d,u]of i._documentIds)i._idToShortId.set(u,d);let c=0;for(const[d,u]of n){const a=new Map;for(const h of Object.keys(u))a.set(parseInt(h,10),await k(u[h]));++c%1e3===0&&await X(0),i._index.set(d,a)}return i},Wt=(t,e)=>{if(!e)throw new Error(at("loadJSONIndex"));return ft(JSON.parse(t),e)},Pt=(t,e)=>{if(!e)throw new Error(at("loadJSONIndexAsync"));return mt(JSON.parse(t),e)},_t=(t,e)=>{if(e==null)return!0;const{minDirtCount:n=P.minDirtCount,minDirtFactor:o=P.minDirtFactor}=e;return t.dirtCount>=n&&t.dirtFactor>=o},gt=async(t,e,n)=>{const o=t._dirtCount;if(_t(t,n)){const s=e.batchSize??$.batchSize,r=e.batchWait??$.batchWait;let i=1;for(const[c,d]of t._index){for(const[u,a]of d)for(const[h]of a)t._documentIds.has(h)||(a.size<=1?d.delete(u):a.delete(h));t._index.get(c).size===0&&t._index.delete(c),i%s===0&&await new Promise(u=>setTimeout(u,r)),i+=1}t._dirtCount-=o}await null,t._currentVacuum=t._enqueuedVacuum,t._enqueuedVacuum=null},pt=(t,e,n)=>t._currentVacuum?(t._enqueuedVacuumConditions=t._enqueuedVacuumConditions&&n,t._enqueuedVacuum!=null||(t._enqueuedVacuum=t._currentVacuum.then(()=>{const o=t._enqueuedVacuumConditions;return t._enqueuedVacuumConditions=W,gt(t,e,o)})),t._enqueuedVacuum):_t(t,n)?(t._currentVacuum=gt(t,e),t._currentVacuum):Promise.resolve(),wt=t=>{if(t._options.autoVacuum===!1)return;const{minDirtFactor:e,minDirtCount:n,batchSize:o,batchWait:s}=t._options.autoVacuum;pt(t,{batchSize:o,batchWait:s},{minDirtCount:n,minDirtFactor:e})},Jt=(t,e={})=>pt(t,e),yt=(t,e,n,o)=>{if(n===1){t._avgFieldLength[e]=0;return}const s=t._avgFieldLength[e]*n-o;t._avgFieldLength[e]=s/(n-1)},U=(t,e)=>{const n=t._idToShortId.get(e);if(n==null)throw new Error(`SlimSearch: cannot discard document with ID ${e}: it is not in the index`);t._idToShortId.delete(e),t._documentIds.delete(n),t._storedFields.delete(n),t._fieldLength.get(n)?.forEach((o,s)=>{yt(t,s,t._documentCount,o)}),t._fieldLength.delete(n),t._documentCount-=1,t._dirtCount+=1,wt(t)},Rt=(t,e)=>{const n=t._options.autoVacuum;try{t._options.autoVacuum=!1;for(const o of e)U(t,o)}finally{t._options.autoVacuum=n}wt(t)},It=(t,e)=>{const{tokenize:n,processTerm:o,extractField:s,fields:r,idField:i}=t._options,c=s(e,i);if(c==null)throw new Error(`SlimSearch: document does not have ID field "${i}"`);const d=t._idToShortId.get(c);if(d==null)throw new Error(`SlimSearch: cannot remove document with ID ${c}: it is not in the index`);for(const u of r){const a=s(e,u);if(a==null)continue;const h=n(a.toString(),u),f=t._fieldIds[u],_=new Set(h).size;yt(t,f,t._documentCount,_);for(const p of h){const l=o(p,u);if(Array.isArray(l))for(const m of l)E(t,f,d,m);else l&&E(t,f,d,l)}}t._storedFields.delete(d),t._documentIds.delete(d),t._idToShortId.delete(c),t._fieldLength.delete(d),t._documentCount-=1},Ut=function(t,e){if(e)for(const n of e)It(t,n);else{if(arguments.length>1)throw new Error("Expected documents to be present. Omit the argument to remove all documents.");t._index=new I,t._documentCount=0,t._documentIds=new Map,t._idToShortId=new Map,t._fieldLength=new Map,t._avgFieldLength=[],t._storedFields=new Map,t._nextId=0}},Bt=(t,e)=>{const{idField:n,extractField:o}=t._options,s=o(e,n);U(t,s),j(t,e)};exports.SearchableMap=I,exports.WILDCARD=J,exports.add=j,exports.addAll=q,exports.addAllAsync=Dt,exports.autoSuggest=Nt,exports.createIndex=lt,exports.discard=U,exports.discardAll=Rt,exports.getDefaultValue=Et,exports.getStoredFields=vt,exports.has=Z,exports.loadIndex=ft,exports.loadIndexAsync=mt,exports.loadJSONIndex=Wt,exports.loadJSONIndexAsync=Pt,exports.remove=It,exports.removeAll=Ut,exports.replace=Bt,exports.search=ut,exports.vacuum=Jt; //# sourceMappingURL=index.cjs.map