slimsearch
Version:
Tiny but powerful full-text search engine for browser and Node
3 lines (2 loc) • 17.1 kB
JavaScript
/**
 * Full-text search engine module.
 *
 * Layout:
 *  1. `SearchableMap` - a radix tree keyed by strings, with prefix and fuzzy lookup.
 *  2. Index bookkeeping helpers (terms, field lengths, document IDs).
 *  3. Query execution (`search`, `autoSuggest`) with BM25-style scoring.
 *  4. The index class, (de)serialization and vacuuming of discarded documents.
 *
 * Properties prefixed with `_` are read and written by the free functions in
 * this module, so they are deliberately kept as plain (non-`#private`) fields.
 */

/* -------------------------------------------------------------------------- */
/* Radix tree                                                                 */
/* -------------------------------------------------------------------------- */

/** Iteration modes understood by {@link TreeIterator}. */
const ENTRIES = "ENTRIES";
const KEYS = "KEYS";
const VALUES = "VALUES";

/** Child key under which a tree node stores the value of the key ending there. */
const LEAF = "";

/** Returns the last element of `array` (`undefined` when it is empty). */
const last = (array) => array[array.length - 1];

/**
 * Depth-first iterator over the entries, keys or values of a SearchableMap.
 * `_path` is a stack of `{ node, keys }` frames; `keys` holds the children of
 * `node` that still have to be visited (consumed from the end).
 */
class TreeIterator {
  set;
  _type;
  _path;

  /**
   * @param {SearchableMap} set - Map to iterate over.
   * @param {string} type - One of ENTRIES, KEYS or VALUES.
   */
  constructor(set, type) {
    const node = set._tree;
    const keys = Array.from(node.keys());

    this.set = set;
    this._type = type;
    this._path = keys.length > 0 ? [{ node, keys }] : [];
  }

  /** @returns {{done: boolean, value: *}} The next iteration result. */
  next() {
    const value = this.dive();

    this.backtrack();

    return value;
  }

  /** Descends along the last pending key of each frame until a leaf is reached. */
  dive() {
    if (this._path.length === 0) return { done: true, value: undefined };

    const { node, keys } = last(this._path);

    if (last(keys) === LEAF) return { done: false, value: this.result() };

    const child = node.get(last(keys));

    this._path.push({ node: child, keys: Array.from(child.keys()) });

    return this.dive();
  }

  /** Drops the key just visited and pops every frame that has no keys left. */
  backtrack() {
    if (this._path.length === 0) return;

    const keys = last(this._path).keys;

    keys.pop();
    if (keys.length > 0) return;

    this._path.pop();
    this.backtrack();
  }

  /** @returns {string} Full key of the current position, including the map prefix. */
  key() {
    return (
      this.set._prefix +
      this._path
        .map(({ keys }) => last(keys))
        .filter((key) => key !== LEAF)
        .join("")
    );
  }

  /** @returns {*} Value stored at the current position. */
  value() {
    return last(this._path).node.get(LEAF);
  }

  /** @returns {*} Value, key or `[key, value]` depending on the iterator type. */
  result() {
    switch (this._type) {
      case VALUES:
        return this.value();
      case KEYS:
        return this.key();
      default:
        return [this.key(), this.value()];
    }
  }

  [Symbol.iterator]() {
    return this;
  }
}

/**
 * Recursive step of {@link fuzzySearch}: walks the children of `node`, filling
 * one row of the edit-distance matrix per key character, and prunes a branch
 * as soon as every cell of the computed row exceeds `maxDistance`.
 *
 * @param {Map} node - Current tree node.
 * @param {string} query - String being searched for.
 * @param {number} maxDistance - Maximum accepted edit distance.
 * @param {Map} results - Output map: key -> [value, distance].
 * @param {Uint8Array} matrix - Flattened distance matrix, `columns` cells per row.
 * @param {number} row - Matrix row at which this node starts.
 * @param {number} columns - Row width (`query.length + 1`).
 * @param {string} prefix - Key accumulated so far.
 */
const recurse = (
  node,
  query,
  maxDistance,
  results,
  matrix,
  row,
  columns,
  prefix,
) => {
  const offset = row * columns;

  children: for (const key of node.keys()) {
    if (key === LEAF) {
      // The last cell of the previous row is the distance for the whole key.
      const distance = matrix[offset - 1];

      if (distance <= maxDistance)
        results.set(prefix, [node.get(key), distance]);

      continue;
    }

    let currentRow = row;

    for (let pos = 0; pos < key.length; ++pos, ++currentRow) {
      const char = key[pos];
      const thisRowOffset = columns * currentRow;
      const prevRowOffset = thisRowOffset - columns;
      let minDistance = matrix[thisRowOffset];

      // Only the band within `maxDistance` of the diagonal is computed.
      const jmin = Math.max(0, currentRow - maxDistance - 1);
      const jmax = Math.min(columns - 1, currentRow + maxDistance);

      for (let j = jmin; j < jmax; ++j) {
        const different = char !== query[j];
        const replace = matrix[prevRowOffset + j] + +different;
        const remove = matrix[prevRowOffset + j + 1] + 1;
        const insert = matrix[thisRowOffset + j] + 1;
        const distance = Math.min(replace, remove, insert);

        matrix[thisRowOffset + j + 1] = distance;
        if (distance < minDistance) minDistance = distance;
      }

      if (minDistance > maxDistance) continue children;
    }

    recurse(
      node.get(key),
      query,
      maxDistance,
      results,
      matrix,
      currentRow,
      columns,
      prefix + key,
    );
  }
};

/**
 * Finds every key of the tree within `maxDistance` edits of `query`.
 *
 * @param {Map} node - Root of the radix tree.
 * @param {string} query - String to search for; a non-string yields no results.
 * @param {number} maxDistance - Maximum edit distance.
 * @returns {Map<string, [*, number]>} Map of key -> [value, distance].
 */
const fuzzySearch = (node, query, maxDistance) => {
  const results = new Map();

  if (typeof query !== "string") return results;

  const columns = query.length + 1;
  const rows = columns + maxDistance;
  const matrix = new Uint8Array(rows * columns).fill(maxDistance + 1);

  // First row and first column hold the distance from the empty string.
  for (let j = 0; j < columns; ++j) matrix[j] = j;
  for (let i = 1; i < rows; ++i) matrix[i * columns] = i;

  recurse(node, query, maxDistance, results, matrix, 1, columns, "");

  return results;
};

/**
 * Walks the tree along `key`, recording each `[node, edge]` step.
 *
 * @param {Map|undefined} tree - Node to start from.
 * @param {string} key - Remaining part of the key.
 * @param {Array<[Map, string]>} path - Accumulator for the visited steps.
 * @returns {[Map|undefined, Array<[Map, string]>]} The node reached (or
 *   `undefined` when the key is not a path in the tree) and the path walked.
 */
const trackDown = (tree, key, path = []) => {
  if (key.length === 0 || tree == null) return [tree, path];

  for (const edge of tree.keys())
    if (edge !== LEAF && key.startsWith(edge)) {
      path.push([tree, edge]);

      return trackDown(tree.get(edge), key.slice(edge.length), path);
    }

  path.push([tree, key]);

  return trackDown(undefined, "", path);
};

/**
 * Returns the node reached by following `key` from `tree`, or `undefined`.
 *
 * @param {Map|undefined} tree - Node to start from.
 * @param {string} key - Key to follow.
 * @returns {Map|undefined} Node at the end of the key.
 */
const lookup = (tree, key) => {
  if (key.length === 0 || !tree) return tree;

  for (const edge of tree.keys())
    if (edge !== LEAF && key.startsWith(edge))
      return lookup(tree.get(edge), key.slice(edge.length));

  return undefined;
};

/**
 * Returns the node for `key`, creating it (and splitting existing edges that
 * only share a prefix with `key`) when it does not exist yet.
 *
 * @param {Map} tree - Root node.
 * @param {string} key - Key whose node is wanted.
 * @returns {Map} Node at which the value for `key` is stored under LEAF.
 */
const createPath = (tree, key) => {
  const keyLength = key.length;
  let node = tree;

  outer: for (let pos = 0; node && pos < keyLength; ) {
    for (const edge of node.keys())
      // Edges out of one node never share a first character, so at most one matches.
      if (edge !== LEAF && key[pos] === edge[0]) {
        const limit = Math.min(keyLength - pos, edge.length);
        let shared = 1;

        while (shared < limit && key[pos + shared] === edge[shared]) ++shared;

        const child = node.get(edge);

        if (shared === edge.length) {
          // The whole edge matches: descend.
          node = child;
        } else {
          // Partial match: split the edge at the common prefix.
          const intermediate = new Map();

          intermediate.set(edge.slice(shared), child);
          node.set(key.slice(pos, pos + shared), intermediate);
          node.delete(edge);
          node = intermediate;
        }

        pos += shared;
        continue outer;
      }

    // No edge matches: attach the rest of the key as a new leaf branch.
    const leaf = new Map();

    node.set(key.slice(pos), leaf);

    return leaf;
  }

  return node;
};

/** Replaces the last step of `path` by a single edge `lastEdge + key` pointing at `value`. */
const merge = (path, key, value) => {
  if (path.length === 0) return;

  const [node, edge] = last(path);

  node.set(edge + key, value);
  node.delete(edge);
};

/** Removes the now-empty node at the end of `path` and compacts its ancestors. */
const cleanup = (path) => {
  if (path.length === 0) return;

  const [node, edge] = last(path);

  node.delete(edge);

  if (node.size === 0) {
    cleanup(path.slice(0, -1));
  } else if (node.size === 1) {
    const [key, value] = node.entries().next().value;

    if (key !== LEAF) merge(path.slice(0, -1), key, value);
  }
};

/** Deletes `key` from `tree`, merging or removing nodes left with one or no child. */
const removeFromTree = (tree, key) => {
  const [node, path] = trackDown(tree, key);

  if (node === undefined) return;

  node.delete(LEAF);

  if (node.size === 0) {
    cleanup(path);
  } else if (node.size === 1) {
    const [childKey, value] = node.entries().next().value;

    merge(path, childKey, value);
  }
};

/**
 * Map with string keys, backed by a radix tree, that additionally supports
 * prefix views ({@link SearchableMap#atPrefix}) and fuzzy lookups
 * ({@link SearchableMap#fuzzyGet}).
 */
class SearchableMap {
  _tree;
  _prefix;
  _size = undefined;

  /**
   * @param {Map} tree - Radix tree root; a new empty tree by default.
   * @param {string} prefix - Prefix prepended to every key of the tree.
   */
  constructor(tree = new Map(), prefix = "") {
    this._tree = tree;
    this._prefix = prefix;
  }

  /**
   * Returns a SearchableMap of the entries whose key starts with `prefix`.
   *
   * @param {string} prefix - Must itself start with this map's prefix.
   * @returns {SearchableMap} View on the matching subtree.
   * @throws {Error} When `prefix` does not start with this map's prefix.
   */
  atPrefix(prefix) {
    if (!prefix.startsWith(this._prefix)) throw new Error("Mismatched prefix");

    const [node, path] = trackDown(
      this._tree,
      prefix.slice(this._prefix.length),
    );

    if (node === undefined) {
      // The prefix ends in the middle of an edge: expose the rest of that edge.
      const [parentNode, remaining] = last(path);

      for (const edge of parentNode.keys())
        if (edge !== LEAF && edge.startsWith(remaining)) {
          const subtree = new Map();

          subtree.set(edge.slice(remaining.length), parentNode.get(edge));

          return new SearchableMap(subtree, prefix);
        }
    }

    return new SearchableMap(node, prefix);
  }

  /** Removes every entry. */
  clear() {
    this._size = undefined;
    this._tree.clear();
  }

  /** Removes the entry for `key`, if any. */
  delete(key) {
    this._size = undefined;

    return removeFromTree(this._tree, key);
  }

  /** @returns {TreeIterator} Iterator over `[key, value]` pairs. */
  entries() {
    return new TreeIterator(this, ENTRIES);
  }

  /** Calls `fn(key, value, map)` for every entry. */
  forEach(fn) {
    for (const [key, value] of this) fn(key, value, this);
  }

  /**
   * @param {string} key - String to match approximately.
   * @param {number} maxEditDistance - Maximum edit distance.
   * @returns {Map<string, [*, number]>} Map of key -> [value, distance].
   */
  fuzzyGet(key, maxEditDistance) {
    return fuzzySearch(this._tree, key, maxEditDistance);
  }

  /** @returns {*} Value stored for `key`, or `undefined`. */
  get(key) {
    const node = lookup(this._tree, key);

    return node !== undefined ? node.get(LEAF) : undefined;
  }

  /** @returns {boolean} Whether `key` has an entry. */
  has(key) {
    return lookup(this._tree, key)?.has(LEAF) ?? false;
  }

  /** @returns {TreeIterator} Iterator over keys. */
  keys() {
    return new TreeIterator(this, KEYS);
  }

  /**
   * Stores `value` under `key`.
   *
   * @returns {SearchableMap} This map, for chaining.
   * @throws {Error} When `key` is not a string.
   */
  set(key, value) {
    if (typeof key !== "string") throw new Error("key must be a string");

    this._size = undefined;
    createPath(this._tree, key).set(LEAF, value);

    return this;
  }

  /** @returns {number} Number of entries (computed lazily, then cached). */
  get size() {
    // Compare against undefined so that a cached size of 0 is honoured too.
    if (this._size !== undefined) return this._size;

    let count = 0;
    const iterator = this.entries();

    while (!iterator.next().done) count += 1;

    this._size = count;

    return count;
  }

  /**
   * Replaces the value for `key` with `fn(currentValue)`.
   *
   * @returns {SearchableMap} This map, for chaining.
   * @throws {Error} When `key` is not a string.
   */
  update(key, fn) {
    if (typeof key !== "string") throw new Error("key must be a string");

    this._size = undefined;

    const node = createPath(this._tree, key);

    node.set(LEAF, fn(node.get(LEAF)));

    return this;
  }

  /**
   * Returns the value for `key`, first storing `initial()` when it is missing.
   *
   * @throws {Error} When `key` is not a string.
   */
  fetch(key, initial) {
    if (typeof key !== "string") throw new Error("key must be a string");

    this._size = undefined;

    const node = createPath(this._tree, key);
    let value = node.get(LEAF);

    if (value === undefined) {
      value = initial();
      node.set(LEAF, value);
    }

    return value;
  }

  /** @returns {TreeIterator} Iterator over values. */
  values() {
    return new TreeIterator(this, VALUES);
  }

  [Symbol.iterator]() {
    return this.entries();
  }

  /** Builds a SearchableMap from an iterable of `[key, value]` pairs. */
  static from(entries) {
    const map = new SearchableMap();

    for (const [key, value] of entries) map.set(key, value);

    return map;
  }

  /** Builds a SearchableMap from the own enumerable properties of `object`. */
  static fromObject(object) {
    return SearchableMap.from(Object.entries(object));
  }
}

/* -------------------------------------------------------------------------- */
/* Shared helpers and defaults                                                */
/* -------------------------------------------------------------------------- */

/** @returns {boolean} Whether a document with the given ID is in the index. */
const has = (index, id) => index._idToShortId.has(id);

/** @returns {Object|undefined} Stored fields of the document, if it is indexed. */
const getStoredFields = (index, id) => {
  const shortId = index._idToShortId.get(id);

  if (shortId != null) return index._storedFields.get(shortId);

  return undefined;
};

/** Default tokenizer separator: runs of newlines, Unicode separators and punctuation. */
const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]+/u;

const OR = "or";
const AND = "and";
const AND_NOT = "and_not";

/** Resolves after `ms` milliseconds. */
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/** Appends `term` to `target` unless already present. */
const assignUniqueTerm = (target, term) => {
  if (!target.includes(term)) target.push(term);
};

/** Appends every term of `source` that `target` does not contain yet. */
const assignUniqueTerms = (target, source) => {
  for (const term of source) if (!target.includes(term)) target.push(term);
};

/** Comparator sorting by descending `score`. */
const byScore = ({ score: a }, { score: b }) => b - a;

const createMap = () => new Map();

/** Converts an object with numeric-string keys into a Map with integer keys. */
const objectToNumericMap = (object) => {
  const map = new Map();

  for (const key of Object.keys(object))
    map.set(parseInt(key, 10), object[key]);

  return map;
};

/** Like {@link objectToNumericMap}, yielding to the event loop every 1000 keys. */
const objectToNumericMapAsync = async (object) => {
  const map = new Map();
  let count = 0;

  for (const key of Object.keys(object)) {
    map.set(parseInt(key, 10), object[key]);
    if (++count % 1000 === 0) await wait(0);
  }

  return map;
};

/** Returns `object[property]` only when it is an own property, else `undefined`. */
const getOwnProperty = (object, property) =>
  Object.prototype.hasOwnProperty.call(object, property)
    ? object[property]
    : undefined;

/**
 * Functions combining two raw result maps (shortId -> { score, terms, match }),
 * keyed by combination operator.
 */
const combinators = {
  [OR]: (a, b) => {
    for (const docId of b.keys()) {
      const existing = a.get(docId);

      if (existing == null) {
        a.set(docId, b.get(docId));
      } else {
        const { score, terms, match } = b.get(docId);

        existing.score = existing.score + score;
        existing.match = Object.assign(existing.match, match);
        assignUniqueTerms(existing.terms, terms);
      }
    }

    return a;
  },
  [AND]: (a, b) => {
    const combined = new Map();

    for (const docId of b.keys()) {
      const existing = a.get(docId);

      if (existing == null) continue;

      const { score, terms, match } = b.get(docId);

      assignUniqueTerms(existing.terms, terms);
      combined.set(docId, {
        score: existing.score + score,
        terms: existing.terms,
        match: Object.assign(existing.match, match),
      });
    }

    return combined;
  },
  [AND_NOT]: (a, b) => {
    for (const docId of b.keys()) a.delete(docId);

    return a;
  },
};

/**
 * BM25-style relevance score of one term in one field of one document.
 *
 * @param {number} termFreq - Occurrences of the term in the field.
 * @param {number} matchingCount - Documents containing the term in the field.
 * @param {number} totalCount - Documents in the index.
 * @param {number} fieldLength - Length recorded for this field of the document.
 * @param {number} avgFieldLength - Average recorded length of the field.
 * @param {{k: number, b: number, d: number}} bm25params - Tuning parameters.
 * @returns {number} Score.
 */
const calcBM25Score = (
  termFreq,
  matchingCount,
  totalCount,
  fieldLength,
  avgFieldLength,
  bm25params,
) => {
  const { k, b, d } = bm25params;
  const invDocFreq = Math.log(
    1 + (totalCount - matchingCount + 0.5) / (matchingCount + 0.5),
  );

  return (
    invDocFreq *
    (d +
      (termFreq * (k + 1)) /
        (termFreq + k * (1 - b + (b * fieldLength) / avgFieldLength)))
  );
};

/**
 * Returns a mapper turning `(term, index, terms)` into a query spec
 * `{ term, fuzzy, prefix, termBoost }` according to `options`.
 */
const termToQuerySpec = (options) => (term, index, terms) => ({
  term,
  fuzzy:
    typeof options.fuzzy === "function"
      ? options.fuzzy(term, index, terms)
      : (options.fuzzy ?? false),
  prefix:
    typeof options.prefix === "function"
      ? options.prefix(term, index, terms)
      : options.prefix === true,
  termBoost:
    typeof options.boostTerm === "function"
      ? options.boostTerm(term, index, terms)
      : 1,
});

/* -------------------------------------------------------------------------- */
/* Index bookkeeping                                                          */
/* -------------------------------------------------------------------------- */

/** Logs a warning that a term expected in `fieldId` of a document was not found. */
const warnDocumentChanged = (index, shortDocumentId, fieldId, term) => {
  for (const fieldName of Object.keys(index._fieldIds))
    if (index._fieldIds[fieldName] === fieldId) {
      index._options.logger(
        "warn",
        `SlimSearch: document with ID ${index._documentIds.get(shortDocumentId)} has changed before removal: term "${term}" was not present in field "${fieldName}". Removing a document after it has changed can corrupt the index!`,
        "version_conflict",
      );

      return;
    }
};

/** Records one more occurrence of `term` in field `fieldId` of document `documentId`. */
const addTerm = (index, fieldId, documentId, term) => {
  const indexData = index._index.fetch(term, createMap);
  let fieldIndex = indexData.get(fieldId);

  if (fieldIndex == null) {
    fieldIndex = new Map();
    fieldIndex.set(documentId, 1);
    indexData.set(fieldId, fieldIndex);
  } else {
    const frequency = fieldIndex.get(documentId);

    fieldIndex.set(documentId, (frequency ?? 0) + 1);
  }
};

/**
 * Removes one occurrence of `term` in field `fieldId` of document `documentId`,
 * dropping empty maps and the term itself when nothing is left. Logs a warning
 * when the occurrence is not in the index.
 */
const removeTerm = (index, fieldId, documentId, term) => {
  if (!index._index.has(term)) {
    warnDocumentChanged(index, documentId, fieldId, term);

    return;
  }

  const indexData = index._index.fetch(term, createMap);
  const fieldIndex = indexData.get(fieldId);
  const frequency = fieldIndex?.get(documentId);

  if (!fieldIndex || typeof frequency === "undefined") {
    warnDocumentChanged(index, documentId, fieldId, term);
  } else if (frequency <= 1) {
    if (fieldIndex.size <= 1) indexData.delete(fieldId);
    else fieldIndex.delete(documentId);
  } else {
    fieldIndex.set(documentId, frequency - 1);
  }

  if (index._index.get(term).size === 0) index._index.delete(term);
};

/**
 * Stores the length of a document field and folds it into the running average.
 *
 * @param {number} count - Number of documents the current average is based on.
 * @param {number} length - Length to record.
 */
const addFieldLength = (index, documentId, fieldId, count, length) => {
  let fieldLengths = index._fieldLength.get(documentId);

  if (fieldLengths == null) {
    fieldLengths = [];
    index._fieldLength.set(documentId, fieldLengths);
  }

  fieldLengths[fieldId] = length;

  const totalLength = (index._avgFieldLength[fieldId] || 0) * count + length;

  index._avgFieldLength[fieldId] = totalLength / (count + 1);
};

/** Registers `documentId` and returns the numeric short ID assigned to it. */
const addDocumentId = (index, documentId) => {
  const shortId = index._nextId;

  index._idToShortId.set(documentId, shortId);
  index._documentIds.set(shortId, documentId);
  index._documentCount += 1;
  index._nextId += 1;

  return shortId;
};

/** Copies the configured `storeFields` of `doc` into the stored fields of `documentId`. */
const saveStoredFields = (index, documentId, doc) => {
  const { storeFields, extractField } = index._options;

  // A nullish `storeFields` (e.g. explicitly passed as undefined) means
  // "store nothing"; iterating it would throw after the ID was registered.
  if (storeFields == null || storeFields.length === 0) return;

  let documentFields = index._storedFields.get(documentId);

  if (documentFields === undefined) {
    documentFields = {};
    index._storedFields.set(documentId, documentFields);
  }

  for (const fieldName of storeFields) {
    const fieldValue = extractField(doc, fieldName);

    if (fieldValue != null) documentFields[fieldName] = fieldValue;
  }
};

/**
 * Adds a document to the index.
 *
 * @param {SearchIndex} index - Target index.
 * @param {Object} document - Document to add; must carry the ID field.
 * @throws {Error} When the ID field is missing or the ID is already indexed.
 */
const add = (index, document) => {
  const {
    extractField,
    stringifyField,
    tokenize,
    processTerm,
    fields,
    idField,
  } = index._options;
  const id = extractField(document, idField);

  if (id == null)
    throw new Error(`SlimSearch: document does not have ID field "${idField}"`);

  if (has(index, id)) throw new Error(`SlimSearch: duplicate ID ${id}`);

  const shortDocumentId = addDocumentId(index, id);

  saveStoredFields(index, shortDocumentId, document);

  for (const field of fields) {
    const fieldValue = extractField(document, field);

    if (fieldValue == null) continue;

    const tokens = tokenize(stringifyField(fieldValue, field), field);
    const fieldId = index._fieldIds[field];
    const uniqueTerms = new Set(tokens).size;

    addFieldLength(
      index,
      shortDocumentId,
      fieldId,
      index._documentCount - 1,
      uniqueTerms,
    );

    for (const token of tokens) {
      const processedTerm = processTerm(token, field);

      if (Array.isArray(processedTerm)) {
        for (const term of processedTerm)
          addTerm(index, fieldId, shortDocumentId, term);
      } else if (processedTerm) {
        addTerm(index, fieldId, shortDocumentId, processedTerm);
      }
    }
  }
};

/** Adds every document of `documents` to the index, synchronously. */
const addAll = (index, documents) => {
  for (const document of documents) add(index, document);
};

/**
 * Adds documents in chunks, yielding to the event loop between chunks.
 *
 * @param {SearchIndex} index - Target index.
 * @param {Object[]} documents - Documents to add.
 * @param {{chunkSize?: number}} options - Chunk size (default 10).
 * @returns {Promise<void>} Resolves once every document has been added.
 */
const addAllAsync = (index, documents, options = {}) => {
  const { chunkSize = 10 } = options;
  const initial = { chunk: [], promise: Promise.resolve() };

  const { chunk: remaining, promise } = documents.reduce(
    ({ chunk, promise: pending }, document, position) => {
      chunk.push(document);

      if ((position + 1) % chunkSize === 0)
        return {
          chunk: [],
          promise: pending
            .then(() => new Promise((resolve) => setTimeout(resolve, 0)))
            .then(() => addAll(index, chunk)),
        };

      return { chunk, promise: pending };
    },
    initial,
  );

  return promise.then(() => addAll(index, remaining));
};

const defaultBM25params = { k: 1.2, b: 0.7, d: 0.5 };

const defaultOptions = {
  idField: "id",
  extractField: (document, fieldName) => document[fieldName],
  stringifyField: (fieldValue) => fieldValue.toString(),
  tokenize: (text) => text.split(SPACE_OR_PUNCTUATION),
  processTerm: (term) => term.toLowerCase(),
  fields: undefined,
  searchOptions: undefined,
  storeFields: [],
  logger: (level, message) => {
    console?.[level]?.(message);
  },
  autoVacuum: true,
};

const defaultSearchOptions = {
  combineWith: OR,
  prefix: false,
  fuzzy: false,
  maxFuzzy: 6,
  boost: {},
  weights: { fuzzy: 0.45, prefix: 0.375 },
  bm25: defaultBM25params,
};

const defaultAutoSuggestOptions = {
  combineWith: AND,
  // Only the last term of the query is treated as a prefix.
  prefix: (term, position, terms) => position === terms.length - 1,
};

const defaultVacuumOptions = { batchSize: 1000, batchWait: 10 };
const defaultVacuumConditions = { minDirtFactor: 0.1, minDirtCount: 20 };
const defaultAutoVacuumOptions = {
  ...defaultVacuumOptions,
  ...defaultVacuumConditions,
};

/**
 * Returns the default value of an index option.
 *
 * @param {string} optionName - Name of the option.
 * @throws {Error} When `optionName` is not a known option.
 */
const getDefaultValue = (optionName) => {
  if (Object.prototype.hasOwnProperty.call(defaultOptions, optionName))
    return getOwnProperty(defaultOptions, optionName);

  throw new Error(`SlimSearch: unknown option "${optionName}"`);
};

/* -------------------------------------------------------------------------- */
/* Query execution                                                            */
/* -------------------------------------------------------------------------- */

/** Query value matching every document in the index. */
const WILDCARD = Symbol("*");

/** Builds the raw result map for a {@link WILDCARD} query. */
const executeWildcardQuery = (index, searchOptions) => {
  const results = new Map();
  const options = { ...index._options.searchOptions, ...searchOptions };

  for (const [shortId, id] of index._documentIds) {
    const score = options.boostDocument
      ? options.boostDocument(id, "", index._storedFields.get(shortId))
      : 1;

    results.set(shortId, { score, terms: [], match: {} });
  }

  return results;
};

/**
 * Combines several raw result maps with the given operator.
 *
 * @param {Map[]} results - Raw result maps.
 * @param {string} combineWith - "or", "and" or "and_not" (case-insensitive).
 * @returns {Map} Combined raw results.
 * @throws {Error} When the operator is not one of the known combinators.
 */
const combineResults = (results, combineWith = OR) => {
  if (results.length === 0) return new Map();

  const operator = combineWith.toLowerCase();
  // Own-property lookup: inherited names such as "constructor" are not operators.
  const combinator = getOwnProperty(combinators, operator);

  if (!combinator)
    throw new Error(`Invalid combination operator: ${combineWith}`);

  return results.reduce(combinator);
};

/**
 * Scores every document containing `derivedTerm` and merges it into `results`.
 * Entries pointing at documents no longer in the index are removed on the fly.
 *
 * @param {SearchIndex} index - Index being searched.
 * @param {string} sourceTerm - Term as it appears in the query.
 * @param {string} derivedTerm - Indexed term that matched (exact, prefix or fuzzy).
 * @param {number} termWeight - Weight of this kind of match.
 * @param {number} termBoost - Boost of the query term.
 * @param {Map|undefined} fieldTermData - Index data of `derivedTerm` (fieldId -> docId -> frequency).
 * @param {Object<string, number>} fieldBoosts - Boost factor per searched field.
 * @param {Function|undefined} boostDocumentFn - Optional per-document boost; a falsy result skips the document.
 * @param {{k: number, b: number, d: number}} bm25params - Scoring parameters.
 * @param {Map} results - Accumulator, shortId -> { score, terms, match }.
 * @returns {Map} `results`.
 */
const termResults = (
  index,
  sourceTerm,
  derivedTerm,
  termWeight,
  termBoost,
  fieldTermData,
  fieldBoosts,
  boostDocumentFn,
  bm25params,
  results = new Map(),
) => {
  if (fieldTermData == null) return results;

  for (const field of Object.keys(fieldBoosts)) {
    const fieldBoost = fieldBoosts[field];
    const fieldId = index._fieldIds[field];
    const fieldTermFreqs = fieldTermData.get(fieldId);

    if (fieldTermFreqs == null) continue;

    let matchingFields = fieldTermFreqs.size;
    const avgFieldLength = index._avgFieldLength[fieldId];

    for (const docId of fieldTermFreqs.keys()) {
      if (!index._documentIds.has(docId)) {
        // Leftover of a discarded document: clean it up and do not count it.
        removeTerm(index, fieldId, docId, derivedTerm);
        matchingFields -= 1;
        continue;
      }

      const docBoost = boostDocumentFn
        ? boostDocumentFn(
            index._documentIds.get(docId),
            derivedTerm,
            index._storedFields.get(docId),
          )
        : 1;

      if (!docBoost) continue;

      const termFreq = fieldTermFreqs.get(docId);
      const fieldLength = index._fieldLength.get(docId)[fieldId];
      const rawScore = calcBM25Score(
        termFreq,
        matchingFields,
        index._documentCount,
        fieldLength,
        avgFieldLength,
        bm25params,
      );
      const weightedScore =
        termWeight * termBoost * fieldBoost * docBoost * rawScore;
      const result = results.get(docId);

      if (result) {
        result.score += weightedScore;
        assignUniqueTerm(result.terms, sourceTerm);

        const match = getOwnProperty(result.match, derivedTerm);

        if (match) match.push(field);
        else result.match[derivedTerm] = [field];
      } else {
        results.set(docId, {
          score: weightedScore,
          terms: [sourceTerm],
          match: { [derivedTerm]: [field] },
        });
      }
    }
  }

  return results;
};

/**
 * Executes one query spec (a single term with its prefix/fuzzy settings).
 *
 * @param {SearchIndex} index - Index being searched.
 * @param {{term: string, fuzzy: boolean|number, prefix: boolean, termBoost: number}} query - Query spec.
 * @param {Object} searchOptions - Search options overriding the index defaults.
 * @returns {Map} Raw results, shortId -> { score, terms, match }.
 */
const executeQuerySpec = (index, query, searchOptions) => {
  const options = { ...index._options.searchOptions, ...searchOptions };
  const boosts = Object.fromEntries(
    (options.fields ?? index._options.fields).map((field) => [
      field,
      getOwnProperty(options.boost, field) || 1,
    ]),
  );
  const { boostDocument, weights, maxFuzzy, bm25: bm25params } = options;
  const { fuzzy: fuzzyWeight, prefix: prefixWeight } = {
    ...defaultSearchOptions.weights,
    ...weights,
  };

  const exactData = index._index.get(query.term);
  const results = termResults(
    index,
    query.term,
    query.term,
    1,
    query.termBoost,
    exactData,
    boosts,
    boostDocument,
    bm25params,
  );

  let prefixMatches;
  let fuzzyMatches;

  if (query.prefix) prefixMatches = index._index.atPrefix(query.term);

  if (query.fuzzy) {
    // `true` means 0.2; values below 1 are a fraction of the term length.
    const fuzzy = query.fuzzy === true ? 0.2 : query.fuzzy;
    const maxDistance =
      fuzzy < 1
        ? Math.min(maxFuzzy, Math.round(query.term.length * fuzzy))
        : fuzzy;

    if (maxDistance)
      fuzzyMatches = index._index.fuzzyGet(query.term, maxDistance);
  }

  if (prefixMatches)
    for (const [term, data] of prefixMatches) {
      const distance = term.length - query.term.length;

      // Zero distance is the exact match, already scored above.
      if (!distance) continue;

      // A term matched as prefix is not scored again as a fuzzy match.
      fuzzyMatches?.delete(term);

      const weight =
        (prefixWeight * term.length) / (term.length + 0.3 * distance);

      termResults(
        index,
        query.term,
        term,
        weight,
        query.termBoost,
        data,
        boosts,
        boostDocument,
        bm25params,
        results,
      );
    }

  if (fuzzyMatches)
    for (const term of fuzzyMatches.keys()) {
      const [data, distance] = fuzzyMatches.get(term);

      if (!distance) continue;

      const weight = (fuzzyWeight * term.length) / (term.length + distance);

      termResults(
        index,
        query.term,
        term,
        weight,
        query.termBoost,
        data,
        boosts,
        boostDocument,
        bm25params,
        results,
      );
    }

  return results;
};

/**
 * Executes a query: the wildcard, a query string, or a query tree
 * `{ queries, ...options }` whose sub-queries are executed recursively.
 *
 * @returns {Map} Raw results, shortId -> { score, terms, match }.
 */
const executeQuery = (index, query, searchOptions = {}) => {
  if (query === WILDCARD) return executeWildcardQuery(index, searchOptions);

  if (typeof query !== "string") {
    const options = { ...searchOptions, ...query, queries: undefined };
    const results = query.queries.map((subQuery) =>
      executeQuery(index, subQuery, options),
    );

    return combineResults(results, options.combineWith);
  }

  const {
    tokenize,
    processTerm,
    searchOptions: globalSearchOptions,
  } = index._options;
  const options = {
    tokenize,
    processTerm,
    ...globalSearchOptions,
    ...searchOptions,
  };
  const { tokenize: searchTokenize, processTerm: searchProcessTerm } = options;

  const results = searchTokenize(query)
    .flatMap((term) => searchProcessTerm(term))
    .filter((term) => !!term)
    .map(termToQuerySpec(options))
    .map((querySpec) => executeQuerySpec(index, querySpec, options));

  return combineResults(results, options.combineWith);
};

/**
 * Searches the index.
 *
 * @param {SearchIndex} index - Index to search.
 * @param {string|symbol|Object} query - Query string, {@link WILDCARD} or query tree.
 * @param {Object} searchOptions - Options overriding the index search options.
 * @returns {Object[]} Results `{ id, score, terms, queryTerms, match, ...storedFields }`,
 *   sorted by descending score (a wildcard query without `boostDocument` is left unsorted).
 */
const search = (index, query, searchOptions = {}) => {
  const { searchOptions: globalSearchOptions } = index._options;
  const options = { ...globalSearchOptions, ...searchOptions };
  const rawResults = executeQuery(index, query, searchOptions);
  const results = [];

  for (const [docId, { score, terms, match }] of rawResults) {
    // Documents matching more query terms rank higher.
    const quality = terms.length || 1;
    const result = {
      id: index._documentIds.get(docId),
      score: score * quality,
      terms: Object.keys(match),
      queryTerms: terms,
      match,
    };

    Object.assign(result, index._storedFields.get(docId));

    if (options.filter == null || options.filter(result)) results.push(result);
  }

  const isUnboostedWildcard =
    query === WILDCARD && options.boostDocument == null;

  if (!isUnboostedWildcard) results.sort(byScore);

  return results;
};

/**
 * Returns query suggestions: the distinct sets of matched terms, each scored
 * with the average score of the results that produced it.
 *
 * @returns {Array<{suggestion: string, terms: string[], score: number}>} Sorted by descending score.
 */
const autoSuggest = (index, queryString, options = {}) => {
  const suggestOptions = { ...index._options.autoSuggestOptions, ...options };
  const suggestions = new Map();

  for (const { score, terms } of search(index, queryString, suggestOptions)) {
    const phrase = terms.join(" ");
    const suggestion = suggestions.get(phrase);

    if (suggestion != null) {
      suggestion.score += score;
      suggestion.count += 1;
    } else {
      suggestions.set(phrase, { score, terms, count: 1 });
    }
  }

  const results = [];

  for (const [suggestion, { score, terms, count }] of suggestions)
    results.push({ suggestion, terms, score: score / count });

  results.sort(byScore);

  return results;
};

/* -------------------------------------------------------------------------- */
/* Index class and (de)serialization                                          */
/* -------------------------------------------------------------------------- */

/** The search index: inverted index plus per-document bookkeeping. */
class SearchIndex {
  _options;
  _index;
  _documentCount;
  _documentIds;
  _idToShortId;
  _fieldIds;
  _fieldLength;
  _avgFieldLength;
  _nextId;
  _storedFields;
  _dirtCount;
  _currentVacuum;
  _enqueuedVacuum;
  _enqueuedVacuumConditions;

  /**
   * @param {Object} options - Index options; `fields` is mandatory.
   * @throws {Error} When `fields` is not provided.
   */
  constructor(options) {
    if (!options?.fields)
      throw new Error('SlimSearch: option "fields" must be provided');

    const autoVacuum =
      options.autoVacuum == null || options.autoVacuum === true
        ? defaultAutoVacuumOptions
        : options.autoVacuum;

    this._options = {
      ...defaultOptions,
      ...options,
      autoVacuum,
      searchOptions: { ...defaultSearchOptions, ...options.searchOptions },
      autoSuggestOptions: {
        ...defaultAutoSuggestOptions,
        ...options.autoSuggestOptions,
      },
    };

    this._index = new SearchableMap();
    this._documentCount = 0;
    this._documentIds = new Map();
    this._idToShortId = new Map();
    this._fieldIds = {};
    this._fieldLength = new Map();
    this._avgFieldLength = [];
    this._nextId = 0;
    this._storedFields = new Map();
    this._dirtCount = 0;
    this._currentVacuum = null;
    this._enqueuedVacuum = null;
    this._enqueuedVacuumConditions = defaultVacuumConditions;

    this.addFields(this._options.fields);
  }

  /** @returns {boolean} Whether a vacuum is currently running. */
  get isVacuuming() {
    return this._currentVacuum != null;
  }

  /** @returns {number} Number of documents discarded since the last vacuum. */
  get dirtCount() {
    return this._dirtCount;
  }

  /** @returns {number} Ratio of discarded documents, between 0 and 1. */
  get dirtFactor() {
    return this._dirtCount / (1 + this._documentCount + this._dirtCount);
  }

  /** @returns {number} Number of documents in the index. */
  get documentCount() {
    return this._documentCount;
  }

  /** @returns {number} Number of distinct terms in the index. */
  get termCount() {
    return this._index.size;
  }

  /** @returns {Object} Plain-object snapshot suitable for `JSON.stringify`. */
  toJSON() {
    const index = [];

    for (const [term, fieldIndex] of this._index) {
      const data = {};

      for (const [fieldId, freqs] of fieldIndex)
        data[fieldId] = Object.fromEntries(freqs);

      index.push([term, data]);
    }

    return {
      documentCount: this._documentCount,
      nextId: this._nextId,
      documentIds: Object.fromEntries(this._documentIds),
      fieldIds: this._fieldIds,
      fieldLength: Object.fromEntries(this._fieldLength),
      averageFieldLength: this._avgFieldLength,
      storedFields: Object.fromEntries(this._storedFields),
      dirtCount: this._dirtCount,
      index,
      version: 2,
    };
  }

  /** Assigns each field name its position in `fields` as numeric field ID. */
  addFields(fields) {
    for (let i = 0; i < fields.length; i++) this._fieldIds[fields[i]] = i;
  }
}

const getMissingOptionsMessage = (name) =>
  `SlimSearch: ${name} should be given the same options used when serializing the index`;

/**
 * Creates an empty index.
 *
 * @param {Object} options - Index options; `fields` is mandatory.
 * @returns {SearchIndex} New index.
 */
const createIndex = (options) => new SearchIndex(options);

/** Creates an index and restores the scalar parts of a serialized snapshot. */
const instantiateIndex = (
  { documentCount, nextId, fieldIds, averageFieldLength, dirtCount, version },
  options,
) => {
  if (version !== 2)
    throw new Error(
      "SlimSearch: cannot deserialize an index created with an incompatible version",
    );

  const searchIndex = createIndex(options);

  searchIndex._documentCount = documentCount;
  searchIndex._nextId = nextId;
  searchIndex._idToShortId = new Map();
  searchIndex._fieldIds = fieldIds;
  searchIndex._avgFieldLength = averageFieldLength;
  searchIndex._dirtCount = dirtCount ?? 0;
  searchIndex._index = new SearchableMap();

  return searchIndex;
};

/**
 * Restores an index from the object produced by `toJSON`.
 *
 * @param {Object} serialized - Snapshot object.
 * @param {Object} options - Options, the same used when the index was created.
 * @returns {SearchIndex} Restored index.
 * @throws {Error} When the snapshot version is not supported.
 */
const loadIndex = (serialized, options) => {
  const { index, documentIds, fieldLength, storedFields } = serialized;
  const searchIndex = instantiateIndex(serialized, options);

  searchIndex._documentIds = objectToNumericMap(documentIds);
  searchIndex._fieldLength = objectToNumericMap(fieldLength);
  searchIndex._storedFields = objectToNumericMap(storedFields);

  for (const [shortId, id] of searchIndex._documentIds)
    searchIndex._idToShortId.set(id, shortId);

  for (const [term, data] of index) {
    const dataMap = new Map();

    for (const fieldId of Object.keys(data))
      dataMap.set(parseInt(fieldId, 10), objectToNumericMap(data[fieldId]));

    searchIndex._index.set(term, dataMap);
  }

  return searchIndex;
};

/** Asynchronous variant of {@link loadIndex} that periodically yields to the event loop. */
const loadIndexAsync = async (serialized, options) => {
  const { index, documentIds, fieldLength, storedFields } = serialized;
  const searchIndex = instantiateIndex(serialized, options);

  searchIndex._documentIds = await objectToNumericMapAsync(documentIds);
  searchIndex._fieldLength = await objectToNumericMapAsync(fieldLength);
  searchIndex._storedFields = await objectToNumericMapAsync(storedFields);

  for (const [shortId, id] of searchIndex._documentIds)
    searchIndex._idToShortId.set(id, shortId);

  let count = 0;

  for (const [term, data] of index) {
    const dataMap = new Map();

    for (const fieldId of Object.keys(data))
      dataMap.set(
        parseInt(fieldId, 10),
        await objectToNumericMapAsync(data[fieldId]),
      );

    if (++count % 1000 === 0) await wait(0);

    searchIndex._index.set(term, dataMap);
  }

  return searchIndex;
};

/**
 * Restores an index from a JSON string.
 *
 * @throws {Error} When `options` is missing.
 */
const loadJSONIndex = (json, options) => {
  if (!options) throw new Error(getMissingOptionsMessage("loadJSONIndex"));

  return loadIndex(JSON.parse(json), options);
};

/**
 * Asynchronous variant of {@link loadJSONIndex}. Missing options and invalid
 * JSON are reported synchronously, before a promise is returned.
 */
const loadJSONIndexAsync = (json, options) => {
  if (!options) throw new Error(getMissingOptionsMessage("loadJSONIndexAsync"));

  return loadIndexAsync(JSON.parse(json), options);
};

/* -------------------------------------------------------------------------- */
/* Vacuuming, discarding and removing                                         */
/* -------------------------------------------------------------------------- */

/** Whether the index is dirty enough for `conditions`; no conditions means yes. */
const vacuumConditionsMet = (index, conditions) => {
  if (conditions == null) return true;

  const {
    minDirtCount = defaultAutoVacuumOptions.minDirtCount,
    minDirtFactor = defaultAutoVacuumOptions.minDirtFactor,
  } = conditions;

  return index.dirtCount >= minDirtCount && index.dirtFactor >= minDirtFactor;
};

/**
 * Removes index entries that reference discarded documents, pausing
 * `batchWait` ms after every `batchSize` terms, then promotes the enqueued
 * vacuum (if any) to current.
 */
const performVacuuming = async (index, options, conditions) => {
  const initialDirtCount = index._dirtCount;

  if (vacuumConditionsMet(index, conditions)) {
    const batchSize = options.batchSize ?? defaultVacuumOptions.batchSize;
    const batchWait = options.batchWait ?? defaultVacuumOptions.batchWait;
    let visited = 1;

    for (const [term, fieldsData] of index._index) {
      for (const [fieldId, fieldIndex] of fieldsData)
        for (const [shortId] of fieldIndex) {
          if (index._documentIds.has(shortId)) continue;

          if (fieldIndex.size <= 1) fieldsData.delete(fieldId);
          else fieldIndex.delete(shortId);
        }

      if (index._index.get(term).size === 0) index._index.delete(term);

      if (visited % batchSize === 0)
        await new Promise((resolve) => setTimeout(resolve, batchWait));

      visited += 1;
    }

    // Documents discarded while vacuuming stay counted as dirt.
    index._dirtCount -= initialDirtCount;
  }

  // Yield once so that the returned promise is assigned by the caller first.
  await null;

  index._currentVacuum = index._enqueuedVacuum;
  index._enqueuedVacuum = null;
};

/**
 * Starts a vacuum when `conditions` are met. While one is running, at most
 * one follow-up vacuum is enqueued and its promise returned.
 *
 * @returns {Promise<void>} Resolves when the (enqueued) vacuum has completed.
 */
const conditionalVacuum = (index, options, conditions) => {
  if (index._currentVacuum) {
    // An unconditional request (no `conditions`) makes the enqueued vacuum unconditional.
    index._enqueuedVacuumConditions =
      index._enqueuedVacuumConditions && conditions;

    if (index._enqueuedVacuum != null) return index._enqueuedVacuum;

    index._enqueuedVacuum = index._currentVacuum.then(() => {
      const enqueuedConditions = index._enqueuedVacuumConditions;

      index._enqueuedVacuumConditions = defaultVacuumConditions;

      return performVacuuming(index, options, enqueuedConditions);
    });

    return index._enqueuedVacuum;
  }

  if (!vacuumConditionsMet(index, conditions)) return Promise.resolve();

  index._currentVacuum = performVacuuming(index, options);

  return index._currentVacuum;
};

/** Triggers a conditional vacuum using the index `autoVacuum` settings, unless disabled. */
const maybeAutoVacuum = (index) => {
  if (index._options.autoVacuum === false) return;

  const { minDirtFactor, minDirtCount, batchSize, batchWait } =
    index._options.autoVacuum;

  conditionalVacuum(
    index,
    { batchSize, batchWait },
    { minDirtCount, minDirtFactor },
  );
};

/**
 * Cleans up the index from discarded documents, unconditionally.
 *
 * @param {SearchIndex} index - Index to vacuum.
 * @param {{batchSize?: number, batchWait?: number}} options - Batching options.
 * @returns {Promise<void>} Resolves when vacuuming has completed.
 */
const vacuum = (index, options = {}) => conditionalVacuum(index, options);

/**
 * Takes one field length out of the running average.
 *
 * @param {number} count - Number of documents the current average is based on.
 * @param {number} length - Length being removed.
 */
const removeFieldLength = (index, fieldId, count, length) => {
  if (count === 1) {
    index._avgFieldLength[fieldId] = 0;

    return;
  }

  const totalLength = index._avgFieldLength[fieldId] * count - length;

  index._avgFieldLength[fieldId] = totalLength / (count - 1);
};

/**
 * Discards the document with the given ID: it stops appearing in results
 * immediately, while its index entries are cleaned up lazily (by searches and
 * vacuuming).
 *
 * @throws {Error} When no document with that ID is in the index.
 */
const discard = (index, id) => {
  const shortId = index._idToShortId.get(id);

  if (shortId == null)
    throw new Error(
      `SlimSearch: cannot discard document with ID ${id}: it is not in the index`,
    );

  index._idToShortId.delete(id);
  index._documentIds.delete(shortId);
  index._storedFields.delete(shortId);

  index._fieldLength.get(shortId)?.forEach((fieldLength, fieldId) => {
    removeFieldLength(index, fieldId, index._documentCount, fieldLength);
  });

  index._fieldLength.delete(shortId);
  index._documentCount -= 1;
  index._dirtCount += 1;

  maybeAutoVacuum(index);
};

/** Discards several documents, evaluating auto-vacuum only once at the end. */
const discardAll = (index, ids) => {
  const autoVacuum = index._options.autoVacuum;

  try {
    index._options.autoVacuum = false;

    for (const id of ids) discard(index, id);
  } finally {
    index._options.autoVacuum = autoVacuum;
  }

  maybeAutoVacuum(index);
};

/**
 * Removes a document from the index. The document passed must be unchanged
 * since it was added, as its fields are re-tokenized to find the index
 * entries to delete.
 *
 * @throws {Error} When the ID field is missing or the document is not indexed.
 */
const remove = (index, document) => {
  const {
    tokenize,
    processTerm,
    extractField,
    stringifyField,
    fields,
    idField,
  } = index._options;
  const id = extractField(document, idField);

  if (id == null)
    throw new Error(`SlimSearch: document does not have ID field "${idField}"`);

  const shortId = index._idToShortId.get(id);

  if (shortId == null)
    throw new Error(
      `SlimSearch: cannot remove document with ID ${id}: it is not in the index`,
    );

  for (const field of fields) {
    const fieldValue = extractField(document, field);

    if (fieldValue == null) continue;

    const tokens = tokenize(stringifyField(fieldValue, field), field);
    const fieldId = index._fieldIds[field];
    const uniqueTerms = new Set(tokens).size;

    removeFieldLength(index, fieldId, index._documentCount, uniqueTerms);

    for (const token of tokens) {
      const processedTerm = processTerm(token, field);

      if (Array.isArray(processedTerm)) {
        for (const term of processedTerm)
          removeTerm(index, fieldId, shortId, term);
      } else if (processedTerm) {
        removeTerm(index, fieldId, shortId, processedTerm);
      }
    }
  }

  index._storedFields.delete(shortId);
  index._documentIds.delete(shortId);
  index._idToShortId.delete(id);
  index._fieldLength.delete(shortId);
  index._documentCount -= 1;
};

/**
 * Removes the given documents, or every document when called with the index only.
 *
 * @param {SearchIndex} index - Index to modify.
 * @param {Object[]} [documents] - Documents to remove.
 * @throws {Error} When a second argument is passed but is falsy.
 */
const removeAll = function (index, documents) {
  if (documents) {
    for (const document of documents) remove(index, document);

    return;
  }

  // `arguments` tells "omitted" apart from an explicitly passed undefined/null.
  if (arguments.length > 1)
    throw new Error(
      "Expected documents to be present. Omit the argument to remove all documents.",
    );

  index._index = new SearchableMap();
  index._documentCount = 0;
  index._documentIds = new Map();
  index._idToShortId = new Map();
  index._fieldLength = new Map();
  index._avgFieldLength = [];
  index._storedFields = new Map();
  index._nextId = 0;
};

/** Replaces an indexed document with `updatedDocument` (same ID): discard, then add. */
const replace = (index, updatedDocument) => {
  const { idField, extractField } = index._options;
  const id = extractField(updatedDocument, idField);

  discard(index, id);
  add(index, updatedDocument);
};

export {
  SearchableMap,
  WILDCARD,
  add,
  addAll,
  addAllAsync,
  autoSuggest,
  createIndex,
  discard,
  discardAll,
  getDefaultValue,
  getStoredFields,
  has,
  loadIndex,
  loadIndexAsync,
  loadJSONIndex,
  loadJSONIndexAsync,
  remove,
  removeAll,
  replace,
  search,
  vacuum,
};
//# sourceMappingURL=index.js.map