@orama/plugin-match-highlight
Version:
Orama plugin for search match highlighting
1 lines • 7.95 kB
Source Map (JSON)
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["import {\n AnyDocument,\n AnyOrama,\n Language,\n RawData,\n Result,\n Results,\n SearchParamsFullText,\n TypedDocument,\n load,\n save,\n search\n} from '@orama/orama'\nimport { boundedLevenshtein } from '@orama/orama/internals'\n\nexport interface Position {\n start: number\n length: number\n}\n\nexport type OramaWithHighlight<T extends AnyOrama> = T & {\n data: { positions: Record<string, Record<string, Record<string, Position[]>>> }\n}\n\nexport type ResultWithPositions<ResultDocument> = Result<ResultDocument> & {\n positions: Record<string, Record<string, Position[]>>\n}\n\nexport type SearchResultWithHighlight<ResultDocument> = Omit<Results<ResultDocument>, 'hits'> & {\n hits: ResultWithPositions<ResultDocument>[]\n}\n\nexport type RawDataWithPositions = RawData & {\n positions: Record<string, Record<string, Record<string, Position[]>>>\n}\n\nexport async function afterInsert<T extends AnyOrama>(orama: T, id: string): Promise<void> {\n if (!('positions' in orama.data)) {\n Object.assign(orama.data, { positions: {} })\n }\n\n await recursivePositionInsertion(\n orama as OramaWithHighlight<T>,\n (await orama.documentsStore.get(orama.data.docs, id))!,\n id\n )\n}\n\nconst wordRegEx = /[\\p{L}0-9_'-]+/gimu\n\nasync function recursivePositionInsertion<T extends AnyOrama, ResultDocument = TypedDocument<T>>(\n orama: OramaWithHighlight<T>,\n doc: ResultDocument,\n id: string,\n prefix = '',\n schema: T['schema'] = orama.schema\n): Promise<void> {\n orama.data.positions[id] = Object.create(null)\n for (const key of Object.keys(doc as object) as Array<keyof ResultDocument>) {\n const isNested = typeof doc[key] === 'object'\n const isSchemaNested = typeof schema[key] === 'object'\n const propName = `${prefix}${String(key)}`\n if (isNested && key in schema && isSchemaNested) {\n recursivePositionInsertion(orama, doc[key], id, propName + '.', schema[key])\n }\n if (!(typeof doc[key] === 'string' && key in schema && !isSchemaNested)) {\n continue\n }\n orama.data.positions[id][propName] = Object.create(null)\n const text = doc[key] as string\n let regExResult: RegExpExecArray | null\n while ((regExResult = wordRegEx.exec(text)) !== null) {\n const word = regExResult[0].toLowerCase()\n const key = `${orama.tokenizer.language}:${word}`\n let token: string\n if (orama.tokenizer.normalizationCache.has(key)) {\n token = orama.tokenizer.normalizationCache.get(key)!\n } else {\n ;[token] = orama.tokenizer.tokenize(word)\n orama.tokenizer.normalizationCache.set(key, token)\n }\n if (!Array.isArray(orama.data.positions[id][propName][token])) {\n orama.data.positions[id][propName][token] = []\n }\n const start = regExResult.index\n const length = regExResult[0].length\n orama.data.positions[id][propName][token].push({ start, length })\n }\n }\n}\n\nexport async function searchWithHighlight<T extends AnyOrama, ResultDocument = TypedDocument<T>>(\n orama: T,\n params: SearchParamsFullText<T, ResultDocument>,\n language?: Language\n): Promise<SearchResultWithHighlight<ResultDocument>> {\n const result = await search(orama, params, language)\n const queryTokens: string[] = orama.tokenizer.tokenize(params.term ?? '', language)\n\n const hitsWithPosition: ResultWithPositions<ResultDocument>[] = []\n for (const hit of result.hits) {\n const hitPositions = Object.entries<any>((orama as OramaWithHighlight<T>).data.positions[hit.id])\n\n const hits: AnyDocument[] = []\n for (const [propName, tokens] of hitPositions) {\n const matchWithSearchTokens: [string, unknown][] = []\n\n const tokenEntries = Object.entries(tokens)\n for (const tokenEntry of tokenEntries) {\n const [token] = tokenEntry\n\n for (const queryToken of queryTokens) {\n if (params.tolerance) {\n const distance = boundedLevenshtein(token, queryToken, params.tolerance)\n if (distance.isBounded) {\n matchWithSearchTokens.push(tokenEntry)\n break\n }\n } else if (token.startsWith(queryToken)) {\n matchWithSearchTokens.push(tokenEntry)\n break\n }\n }\n }\n hits.push([propName, Object.fromEntries(matchWithSearchTokens)])\n }\n\n hitsWithPosition.push(Object.assign(hit, { positions: Object.fromEntries(hits) }))\n }\n\n result.hits = hitsWithPosition\n\n return result as SearchResultWithHighlight<ResultDocument>\n}\n\nexport function saveWithHighlight<T extends AnyOrama>(orama: T): RawDataWithPositions {\n const data = save(orama)\n\n return {\n ...data,\n positions: (orama as OramaWithHighlight<T>).data.positions\n }\n}\n\nexport function loadWithHighlight<T extends AnyOrama>(orama: T, raw: RawDataWithPositions): void {\n load(orama, raw)\n ;(orama as OramaWithHighlight<T>).data.positions = raw.positions\n}\n"],"mappings":"yaAAA,IAAAA,EAAA,GAAAC,EAAAD,EAAA,iBAAAE,EAAA,sBAAAC,EAAA,sBAAAC,EAAA,wBAAAC,IAAA,eAAAC,EAAAN,GAAA,IAAAO,EAYO,wBACPC,EAAmC,kCAuBnC,eAAsBN,EAAgCO,EAAUC,EAA2B,CACnF,cAAeD,EAAM,MACzB,OAAO,OAAOA,EAAM,KAAM,CAAE,UAAW,CAAC,CAAE,CAAC,EAG7C,MAAME,EACJF,EACC,MAAMA,EAAM,eAAe,IAAIA,EAAM,KAAK,KAAMC,CAAE,EACnDA,CACF,CACF,CAEA,IAAME,EAAY,qBAElB,eAAeD,EACbF,EACAI,EACAH,EACAI,EAAS,GACTC,EAAsBN,EAAM,OACb,CACfA,EAAM,KAAK,UAAUC,CAAE,EAAI,OAAO,OAAO,IAAI,EAC7C,QAAWM,KAAO,OAAO,KAAKH,CAAa,EAAkC,CAC3E,IAAMI,EAAW,OAAOJ,EAAIG,CAAG,GAAM,SAC/BE,EAAiB,OAAOH,EAAOC,CAAG,GAAM,SACxCG,EAAW,GAAGL,CAAM,GAAG,OAAOE,CAAG,CAAC,GAIxC,GAHIC,GAAYD,KAAOD,GAAUG,GAC/BP,EAA2BF,EAAOI,EAAIG,CAAG,EAAGN,EAAIS,EAAW,IAAKJ,EAAOC,CAAG,CAAC,EAEzE,EAAE,OAAOH,EAAIG,CAAG,GAAM,UAAYA,KAAOD,GAAU,CAACG,GACtD,SAEFT,EAAM,KAAK,UAAUC,CAAE,EAAES,CAAQ,EAAI,OAAO,OAAO,IAAI,EACvD,IAAMC,EAAOP,EAAIG,CAAG,EAChBK,EACJ,MAAQA,EAAcT,EAAU,KAAKQ,CAAI,KAAO,MAAM,CACpD,IAAME,EAAOD,EAAY,CAAC,EAAE,YAAY,EAClCL,EAAM,GAAGP,EAAM,UAAU,QAAQ,IAAIa,CAAI,GAC3CC,EACAd,EAAM,UAAU,mBAAmB,IAAIO,CAAG,EAC5CO,EAAQd,EAAM,UAAU,mBAAmB,IAAIO,CAAG,GAEjD,CAACO,CAAK,EAAId,EAAM,UAAU,SAASa,CAAI,EACxCb,EAAM,UAAU,mBAAmB,IAAIO,EAAKO,CAAK,GAE9C,MAAM,QAAQd,EAAM,KAAK,UAAUC,CAAE,EAAES,CAAQ,EAAEI,CAAK,CAAC,IAC1Dd,EAAM,KAAK,UAAUC,CAAE,EAAES,CAAQ,EAAEI,CAAK,EAAI,CAAC,GAE/C,IAAMC,EAAQH,EAAY,MACpBI,EAASJ,EAAY,CAAC,EAAE,OAC9BZ,EAAM,KAAK,UAAUC,CAAE,EAAES,CAAQ,EAAEI,CAAK,EAAE,KAAK,CAAE,MAAAC,EAAO,OAAAC,CAAO,CAAC,CAClE,CACF,CACF,CAEA,eAAsBpB,EACpBI,EACAiB,EACAC,EACoD,CACpD,IAAMC,EAAS,QAAM,UAAOnB,EAAOiB,EAAQC,CAAQ,EAC7CE,EAAwBpB,EAAM,UAAU,SAASiB,EAAO,MAAQ,GAAIC,CAAQ,EAE5EG,EAA0D,CAAC,EACjE,QAAWC,KAAOH,EAAO,KAAM,CAC7B,IAAMI,EAAe,OAAO,QAAcvB,EAAgC,KAAK,UAAUsB,EAAI,EAAE,CAAC,EAE1FE,EAAsB,CAAC,EAC7B,OAAW,CAACd,EAAUe,CAAM,IAAKF,EAAc,CAC7C,IAAMG,EAA6C,CAAC,EAE9CC,EAAe,OAAO,QAAQF,CAAM,EAC1C,QAAWG,KAAcD,EAAc,CACrC,GAAM,CAACb,CAAK,EAAIc,EAEhB,QAAWC,KAAcT,EACvB,GAAIH,EAAO,WAET,MADiB,sBAAmBH,EAAOe,EAAYZ,EAAO,SAAS,EAC1D,UAAW,CACtBS,EAAsB,KAAKE,CAAU,EACrC,KACF,UACSd,EAAM,WAAWe,CAAU,EAAG,CACvCH,EAAsB,KAAKE,CAAU,EACrC,KACF,CAEJ,CACAJ,EAAK,KAAK,CAACd,EAAU,OAAO,YAAYgB,CAAqB,CAAC,CAAC,CACjE,CAEAL,EAAiB,KAAK,OAAO,OAAOC,EAAK,CAAE,UAAW,OAAO,YAAYE,CAAI,CAAE,CAAC,CAAC,CACnF,CAEA,OAAAL,EAAO,KAAOE,EAEPF,CACT,CAEO,SAASxB,EAAsCK,EAAgC,CAGpF,MAAO,CACL,MAHW,QAAKA,CAAK,EAIrB,UAAYA,EAAgC,KAAK,SACnD,CACF,CAEO,SAASN,EAAsCM,EAAU8B,EAAiC,IAC/F,QAAK9B,EAAO8B,CAAG,EACb9B,EAAgC,KAAK,UAAY8B,EAAI,SACzD","names":["src_exports","__export","afterInsert","loadWithHighlight","saveWithHighlight","searchWithHighlight","__toCommonJS","import_orama","import_internals","orama","id","recursivePositionInsertion","wordRegEx","doc","prefix","schema","key","isNested","isSchemaNested","propName","text","regExResult","word","token","start","length","params","language","result","queryTokens","hitsWithPosition","hit","hitPositions","hits","tokens","matchWithSearchTokens","tokenEntries","tokenEntry","queryToken","raw"]}