UNPKG

@orama/orama

Version:

A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.

87 lines 3.55 kB
import { getDocumentIdFromInternalId } from '../components/internal-document-id-store.js';
import { createError } from '../errors.js';
import { getNested } from '../utils.js';
import { MODE_FULLTEXT_SEARCH, MODE_HYBRID_SEARCH, MODE_VECTOR_SEARCH } from '../constants.js';
import { fullTextSearch } from './search-fulltext.js';
import { searchVector } from './search-vector.js';
import { hybridSearch } from './search-hybrid.js';

/**
 * Dispatches a search request to the implementation selected by `params.mode`.
 *
 * When `params.mode` is `null`/`undefined`, `MODE_FULLTEXT_SEARCH` is used.
 *
 * @param {object} orama - The database instance, forwarded untouched to the selected implementation.
 * @param {object} params - Search parameters; only `mode` is read here.
 * @param {*} language - Forwarded to `fullTextSearch` only; the other modes do not receive it.
 * @returns {*} Whatever the selected search implementation returns.
 * @throws The error built by `createError('INVALID_SEARCH_MODE', mode)` when the mode is not recognised.
 */
export function search(orama, params, language) {
    const mode = params.mode ?? MODE_FULLTEXT_SEARCH;
    if (mode === MODE_FULLTEXT_SEARCH) {
        return fullTextSearch(orama, params, language);
    }
    if (mode === MODE_VECTOR_SEARCH) {
        return searchVector(orama, params);
    }
    if (mode === MODE_HYBRID_SEARCH) {
        return hybridSearch(orama, params);
    }
    throw createError('INVALID_SEARCH_MODE', mode);
}

/**
 * Builds one page of results in which every hit has a different value at the
 * `distinctOn` path.
 *
 * Entries of `uniqueDocsArray` are visited in order. A document is skipped when its
 * value at `distinctOn` is `undefined` or has already been seen. Pagination counts
 * distinct values, not array positions: the first `offset` distinct values are
 * skipped and at most `limit` hits are returned.
 *
 * @param {object} orama - The database instance; `data.docs`, `documentsStore` and `internalDocumentIDStore` are read.
 * @param {Array} uniqueDocsArray - Entries destructured as `[internalId, score]`; `undefined` entries are skipped.
 * @param {number} offset - Number of distinct values to skip before collecting hits.
 * @param {number} limit - Maximum number of hits to return.
 * @param {string} distinctOn - Property path handed to `getNested` to read the distinct value.
 * @returns {Array<{id: *, score: *, document: *}>} The collected hits, in input order.
 */
export function fetchDocumentsWithDistinct(orama, uniqueDocsArray, offset, limit, distinctOn) {
    const docs = orama.data.docs;
    // We cannot know how many results we will have in the end,
    // so we cannot pre-allocate the array.
    const results = [];
    // The limit check below only runs after a hit has been pushed, so a non-positive
    // limit has to be handled up front or one hit would always be returned.
    if (limit <= 0) {
        return results;
    }
    // Keep track of the values we have already seen.
    const seenValues = new Set();
    const resultIDs = new Set();
    const uniqueDocsArrayLength = uniqueDocsArray.length;
    let count = 0;
    for (let i = 0; i < uniqueDocsArrayLength; i++) {
        const idAndScore = uniqueDocsArray[i];
        // A missing entry is skipped rather than treated as the end of the list.
        if (typeof idAndScore === 'undefined') {
            continue;
        }
        const [id, score] = idAndScore;
        if (resultIDs.has(id)) {
            continue;
        }
        const doc = orama.documentsStore.get(docs, id);
        const value = getNested(doc, distinctOn);
        if (typeof value === 'undefined' || seenValues.has(value)) {
            continue;
        }
        seenValues.add(value);
        count++;
        // We shouldn't consider the document if it's not in the offset range.
        if (count <= offset) {
            continue;
        }
        results.push({
            id: getDocumentIdFromInternalId(orama.internalDocumentIDStore, id),
            score,
            document: doc
        });
        resultIDs.add(id);
        // Reached the limit, stop scanning.
        if (count >= offset + limit) {
            break;
        }
    }
    return results;
}

/**
 * Builds one page of results from positions `offset` .. `offset + limit - 1` of
 * `uniqueDocsArray`.
 *
 * The returned array is dense: it contains only the hits that were actually found
 * and is never longer than `limit`. Iteration stops at the first `undefined` entry,
 * and an id that was already emitted on this page is skipped.
 *
 * @param {object} orama - The database instance; `data.docs`, `documentsStore` and `internalDocumentIDStore` are read.
 * @param {Array} uniqueDocsArray - Entries destructured as `[internalId, score]`.
 * @param {number} offset - Index of the first entry to include.
 * @param {number} limit - Maximum number of entries to include.
 * @returns {Array<{id: *, score: *, document: *}>} The hits for the requested page, in input order.
 */
export function fetchDocuments(orama, uniqueDocsArray, offset, limit) {
    const docs = orama.data.docs;
    // Hits are appended instead of being stored at the source index `i`: indexing by
    // `i` would leave `offset` leading empty slots and push hits beyond `limit`.
    const results = [];
    const resultIDs = new Set();
    // We already have the list of ALL the document IDs containing the search terms.
    // We loop over them starting from a positional value "offset" and ending at "offset + limit"
    // to provide pagination capabilities to the search.
    for (let i = offset; i < limit + offset; i++) {
        const idAndScore = uniqueDocsArray[i];
        // If there are no more results, just break the loop.
        if (typeof idAndScore === 'undefined') {
            break;
        }
        const [id, score] = idAndScore;
        if (resultIDs.has(id)) {
            continue;
        }
        // We retrieve the full document only AFTER making sure that we really want it.
        // We never retrieve the full document preventively.
        const fullDoc = orama.documentsStore.get(docs, id);
        results.push({
            id: getDocumentIdFromInternalId(orama.internalDocumentIDStore, id),
            score,
            document: fullDoc
        });
        resultIDs.add(id);
    }
    return results;
}
//# sourceMappingURL=search.js.map