UNPKG

@orama/orama

Version:

A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.

208 lines 9.57 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.defaultBM25Params = void 0; exports.innerFullTextSearch = innerFullTextSearch; exports.fullTextSearch = fullTextSearch; const facets_js_1 = require("../components/facets.js"); const groups_js_1 = require("../components/groups.js"); const hooks_js_1 = require("../components/hooks.js"); const internal_document_id_store_js_1 = require("../components/internal-document-id-store.js"); const index_js_1 = require("../components/index.js"); const pinning_manager_js_1 = require("../components/pinning-manager.js"); const errors_js_1 = require("../errors.js"); const utils_js_1 = require("../utils.js"); const docs_js_1 = require("./docs.js"); const search_js_1 = require("./search.js"); function innerFullTextSearch(orama, params, language) { const { term, properties } = params; const index = orama.data.index; // Get searchable string properties let propertiesToSearch = orama.caches['propertiesToSearch']; if (!propertiesToSearch) { const propertiesToSearchWithTypes = orama.index.getSearchablePropertiesWithTypes(index); propertiesToSearch = orama.index.getSearchableProperties(index); propertiesToSearch = propertiesToSearch.filter((prop) => propertiesToSearchWithTypes[prop].startsWith('string')); orama.caches['propertiesToSearch'] = propertiesToSearch; } if (properties && properties !== '*') { for (const prop of properties) { if (!propertiesToSearch.includes(prop)) { throw (0, errors_js_1.createError)('UNKNOWN_INDEX', prop, propertiesToSearch.join(', ')); } } propertiesToSearch = propertiesToSearch.filter((prop) => properties.includes(prop)); } // If filters are enabled, we need to get the IDs of the documents that match the filters. const hasFilters = Object.keys(params.where ?? {}).length > 0; let whereFiltersIDs; if (hasFilters) { whereFiltersIDs = orama.index.searchByWhereClause(index, orama.tokenizer, params.where, language); } let uniqueDocsIDs; // We need to perform the search if: // - we have a search term // - or we have properties to search // in this case, we need to return all the documents that contains at least one of the given properties const threshold = params.threshold !== undefined && params.threshold !== null ? params.threshold : 1; if (term || properties) { const docsCount = (0, docs_js_1.count)(orama); uniqueDocsIDs = orama.index.search(index, term || '', orama.tokenizer, language, propertiesToSearch, params.exact || false, params.tolerance || 0, params.boost || {}, applyDefault(params.relevance), docsCount, whereFiltersIDs, threshold); // When exact is true and we have a term, filter results to only include documents // where the original text contains the exact search term (case-sensitive). // This is a highly requested feature and although Orama is not case-sensitive by design, // this is a reasonable compromise. if (params.exact && term) { const searchTerms = term.trim().split(/\s+/); uniqueDocsIDs = uniqueDocsIDs.filter(([docId]) => { const doc = orama.documentsStore.get(orama.data.docs, docId); if (!doc) return false; // Check if any of the specified properties contain the exact search term for (const prop of propertiesToSearch) { const propValue = getPropValue(doc, prop); if (typeof propValue === 'string') { // Check if all search terms appear as complete words in the property value const hasAllTerms = searchTerms.every((searchTerm) => { // Create a regex that matches the term as a complete word (case-sensitive) const regex = new RegExp(`\\b${escapeRegex(searchTerm)}\\b`); return regex.test(propValue); }); if (hasAllTerms) { return true; } } } return false; }); } } else { // Check if this is a geosearch-only query first if (hasFilters) { const geoResults = (0, index_js_1.searchByGeoWhereClause)(index, params.where); if (geoResults) { // This is a geosearch-only query with distance scoring uniqueDocsIDs = geoResults; } else { // Regular filter query without search term const docIds = whereFiltersIDs ? Array.from(whereFiltersIDs) : []; uniqueDocsIDs = docIds.map((k) => [+k, 0]); } } else { // No search term and no filters - return all documents const docIds = Object.keys(orama.documentsStore.getAll(orama.data.docs)); uniqueDocsIDs = docIds.map((k) => [+k, 0]); } } return uniqueDocsIDs; } // Helper function to escape regex special characters function escapeRegex(str) { return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } // Helper function to get nested property value function getPropValue(obj, path) { const keys = path.split('.'); let value = obj; for (const key of keys) { if (value && typeof value === 'object' && key in value) { value = value[key]; } else { return undefined; } } return value; } function fullTextSearch(orama, params, language) { const timeStart = (0, utils_js_1.getNanosecondsTime)(); function performSearchLogic() { const vectorProperties = Object.keys(orama.data.index.vectorIndexes); const shouldCalculateFacets = params.facets && Object.keys(params.facets).length > 0; const { limit = 10, offset = 0, distinctOn, includeVectors = false } = params; const isPreflight = params.preflight === true; let uniqueDocsArray = innerFullTextSearch(orama, params, language); if (params.sortBy) { if (typeof params.sortBy === 'function') { const ids = uniqueDocsArray.map(([id]) => id); const docs = orama.documentsStore.getMultiple(orama.data.docs, ids); const docsWithIdAndScore = docs.map((d, i) => [ uniqueDocsArray[i][0], uniqueDocsArray[i][1], d ]); docsWithIdAndScore.sort(params.sortBy); uniqueDocsArray = docsWithIdAndScore.map(([id, score]) => [id, score]); } else { uniqueDocsArray = orama.sorter .sortBy(orama.data.sorting, uniqueDocsArray, params.sortBy) .map(([id, score]) => [(0, internal_document_id_store_js_1.getInternalDocumentId)(orama.internalDocumentIDStore, id), score]); } } else { uniqueDocsArray = uniqueDocsArray.sort(utils_js_1.sortTokenScorePredicate); } // Apply pinning rules after sorting but before pagination uniqueDocsArray = (0, pinning_manager_js_1.applyPinningRules)(orama, orama.data.pinning, uniqueDocsArray, params.term); let results; if (!isPreflight) { results = distinctOn ? (0, search_js_1.fetchDocumentsWithDistinct)(orama, uniqueDocsArray, offset, limit, distinctOn) : (0, search_js_1.fetchDocuments)(orama, uniqueDocsArray, offset, limit); } const searchResult = { elapsed: { formatted: '', raw: 0 }, hits: [], count: uniqueDocsArray.length }; if (typeof results !== 'undefined') { searchResult.hits = results.filter(Boolean); if (!includeVectors) { (0, utils_js_1.removeVectorsFromHits)(searchResult, vectorProperties); } } if (shouldCalculateFacets) { const facets = (0, facets_js_1.getFacets)(orama, uniqueDocsArray, params.facets); searchResult.facets = facets; } if (params.groupBy) { searchResult.groups = (0, groups_js_1.getGroups)(orama, uniqueDocsArray, params.groupBy); } searchResult.elapsed = orama.formatElapsedTime((0, utils_js_1.getNanosecondsTime)() - timeStart); return searchResult; } async function executeSearchAsync() { if (orama.beforeSearch) { await (0, hooks_js_1.runBeforeSearch)(orama.beforeSearch, orama, params, language); } const searchResult = performSearchLogic(); if (orama.afterSearch) { await (0, hooks_js_1.runAfterSearch)(orama.afterSearch, orama, params, language, searchResult); } return searchResult; } const asyncNeeded = orama.beforeSearch?.length || orama.afterSearch?.length; if (asyncNeeded) { return executeSearchAsync(); } return performSearchLogic(); } exports.defaultBM25Params = { k: 1.2, b: 0.75, d: 0.5 }; function applyDefault(bm25Relevance) { const r = bm25Relevance ?? {}; r.k = r.k ?? exports.defaultBM25Params.k; r.b = r.b ?? exports.defaultBM25Params.b; r.d = r.d ?? exports.defaultBM25Params.d; return r; } //# sourceMappingURL=search-fulltext.js.map