@orama/orama
Version:
A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.
208 lines • 9.57 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.defaultBM25Params = void 0;
exports.innerFullTextSearch = innerFullTextSearch;
exports.fullTextSearch = fullTextSearch;
const facets_js_1 = require("../components/facets.js");
const groups_js_1 = require("../components/groups.js");
const hooks_js_1 = require("../components/hooks.js");
const internal_document_id_store_js_1 = require("../components/internal-document-id-store.js");
const index_js_1 = require("../components/index.js");
const pinning_manager_js_1 = require("../components/pinning-manager.js");
const errors_js_1 = require("../errors.js");
const utils_js_1 = require("../utils.js");
const docs_js_1 = require("./docs.js");
const search_js_1 = require("./search.js");
/**
 * Runs the full-text stage of a search and returns the matching documents as
 * `[internalDocumentId, score]` tuples. No sorting or pagination happens here.
 *
 * @param orama - Orama instance; reads `data`, `caches`, `index`, `tokenizer` and `documentsStore`.
 * @param params - Search parameters. Fields read here: `term`, `properties`, `where`,
 *   `threshold` (defaults to 1 when null/undefined), `exact`, `tolerance`, `boost`, `relevance`.
 * @param language - Forwarded to the index for both the where-clause and the term search.
 * @returns Array of `[id, score]` tuples. Filter-only and match-all queries get score 0;
 *   geosearch-only queries return whatever `searchByGeoWhereClause` produced.
 * @throws The error built by `createError('UNKNOWN_INDEX', ...)` when `properties` names
 *   something that is not a searchable string property.
 */
function innerFullTextSearch(orama, params, language) {
    const { term, properties } = params;
    const index = orama.data.index;
    // Get searchable string properties. The unrestricted list is computed once and cached on the instance.
    let propertiesToSearch = orama.caches['propertiesToSearch'];
    if (!propertiesToSearch) {
        const propertiesToSearchWithTypes = orama.index.getSearchablePropertiesWithTypes(index);
        propertiesToSearch = orama.index.getSearchableProperties(index);
        propertiesToSearch = propertiesToSearch.filter((prop) => propertiesToSearchWithTypes[prop].startsWith('string'));
        orama.caches['propertiesToSearch'] = propertiesToSearch;
    }
    if (properties && properties !== '*') {
        for (const prop of properties) {
            if (!propertiesToSearch.includes(prop)) {
                throw (0, errors_js_1.createError)('UNKNOWN_INDEX', prop, propertiesToSearch.join(', '));
            }
        }
        // Narrow the local list only; the cached list keeps every searchable string property.
        propertiesToSearch = propertiesToSearch.filter((prop) => properties.includes(prop));
    }
    // If filters are enabled, we need to get the IDs of the documents that match the filters.
    const hasFilters = Object.keys(params.where ?? {}).length > 0;
    let whereFiltersIDs;
    if (hasFilters) {
        whereFiltersIDs = orama.index.searchByWhereClause(index, orama.tokenizer, params.where, language);
    }
    let uniqueDocsIDs;
    // We need to perform the search if:
    // - we have a search term
    // - or we have properties to search
    //   in this case, we need to return all the documents that contain at least one of the given properties
    const threshold = params.threshold ?? 1;
    if (term || properties) {
        const docsCount = (0, docs_js_1.count)(orama);
        uniqueDocsIDs = orama.index.search(index, term || '', orama.tokenizer, language, propertiesToSearch, params.exact || false, params.tolerance || 0, params.boost || {}, applyDefault(params.relevance), docsCount, whereFiltersIDs, threshold);
        // When exact is true and we have a term, keep only documents whose original text
        // contains the search terms as complete, case-sensitive words.
        if (params.exact && term) {
            uniqueDocsIDs = keepExactTermMatches(orama, uniqueDocsIDs, term, propertiesToSearch);
        }
    }
    else {
        // Check if this is a geosearch-only query first
        if (hasFilters) {
            const geoResults = (0, index_js_1.searchByGeoWhereClause)(index, params.where);
            if (geoResults) {
                // This is a geosearch-only query with distance scoring
                uniqueDocsIDs = geoResults;
            }
            else {
                // Regular filter query without search term
                const docIds = whereFiltersIDs ? Array.from(whereFiltersIDs) : [];
                uniqueDocsIDs = docIds.map((k) => [+k, 0]);
            }
        }
        else {
            // No search term and no filters - return all documents
            const docIds = Object.keys(orama.documentsStore.getAll(orama.data.docs));
            uniqueDocsIDs = docIds.map((k) => [+k, 0]);
        }
    }
    return uniqueDocsIDs;
}
// Keeps the `[docId, score]` entries whose stored document has at least one searched string
// property containing every whitespace-separated term as a whole, case-sensitive word.
function keepExactTermMatches(orama, scoredDocs, term, propertiesToSearch) {
    // The patterns depend only on the term, so compile them once instead of once per
    // document/property pair. They carry no `g`/`y` flag, so reusing them with `test` is stateless.
    const termPatterns = term
        .trim()
        .split(/\s+/)
        .map((searchTerm) => new RegExp(`\\b${escapeRegex(searchTerm)}\\b`));
    return scoredDocs.filter(([docId]) => {
        const doc = orama.documentsStore.get(orama.data.docs, docId);
        if (!doc) {
            return false;
        }
        return propertiesToSearch.some((prop) => {
            const propValue = getPropValue(doc, prop);
            // Only plain string values are inspected; any other value never counts as a match.
            return typeof propValue === 'string' && termPatterns.every((pattern) => pattern.test(propValue));
        });
    });
}
/**
 * Backslash-escapes every regular-expression metacharacter in `str`
 * so the result can be embedded literally inside a `RegExp` source.
 *
 * @param str - Text to escape.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegex(str) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    return str.replace(metaCharacters, (ch) => `\\${ch}`);
}
/**
 * Resolves a dot-separated `path` (e.g. `"meta.author.name"`) against `obj`.
 *
 * @param obj - Root value to read from.
 * @param path - Dot-separated list of keys.
 * @returns The value found at the end of the path, or `undefined` as soon as a step
 *   lands on something that is not an object or does not have the next key.
 */
function getPropValue(obj, path) {
    const segments = path.split('.');
    let current = obj;
    for (let i = 0; i < segments.length; i++) {
        const segment = segments[i];
        // Guard: only truthy objects can be descended into, and only through keys they expose.
        const canDescend = Boolean(current) && typeof current === 'object';
        if (!canDescend || !(segment in current)) {
            return undefined;
        }
        current = current[segment];
    }
    return current;
}
/**
 * Public full-text search entry point: collects the scored ids from
 * `innerFullTextSearch`, orders them, applies pinning rules, fetches the
 * requested page and attaches facets, groups and timing.
 *
 * @param orama - Orama instance.
 * @param params - Search parameters. Fields read here: `facets`, `limit` (default 10),
 *   `offset` (default 0), `distinctOn`, `includeVectors` (default false), `preflight`,
 *   `sortBy`, `term` and `groupBy`.
 * @param language - Forwarded to `innerFullTextSearch` and to the search hooks.
 * @returns The result object (`elapsed`, `hits`, `count`, optionally `facets` and `groups`).
 *   It is returned directly when no before/after search hooks are registered, and as a
 *   Promise of it otherwise.
 */
function fullTextSearch(orama, params, language) {
    const startedAt = (0, utils_js_1.getNanosecondsTime)();
    // Synchronous core of the search; shared by the hook-free and the hook-aware paths.
    const runSearch = () => {
        const vectorProps = Object.keys(orama.data.index.vectorIndexes);
        const wantsFacets = params.facets && Object.keys(params.facets).length > 0;
        const { limit = 10, offset = 0, distinctOn, includeVectors = false } = params;
        const preflightOnly = params.preflight === true;
        let ranked = innerFullTextSearch(orama, params, language);
        if (!params.sortBy) {
            // No custom ordering requested: order with the shared score predicate.
            ranked = ranked.sort(utils_js_1.sortTokenScorePredicate);
        }
        else if (typeof params.sortBy === 'function') {
            // User comparator: it receives [id, score, document] triples.
            const ids = ranked.map(([id]) => id);
            const docs = orama.documentsStore.getMultiple(orama.data.docs, ids);
            const triples = docs.map((doc, position) => {
                const entry = ranked[position];
                return [entry[0], entry[1], doc];
            });
            triples.sort(params.sortBy);
            ranked = triples.map(([id, score]) => [id, score]);
        }
        else {
            // Declarative sort: delegate to the sorter, then map its ids back to internal ids.
            const sorted = orama.sorter.sortBy(orama.data.sorting, ranked, params.sortBy);
            ranked = sorted.map(([id, score]) => [
                (0, internal_document_id_store_js_1.getInternalDocumentId)(orama.internalDocumentIDStore, id),
                score
            ]);
        }
        // Pinning runs after sorting but before pagination.
        ranked = (0, pinning_manager_js_1.applyPinningRules)(orama, orama.data.pinning, ranked, params.term);
        // Preflight requests skip document fetching entirely and only report the count.
        let fetched;
        if (!preflightOnly) {
            if (distinctOn) {
                fetched = (0, search_js_1.fetchDocumentsWithDistinct)(orama, ranked, offset, limit, distinctOn);
            }
            else {
                fetched = (0, search_js_1.fetchDocuments)(orama, ranked, offset, limit);
            }
        }
        const searchResult = {
            elapsed: {
                formatted: '',
                raw: 0
            },
            hits: [],
            count: ranked.length
        };
        if (typeof fetched !== 'undefined') {
            // Drop falsy entries from the fetched page.
            searchResult.hits = fetched.filter(Boolean);
            if (!includeVectors) {
                (0, utils_js_1.removeVectorsFromHits)(searchResult, vectorProps);
            }
        }
        if (wantsFacets) {
            searchResult.facets = (0, facets_js_1.getFacets)(orama, ranked, params.facets);
        }
        if (params.groupBy) {
            searchResult.groups = (0, groups_js_1.getGroups)(orama, ranked, params.groupBy);
        }
        const finishedAt = (0, utils_js_1.getNanosecondsTime)();
        searchResult.elapsed = orama.formatElapsedTime(finishedAt - startedAt);
        return searchResult;
    };
    // Hook-aware path: awaits the before/after search hooks around the synchronous core.
    const runSearchWithHooks = async () => {
        if (orama.beforeSearch) {
            await (0, hooks_js_1.runBeforeSearch)(orama.beforeSearch, orama, params, language);
        }
        const searchResult = runSearch();
        if (orama.afterSearch) {
            await (0, hooks_js_1.runAfterSearch)(orama.afterSearch, orama, params, language, searchResult);
        }
        return searchResult;
    };
    // Only go async when at least one hook is actually registered.
    const asyncNeeded = orama.beforeSearch?.length || orama.afterSearch?.length;
    return asyncNeeded ? runSearchWithHooks() : runSearch();
}
// Default BM25 relevance parameters; `applyDefault` falls back to these for every
// field a search's `relevance` option leaves unset.
// NOTE(review): presumably the usual BM25 knobs (k: term-frequency saturation,
// b: length normalization, d: frequency lower bound) - confirm against the Orama BM25 docs.
exports.defaultBM25Params = { k: 1.2, b: 0.75, d: 0.5 };
/**
 * Fills in missing BM25 relevance parameters from `exports.defaultBM25Params`.
 *
 * The caller's object is never written to: a fresh object is returned, so a
 * user-supplied `relevance` option is not silently modified and may safely be
 * frozen or shared between searches.
 *
 * @param bm25Relevance - Optional partial `{ k, b, d }` object (may be null/undefined).
 * @returns A new object carrying every own property of `bm25Relevance`, with `k`, `b`
 *   and `d` replaced by the defaults wherever they were null or undefined.
 */
function applyDefault(bm25Relevance) {
    const defaults = exports.defaultBM25Params;
    return {
        ...bm25Relevance,
        // `??` rather than `||`: an explicit 0 is a legitimate value and must be kept.
        k: bm25Relevance?.k ?? defaults.k,
        b: bm25Relevance?.b ?? defaults.b,
        d: bm25Relevance?.d ?? defaults.d
    };
}
//# sourceMappingURL=search-fulltext.js.map