@orama/orama
Version:
A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.
132 lines • 5.35 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getGroups = getGroups;
const errors_js_1 = require("../errors.js");
const utils_js_1 = require("../utils.js");
const internal_document_id_store_js_1 = require("./internal-document-id-store.js");
// Fallback aggregation used when the caller does not supply `groupBy.reduce`:
// it simply collects every grouped hit into an array, keeping its position.
const DEFAULT_REDUCE = {
    // Seeds the accumulator with an array of `length` slots (all `undefined`).
    getInitialValue(length) {
        return Array.from({ length });
    },
    // Stores the current hit at its own position; the first argument
    // (the group's values) is ignored by the default reducer.
    reducer(_, acc, res, index) {
        acc[index] = res;
        return acc;
    }
};
// Schema types a property must have to be usable as a group-by key.
const ALLOWED_TYPES = ['string', 'number', 'boolean'];
/**
 * Groups search results by the values of one or more schema properties.
 *
 * For every requested property the function records which result positions
 * carry which value, builds every combination of the observed values across
 * the properties, keeps the combinations that at least one document matches,
 * and finally folds the documents of each combination with the configured
 * reducer.
 *
 * @param {object} orama - Database instance; this function reads `index`,
 *   `data.index`, `data.docs`, `documentsStore` and `internalDocumentIDStore`.
 * @param {Array} results - Search hits as tuples whose first element is the
 *   internal document id and whose second element is used as the score.
 * @param {object} groupBy - Grouping options.
 * @param {string[]} groupBy.properties - Property paths to group by.
 * @param {number} [groupBy.maxResult] - Maximum number of documents recorded
 *   per distinct value of a property. Falsy values (including 0) mean
 *   "no limit".
 * @param {object} [groupBy.reduce] - Custom `{ reducer, getInitialValue }`;
 *   defaults to `DEFAULT_REDUCE`.
 * @returns {Array<{values: Array, result: *}>} One entry per non-empty
 *   combination of values; an empty array when no properties are given.
 * @throws Error created with code `UNKNOWN_GROUP_BY_PROPERTY` when a property
 *   is not in the schema, or `INVALID_GROUP_BY_PROPERTY` when its type is not
 *   one of `ALLOWED_TYPES`.
 */
function getGroups(orama, results, groupBy) {
    const properties = groupBy.properties;
    const propertiesLength = properties.length;
    const schemaProperties = orama.index.getSearchablePropertiesWithTypes(orama.data.index);
    for (let i = 0; i < propertiesLength; i++) {
        const property = properties[i];
        if (typeof schemaProperties[property] === 'undefined') {
            throw (0, errors_js_1.createError)('UNKNOWN_GROUP_BY_PROPERTY', property);
        }
        if (!ALLOWED_TYPES.includes(schemaProperties[property])) {
            throw (0, errors_js_1.createError)('INVALID_GROUP_BY_PROPERTY', property, ALLOWED_TYPES.join(', '), schemaProperties[property]);
        }
    }
    // Nothing to group by: return early instead of asking for the
    // combinations of an empty list of value sets.
    if (propertiesLength === 0) {
        return [];
    }
    const allIDs = results.map(([id]) => (0, internal_document_id_store_js_1.getDocumentIdFromInternalId)(orama.internalDocumentIDStore, id));
    // allDocs is already sorted by the sortBy algorithm
    // We leverage on that to limit the number of documents returned
    const allDocs = orama.documentsStore.getMultiple(orama.data.docs, allIDs);
    const allDocsLength = allDocs.length;
    const returnedCount = groupBy.maxResult || Number.MAX_SAFE_INTEGER;
    // Booleans are stringified explicitly; any other value is used directly
    // as the dictionary key.
    const toKey = (value) => (typeof value !== 'boolean' ? value : '' + value);
    const listOfValues = [];
    // We want to understand which documents have which values
    // and group them by the property and values.
    // Both dictionaries are null-prototype objects so that keys such as
    // "constructor", "toString" or "__proto__" are treated as plain data
    // instead of resolving to members inherited from Object.prototype.
    const g = Object.create(null);
    for (let i = 0; i < propertiesLength; i++) {
        const groupByKey = properties[i];
        const group = {
            property: groupByKey,
            perValue: Object.create(null)
        };
        const values = new Set();
        for (let j = 0; j < allDocsLength; j++) {
            const doc = allDocs[j];
            const value = (0, utils_js_1.getNested)(doc, groupByKey);
            // we don't want to consider undefined values
            if (typeof value === 'undefined') {
                continue;
            }
            const keyValue = toKey(value);
            const perValue = group.perValue[keyValue] ?? {
                indexes: [],
                count: 0
            };
            if (perValue.count >= returnedCount) {
                // We stop early because for this value we reached the limit
                continue;
            }
            // We use the index to keep track of the original order
            perValue.indexes.push(j);
            perValue.count++;
            group.perValue[keyValue] = perValue;
            values.add(value);
        }
        listOfValues.push(Array.from(values));
        g[groupByKey] = group;
    }
    const combinations = calculateCombination(listOfValues);
    const combinationsLength = combinations.length;
    const groups = [];
    for (let i = 0; i < combinationsLength; i++) {
        const combination = combinations[i];
        const combinationLength = combination.length;
        const group = {
            values: [],
            indexes: []
        };
        const indexes = [];
        for (let j = 0; j < combinationLength; j++) {
            const value = combination[j];
            const property = properties[j];
            indexes.push(g[property].perValue[toKey(value)].indexes);
            group.values.push(value);
        }
        // We leverage on the index to sort the results by the original order
        group.indexes = (0, utils_js_1.intersect)(indexes).sort((a, b) => a - b);
        // don't generate empty groups
        if (group.indexes.length === 0) {
            continue;
        }
        groups.push(group);
    }
    const groupsLength = groups.length;
    const res = Array.from({ length: groupsLength });
    // The reducer choice does not depend on the group, so resolve it once.
    const reduce = (groupBy.reduce || DEFAULT_REDUCE);
    for (let i = 0; i < groupsLength; i++) {
        const group = groups[i];
        const docs = group.indexes.map((index) => {
            return {
                id: allIDs[index],
                score: results[index][1],
                document: allDocs[index]
            };
        });
        // The reducer receives the group's values as its first argument,
        // followed by the usual Array.prototype.reduce arguments.
        const func = reduce.reducer.bind(null, group.values);
        const initialValue = reduce.getInitialValue(group.indexes.length);
        const aggregationValue = docs.reduce(func, initialValue);
        res[i] = {
            values: group.values,
            result: aggregationValue
        };
    }
    return res;
}
/**
 * Builds every combination that takes one value from each list in `arrs`,
 * starting at position `index` (a cartesian product of `arrs[index..]`).
 *
 * Combinations are ordered with the earliest list varying slowest.
 *
 * @param {Array<Array>} arrs - Lists of candidate values, one list per slot.
 * @param {number} [index=0] - Position of the first list to include.
 * @returns {Array<Array>} One array per combination; empty when there is no
 *   list at `index` or when any included list is empty.
 */
function calculateCombination(arrs, index = 0) {
    // Without this guard an empty `arrs` (or an out-of-range `index`) never
    // reaches the terminal case below and the recursion does not stop.
    if (index >= arrs.length)
        return [];
    // Last list: each value forms a one-element combination.
    if (index + 1 === arrs.length)
        return arrs[index].map((item) => [item]);
    const head = arrs[index];
    const tails = calculateCombination(arrs, index + 1);
    const combinations = [];
    for (const value of head) {
        for (const tail of tails) {
            // Prefix each combination of the remaining lists with `value`.
            const result = [value];
            (0, utils_js_1.safeArrayPush)(result, tail);
            combinations.push(result);
        }
    }
    return combinations;
}
//# sourceMappingURL=groups.js.map