@orama/orama
Version:
A complete search engine and RAG pipeline in your browser, server, or edge network with support for full-text, vector, and hybrid search in less than 2kb.
240 lines • 11.8 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.insert = insert;
exports.insertMultiple = insertMultiple;
exports.innerInsertMultiple = innerInsertMultiple;
const components_js_1 = require("../components.js");
const utils_js_1 = require("../utils.js");
const hooks_js_1 = require("../components/hooks.js");
const errors_js_1 = require("../errors.js");
const internal_document_id_store_js_1 = require("../components/internal-document-id-store.js");
/**
 * Validates a document against the schema and inserts it, choosing between the
 * promise-based and the synchronous implementation.
 *
 * The asynchronous implementation is selected when `isAsyncFunction` reports
 * any of `orama.beforeInsert`, `orama.afterInsert`, `orama.index.beforeInsert`,
 * `orama.index.insert` or `orama.index.afterInsert` as async.
 *
 * @param orama - Database instance.
 * @param doc - Document to insert.
 * @param language - Forwarded unchanged to the inner insert routine.
 * @param skipHooks - Forwarded unchanged to the inner insert routine.
 * @param options - Forwarded unchanged to the inner insert routine.
 * @returns The result of the selected inner routine: the document id on the
 * synchronous path, a promise resolving to it on the asynchronous path.
 * @throws A `SCHEMA_VALIDATION_FAILURE` error when `orama.validateSchema`
 * returns a truthy value (that value is attached to the error).
 */
function insert(orama, doc, language, skipHooks, options) {
    const invalidProperty = orama.validateSchema(doc, orama.schema);
    if (invalidProperty) {
        throw (0, errors_js_1.createError)('SCHEMA_VALIDATION_FAILURE', invalidProperty);
    }
    const isAsync = utils_js_1.isAsyncFunction;
    // Short-circuit evaluation: index callbacks are only inspected when the
    // document-level hooks are not already async.
    const mustRunAsync = isAsync(orama.beforeInsert) ||
        isAsync(orama.afterInsert) ||
        isAsync(orama.index.beforeInsert) ||
        isAsync(orama.index.insert) ||
        isAsync(orama.index.afterInsert);
    const runInsert = mustRunAsync ? innerInsertAsync : innerInsertSync;
    return runInsert(orama, doc, language, skipHooks, options);
}
// Schema types for which document property validation accepts either a string
// or a number value.
const ENUM_TYPE = new Set([
    'enum',
    'enum[]'
]);
// `typeof` results accepted for a property declared with one of the enum types.
const STRING_NUMBER_TYPE = new Set([
    'string',
    'number'
]);
/**
 * Promise-based insertion of a single document.
 *
 * Steps, in order: resolve the document id, store the document, run the
 * `beforeInsert` hooks, validate every defined searchable value against its
 * declared type, index and sort the document, run the `afterInsert` hooks.
 *
 * @param orama - Database instance.
 * @param doc - Document to insert.
 * @param language - Forwarded to the indexing step.
 * @param skipHooks - When truthy, `beforeInsert` / `afterInsert` hooks are not run.
 * @param options - Forwarded to the indexing step.
 * @returns A promise resolving to the document id.
 * @throws `DOCUMENT_ID_MUST_BE_STRING` when the resolved id is not a string;
 * `DOCUMENT_ALREADY_EXISTS` when the documents store refuses the document.
 * Errors raised by property validation propagate unchanged.
 */
async function innerInsertAsync(orama, doc, language, skipHooks, options) {
    const { index: indexData, docs: docsData } = orama.data;
    const documentId = orama.getDocumentIndexId(doc);
    const idType = typeof documentId;
    if (idType !== 'string') {
        throw (0, errors_js_1.createError)('DOCUMENT_ID_MUST_BE_STRING', idType);
    }
    const internalDocumentId = (0, internal_document_id_store_js_1.getInternalDocumentId)(orama.internalDocumentIDStore, documentId);
    const wasStored = orama.documentsStore.store(docsData, documentId, internalDocumentId, doc);
    if (!wasStored) {
        throw (0, errors_js_1.createError)('DOCUMENT_ALREADY_EXISTS', documentId);
    }
    // Counted after storing, so the total already includes this document.
    const totalDocs = orama.documentsStore.count(docsData);
    const hooksEnabled = !skipHooks;
    if (hooksEnabled) {
        await (0, hooks_js_1.runSingleHook)(orama.beforeInsert, orama, documentId, doc);
    }
    const searchableProps = orama.index.getSearchableProperties(indexData);
    const typeByProp = orama.index.getSearchablePropertiesWithTypes(indexData);
    const valueByProp = orama.getDocumentProperties(doc, searchableProps);
    // Properties absent from the document are not validated.
    Object.entries(valueByProp).forEach(([propName, propValue]) => {
        if (propValue !== undefined) {
            validateDocumentProperty(typeof propValue, typeByProp[propName], propName, propValue);
        }
    });
    await indexAndSortDocument(orama, documentId, searchableProps, valueByProp, totalDocs, language, doc, options);
    if (hooksEnabled) {
        await (0, hooks_js_1.runSingleHook)(orama.afterInsert, orama, documentId, doc);
    }
    return documentId;
}
/**
 * Synchronous insertion of a single document.
 *
 * Steps, in order: resolve the document id, store the document, run the
 * `beforeInsert` hooks, validate every defined searchable value against its
 * declared type, index and sort the document, run the `afterInsert` hooks.
 * Hook return values are not awaited on this path.
 *
 * @param orama - Database instance.
 * @param doc - Document to insert.
 * @param language - Forwarded to the indexing step.
 * @param skipHooks - When truthy, `beforeInsert` / `afterInsert` hooks are not run.
 * @param options - Forwarded to the indexing step.
 * @returns The document id.
 * @throws `DOCUMENT_ID_MUST_BE_STRING` when the resolved id is not a string;
 * `DOCUMENT_ALREADY_EXISTS` when the documents store refuses the document.
 * Errors raised by property validation propagate unchanged.
 */
function innerInsertSync(orama, doc, language, skipHooks, options) {
    const { index: indexData, docs: docsData } = orama.data;
    const documentId = orama.getDocumentIndexId(doc);
    const idType = typeof documentId;
    if (idType !== 'string') {
        throw (0, errors_js_1.createError)('DOCUMENT_ID_MUST_BE_STRING', idType);
    }
    const internalDocumentId = (0, internal_document_id_store_js_1.getInternalDocumentId)(orama.internalDocumentIDStore, documentId);
    const wasStored = orama.documentsStore.store(docsData, documentId, internalDocumentId, doc);
    if (!wasStored) {
        throw (0, errors_js_1.createError)('DOCUMENT_ALREADY_EXISTS', documentId);
    }
    // Counted after storing, so the total already includes this document.
    const totalDocs = orama.documentsStore.count(docsData);
    const hooksEnabled = !skipHooks;
    if (hooksEnabled) {
        (0, hooks_js_1.runSingleHook)(orama.beforeInsert, orama, documentId, doc);
    }
    const searchableProps = orama.index.getSearchableProperties(indexData);
    const typeByProp = orama.index.getSearchablePropertiesWithTypes(indexData);
    const valueByProp = orama.getDocumentProperties(doc, searchableProps);
    // Properties absent from the document are not validated.
    Object.entries(valueByProp).forEach(([propName, propValue]) => {
        if (propValue !== undefined) {
            validateDocumentProperty(typeof propValue, typeByProp[propName], propName, propValue);
        }
    });
    indexAndSortDocumentSync(orama, documentId, searchableProps, valueByProp, totalDocs, language, doc, options);
    if (hooksEnabled) {
        (0, hooks_js_1.runSingleHook)(orama.afterInsert, orama, documentId, doc);
    }
    return documentId;
}
/**
 * Checks that a document value is acceptable for the type declared in the schema.
 *
 * Accepted combinations:
 * - geopoint type: a non-null object whose `lon` and `lat` are both numbers;
 * - vector type: any array;
 * - array type: any array;
 * - `enum` / `enum[]`: a value whose `typeof` is `string` or `number`;
 * - anything else: `actualType` must equal `expectedType`.
 *
 * @param actualType - `typeof` result of `value`, as computed by the caller.
 * @param expectedType - Type declared for the property.
 * @param key - Property name, used only for the error.
 * @param value - Value found in the document.
 * @throws An `INVALID_DOCUMENT_PROPERTY` error (with `key`, `expectedType`,
 * `actualType`) when none of the accepted combinations applies.
 */
function validateDocumentProperty(actualType, expectedType, key, value) {
    // `typeof null` is 'object', so null has to be excluded explicitly; without
    // this guard reading `value.lon` throws a raw TypeError instead of the
    // INVALID_DOCUMENT_PROPERTY error raised below.
    if ((0, components_js_1.isGeoPointType)(expectedType) &&
        value !== null &&
        typeof value === 'object' &&
        typeof value.lon === 'number' &&
        typeof value.lat === 'number') {
        return;
    }
    if ((0, components_js_1.isVectorType)(expectedType) && Array.isArray(value))
        return;
    if ((0, components_js_1.isArrayType)(expectedType) && Array.isArray(value))
        return;
    if (ENUM_TYPE.has(expectedType) && STRING_NUMBER_TYPE.has(actualType))
        return;
    if (actualType !== expectedType) {
        throw (0, errors_js_1.createError)('INVALID_DOCUMENT_PROPERTY', key, expectedType, actualType);
    }
}
/**
 * Promise-based indexing of one document: feeds every defined searchable value
 * to the index component, then every defined sortable value to the sorter.
 *
 * For each searchable property the optional `index.beforeInsert`, the
 * `index.insert` and the optional `index.afterInsert` callbacks are awaited
 * one after the other. Sorter insertion is not awaited.
 *
 * @param orama - Database instance.
 * @param id - Document id.
 * @param indexableProperties - Names of the searchable properties.
 * @param indexableValues - Searchable values keyed by property name.
 * @param docsCount - Document count forwarded to the index callbacks.
 * @param language - Language forwarded to the index and the sorter.
 * @param doc - Full document, used to read the sortable values.
 * @param options - Extra options forwarded to `index.insert` only.
 * @returns A promise that resolves once all values have been processed.
 */
async function indexAndSortDocument(orama, id, indexableProperties, indexableValues, docsCount, language, doc, options) {
    for (const property of indexableProperties) {
        const propertyValue = indexableValues[property];
        if (propertyValue !== undefined) {
            const typeByProperty = orama.index.getSearchablePropertiesWithTypes(orama.data.index);
            const propertyType = typeByProperty[property];
            await orama.index.beforeInsert?.(orama.data.index, property, id, propertyValue, propertyType, language, orama.tokenizer, docsCount);
            // Looked up after the before-hook has completed.
            const internalDocumentId = orama.internalDocumentIDStore.idToInternalId.get(id);
            await orama.index.insert(orama.index, orama.data.index, property, id, internalDocumentId, propertyValue, propertyType, language, orama.tokenizer, docsCount, options);
            await orama.index.afterInsert?.(orama.data.index, property, id, propertyValue, propertyType, language, orama.tokenizer, docsCount);
        }
    }
    const sortableNames = orama.sorter.getSortableProperties(orama.data.sorting);
    const sortableByName = orama.getDocumentProperties(doc, sortableNames);
    for (const sortable of sortableNames) {
        const sortableValue = sortableByName[sortable];
        if (sortableValue !== undefined) {
            const typeBySortable = orama.sorter.getSortablePropertiesWithTypes(orama.data.sorting);
            orama.sorter.insert(orama.data.sorting, sortable, id, sortableValue, typeBySortable[sortable], language);
        }
    }
}
/**
 * Synchronous indexing of one document: feeds every defined searchable value
 * to the index component, then every defined sortable value to the sorter.
 *
 * For each searchable property the internal document id is resolved first,
 * then the optional `index.beforeInsert`, the `index.insert` and the optional
 * `index.afterInsert` callbacks are invoked without awaiting their results.
 *
 * @param orama - Database instance.
 * @param id - Document id.
 * @param indexableProperties - Names of the searchable properties.
 * @param indexableValues - Searchable values keyed by property name.
 * @param docsCount - Document count forwarded to the index callbacks.
 * @param language - Language forwarded to the index and the sorter.
 * @param doc - Full document, used to read the sortable values.
 * @param options - Extra options forwarded to `index.insert` only.
 */
function indexAndSortDocumentSync(orama, id, indexableProperties, indexableValues, docsCount, language, doc, options) {
    for (const property of indexableProperties) {
        const propertyValue = indexableValues[property];
        if (propertyValue !== undefined) {
            const typeByProperty = orama.index.getSearchablePropertiesWithTypes(orama.data.index);
            const propertyType = typeByProperty[property];
            const internalId = (0, internal_document_id_store_js_1.getInternalDocumentId)(orama.internalDocumentIDStore, id);
            orama.index.beforeInsert?.(orama.data.index, property, id, propertyValue, propertyType, language, orama.tokenizer, docsCount);
            orama.index.insert(orama.index, orama.data.index, property, id, internalId, propertyValue, propertyType, language, orama.tokenizer, docsCount, options);
            orama.index.afterInsert?.(orama.data.index, property, id, propertyValue, propertyType, language, orama.tokenizer, docsCount);
        }
    }
    const sortableNames = orama.sorter.getSortableProperties(orama.data.sorting);
    const sortableByName = orama.getDocumentProperties(doc, sortableNames);
    for (const sortable of sortableNames) {
        const sortableValue = sortableByName[sortable];
        if (sortableValue !== undefined) {
            const typeBySortable = orama.sorter.getSortablePropertiesWithTypes(orama.data.sorting);
            orama.sorter.insert(orama.data.sorting, sortable, id, sortableValue, typeBySortable[sortable], language);
        }
    }
}
/**
 * Inserts several documents in batches, choosing between the promise-based and
 * the synchronous batch implementation.
 *
 * The asynchronous implementation is selected when `isAsyncFunction` reports
 * any of the following as async: the `afterInsertMultiple` /
 * `beforeInsertMultiple` hooks, the per-document `beforeInsert` /
 * `afterInsert` hooks, or the `index.beforeInsert` / `index.insert` /
 * `index.afterInsert` callbacks.
 *
 * @param orama - Database instance.
 * @param docs - Documents to insert.
 * @param batchSize - Forwarded to the batch implementation.
 * @param language - Forwarded to the batch implementation.
 * @param skipHooks - Forwarded to the batch implementation.
 * @param timeout - Forwarded to the batch implementation.
 * @returns The array of inserted ids on the synchronous path, a promise
 * resolving to it on the asynchronous path.
 */
function insertMultiple(orama, docs, batchSize, language, skipHooks, timeout) {
    // The per-document hooks must take part in this decision: `insert` switches
    // to its promise-based path when `beforeInsert` or `afterInsert` is async,
    // so the synchronous batch loop would otherwise collect unresolved promises
    // instead of ids and fire `afterInsertMultiple` before any insert finished.
    const asyncNeeded = (0, utils_js_1.isAsyncFunction)(orama.afterInsertMultiple) ||
        (0, utils_js_1.isAsyncFunction)(orama.beforeInsertMultiple) ||
        (0, utils_js_1.isAsyncFunction)(orama.beforeInsert) ||
        (0, utils_js_1.isAsyncFunction)(orama.afterInsert) ||
        (0, utils_js_1.isAsyncFunction)(orama.index.beforeInsert) ||
        (0, utils_js_1.isAsyncFunction)(orama.index.insert) ||
        (0, utils_js_1.isAsyncFunction)(orama.index.afterInsert);
    if (asyncNeeded) {
        return innerInsertMultipleAsync(orama, docs, batchSize, language, skipHooks, timeout);
    }
    return innerInsertMultipleSync(orama, docs, batchSize, language, skipHooks, timeout);
}
/**
 * Promise-based batch insertion.
 *
 * Documents are inserted one after the other, grouped in batches. Each
 * `insert` call receives `{ avlRebalanceThreshold: <size of its batch> }` as
 * options. When `timeout` is positive and a batch took less than `timeout`
 * milliseconds, `sleep` is called with the remaining time before the next
 * batch starts. After all batches, the `afterInsertMultiple` hooks run unless
 * `skipHooks` is truthy.
 *
 * @param orama - Database instance.
 * @param docs - Documents to insert.
 * @param batchSize - Documents per batch (default 1000). Fractional values are
 * rounded down; anything that is not a number >= 1 (e.g. `0`, `null`, a
 * negative number) falls back to 1000.
 * @param language - Forwarded to `insert`.
 * @param skipHooks - Forwarded to `insert`; also disables `afterInsertMultiple`.
 * @param timeout - Minimum duration of a batch in milliseconds (default 0: no pause).
 * @returns A promise resolving to the ids returned by `insert`, in document order.
 */
async function innerInsertMultipleAsync(orama, docs, batchSize = 1000, language, skipHooks, timeout = 0) {
    // A batch size that does not advance the cursor (0, null, negative) would
    // make the loop below spin forever, so normalise it up front.
    const effectiveBatchSize = typeof batchSize === 'number' && batchSize >= 1 ? Math.floor(batchSize) : 1000;
    const ids = [];
    const processNextBatch = async (startIndex) => {
        const endIndex = Math.min(startIndex + effectiveBatchSize, docs.length);
        const batch = docs.slice(startIndex, endIndex);
        for (const doc of batch) {
            const options = { avlRebalanceThreshold: batch.length };
            const id = await insert(orama, doc, language, skipHooks, options);
            ids.push(id);
        }
        return endIndex;
    };
    const processAllBatches = async () => {
        let currentIndex = 0;
        while (currentIndex < docs.length) {
            const startTime = Date.now();
            currentIndex = await processNextBatch(currentIndex);
            if (timeout > 0) {
                const elapsedTime = Date.now() - startTime;
                const waitTime = timeout - elapsedTime;
                if (waitTime > 0) {
                    // NOTE(review): the result is not awaited; presumably `sleep`
                    // blocks synchronously - confirm in utils.js.
                    (0, utils_js_1.sleep)(waitTime);
                }
            }
        }
    };
    await processAllBatches();
    if (!skipHooks) {
        await (0, hooks_js_1.runMultipleHook)(orama.afterInsertMultiple, orama, docs);
    }
    return ids;
}
/**
 * Synchronous batch insertion.
 *
 * Documents are inserted one after the other, grouped in batches. Each
 * `insert` call receives `{ avlRebalanceThreshold: <size of its batch> }` as
 * options. When `timeout` is positive, then after each batch, once the total
 * time elapsed since processing started has reached `timeout`, `sleep` is
 * called with the time left until the next multiple of `timeout`. After all
 * batches, the `afterInsertMultiple` hooks run (not awaited) unless
 * `skipHooks` is truthy.
 *
 * @param orama - Database instance.
 * @param docs - Documents to insert.
 * @param batchSize - Documents per batch (default 1000). Fractional values are
 * rounded down; anything that is not a number >= 1 (e.g. `0`, `null`, a
 * negative number) falls back to 1000.
 * @param language - Forwarded to `insert`.
 * @param skipHooks - Forwarded to `insert`; also disables `afterInsertMultiple`.
 * @param timeout - Pacing interval in milliseconds (default 0: no pause).
 * @returns The ids returned by `insert`, in document order.
 */
function innerInsertMultipleSync(orama, docs, batchSize = 1000, language, skipHooks, timeout = 0) {
    // A batch size of 0, null or a fraction below 1 makes the very first slice
    // empty (a negative one can also cut documents off the end), so documents
    // would be dropped silently; normalise it up front.
    const effectiveBatchSize = typeof batchSize === 'number' && batchSize >= 1 ? Math.floor(batchSize) : 1000;
    const ids = [];
    let i = 0;
    function processNextBatch() {
        const batch = docs.slice(i * effectiveBatchSize, (i + 1) * effectiveBatchSize);
        if (batch.length === 0)
            return false;
        for (const doc of batch) {
            const options = { avlRebalanceThreshold: batch.length };
            const id = insert(orama, doc, language, skipHooks, options);
            ids.push(id);
        }
        i++;
        return true;
    }
    function processAllBatches() {
        // Measured once for the whole run, not per batch.
        const startTime = Date.now();
        // eslint-disable-next-line no-constant-condition
        while (true) {
            const hasMoreBatches = processNextBatch();
            if (!hasMoreBatches)
                break;
            if (timeout > 0) {
                const elapsedTime = Date.now() - startTime;
                if (elapsedTime >= timeout) {
                    const remainingTime = timeout - (elapsedTime % timeout);
                    if (remainingTime > 0) {
                        (0, utils_js_1.sleep)(remainingTime);
                    }
                }
            }
        }
    }
    processAllBatches();
    if (!skipHooks) {
        (0, hooks_js_1.runMultipleHook)(orama.afterInsertMultiple, orama, docs);
    }
    return ids;
}
/**
 * Batch insertion entry point that chooses between the promise-based and the
 * synchronous batch implementation.
 *
 * The asynchronous implementation is selected when `isAsyncFunction` reports
 * any of `orama.beforeInsert`, `orama.afterInsert`, `orama.index.beforeInsert`,
 * `orama.index.insert` or `orama.index.afterInsert` as async.
 *
 * NOTE(review): `orama.afterInsertMultiple` is not part of this check although
 * the synchronous batch implementation invokes it without awaiting - confirm
 * that callers make that decision themselves.
 *
 * @param orama - Database instance.
 * @param docs - Documents to insert.
 * @param batchSize - Forwarded to the batch implementation.
 * @param language - Forwarded to the batch implementation.
 * @param skipHooks - Forwarded to the batch implementation.
 * @param timeout - Forwarded to the batch implementation.
 * @returns The array of inserted ids on the synchronous path, a promise
 * resolving to it on the asynchronous path.
 */
function innerInsertMultiple(orama, docs, batchSize, language, skipHooks, timeout) {
    const isAsync = utils_js_1.isAsyncFunction;
    const mustRunAsync = isAsync(orama.beforeInsert) ||
        isAsync(orama.afterInsert) ||
        isAsync(orama.index.beforeInsert) ||
        isAsync(orama.index.insert) ||
        isAsync(orama.index.afterInsert);
    const runBatches = mustRunAsync ? innerInsertMultipleAsync : innerInsertMultipleSync;
    return runBatches(orama, docs, batchSize, language, skipHooks, timeout);
}
//# sourceMappingURL=insert.js.map