@andrejs1979/document
Version:
MongoDB-compatible NoSQL document database
555 lines • 21.5 kB
JavaScript
/**
* NoSQL - Bulk Operations and Streaming
* High-performance bulk document operations with streaming support
*/
import { DocumentError } from '../types';
/**
* Bulk operations manager with streaming capabilities
*/
export class BulkOperationsManager {
storage;
config;
activeStreams = new Map();
constructor(storage, config) {
this.storage = storage;
this.config = config;
}
/**
* Execute bulk write operations
*/
async bulkWrite(collection, operations, options = {}) {
if (operations.length === 0) {
return this.getEmptyBulkResult();
}
const startTime = Date.now();
const result = {
acknowledged: true,
insertedCount: 0,
insertedIds: {},
matchedCount: 0,
modifiedCount: 0,
deletedCount: 0,
upsertedCount: 0,
upsertedIds: {}
};
try {
const batchSize = this.config.bulkWriteBatchSize || 1000;
const parallelism = this.config.bulkWriteParallelism || 4;
if (options.ordered !== false) {
// Ordered execution - process sequentially
await this.executeOrderedBulkOperations(collection, operations, result, batchSize);
}
else {
// Unordered execution - process in parallel batches
await this.executeUnorderedBulkOperations(collection, operations, result, batchSize, parallelism);
}
console.log(`Bulk write completed in ${Date.now() - startTime}ms: ${operations.length} operations`);
return result;
}
catch (error) {
throw new DocumentError(`Bulk write failed: ${error.message}`, 'BULK_WRITE_ERROR');
}
}
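/*
* Example (illustrative sketch; `manager`, the 'users' collection and the
* document shapes are assumptions, not part of this module; MongoDB-style
* update operators are assumed to be supported by the underlying storage):
*
*   const result = await manager.bulkWrite('users', [
*     { insertOne: { document: { name: 'Ada', active: false } } },
*     { updateOne: { filter: { name: 'Ada' }, update: { $set: { active: true } }, upsert: true } },
*     { deleteOne: { filter: { name: 'Bob' } } }
*   ], { ordered: false });
*   console.log(result.insertedCount, result.modifiedCount, result.deletedCount);
*/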
/**
* Stream-insert large datasets from an async iterable source
*/
async streamInsert(collection, documentStream, options = {}) {
const streamId = this.generateStreamId();
const batchSize = options.batchSize || this.config.bulkWriteBatchSize || 1000;
const stats = {
insertedCount: 0,
failedCount: 0,
errors: []
};
try {
let batch = [];
let totalProcessed = 0;
for await (const document of documentStream) {
batch.push(document);
if (batch.length >= batchSize) {
const batchResult = await this.processBatch(collection, batch, options);
this.updateStats(stats, batchResult);
totalProcessed += batch.length;
if (options.onProgress) {
options.onProgress(stats.insertedCount + stats.failedCount, totalProcessed);
}
batch = [];
}
}
// Process remaining documents
if (batch.length > 0) {
const batchResult = await this.processBatch(collection, batch, options);
this.updateStats(stats, batchResult);
}
console.log(`Stream insert completed: ${stats.insertedCount} inserted, ${stats.failedCount} failed`);
return stats;
}
catch (error) {
throw new DocumentError(`Stream insert failed: ${error.message}`, 'STREAM_INSERT_ERROR');
}
finally {
this.activeStreams.delete(streamId);
}
}
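/*
* Example (illustrative sketch; `manager`, the 'events' collection and the
* source generator are assumptions, not part of this module):
*
*   async function* readDocuments() {
*     for (let i = 0; i < 10000; i++) {
*       yield { _id: i, value: `doc-${i}` };
*     }
*   }
*   const stats = await manager.streamInsert('events', readDocuments(), {
*     batchSize: 500,
*     onProgress: (processed) => console.log(`processed ${processed} documents`)
*   });
*   console.log(stats.insertedCount, stats.failedCount, stats.errors.length);
*/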
/**
* Create a document stream for real-time processing
*/
createDocumentStream(collection, config) {
const streamId = this.generateStreamId();
const stream = new DocumentStream(streamId, collection, config, this.storage);
this.activeStreams.set(streamId, stream);
return stream;
}
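/*
* Example (illustrative sketch; `manager` and the config values are
* assumptions - the config fields shown are the ones DocumentStream reads):
*
*   const stream = manager.createDocumentStream('metrics', {
*     batchSize: 200,
*     flushInterval: 1000,
*     transform: (doc) => ({ ...doc, receivedAt: new Date() }),
*     errorHandler: (error, failedBatch) => console.error(error, failedBatch.length)
*   });
*   await stream.write({ cpu: 0.42 });
*   await stream.stop(); // flushes any buffered documents
*/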
/**
* Bulk update documents in batches (sequential or parallel)
*/
async bulkUpdate(collection, updates, options = {}) {
const batchSize = options.batchSize || 100;
const parallelism = options.parallelism || 4;
const result = {
matchedCount: 0,
modifiedCount: 0,
upsertedCount: 0,
upsertedIds: []
};
try {
if (options.ordered !== false) {
// Sequential processing
for (let i = 0; i < updates.length; i += batchSize) {
const batch = updates.slice(i, i + batchSize);
const batchResult = await this.processBulkUpdateBatch(collection, batch);
this.mergeBulkUpdateResult(result, batchResult);
}
}
else {
// Parallel processing - defer each batch as a task so concurrency is actually limited
const batches = this.createBatches(updates, batchSize);
const batchTasks = batches.map(batch => () => this.processBulkUpdateBatch(collection, batch));
// Process in chunks of `parallelism` batches at a time
for (let i = 0; i < batchTasks.length; i += parallelism) {
const chunk = batchTasks.slice(i, i + parallelism);
const chunkResults = await Promise.all(chunk.map(task => task()));
for (const batchResult of chunkResults) {
this.mergeBulkUpdateResult(result, batchResult);
}
}
}
return result;
}
catch (error) {
throw new DocumentError(`Bulk update failed: ${error.message}`, 'BULK_UPDATE_ERROR');
}
}
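/*
* Example (illustrative sketch; `manager` and the filter/update shapes are
* assumptions; MongoDB-style update operators are assumed to be supported
* by the underlying storage):
*
*   const { matchedCount, modifiedCount, upsertedIds } = await manager.bulkUpdate('users', [
*     { filter: { status: 'trial' }, update: { $set: { status: 'active' } }, multi: true },
*     { filter: { email: 'ada@example.com' }, update: { $set: { verified: true } }, upsert: true }
*   ], { batchSize: 50, parallelism: 2, ordered: false });
*/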
/**
* Bulk delete documents in batches (sequential or parallel)
*/
async bulkDelete(collection, filters, options = {}) {
const batchSize = options.batchSize || 100;
const parallelism = options.parallelism || 4;
let totalDeleted = 0;
try {
if (options.ordered !== false) {
// Sequential processing
for (let i = 0; i < filters.length; i += batchSize) {
const batch = filters.slice(i, i + batchSize);
const batchDeleted = await this.processBulkDeleteBatch(collection, batch);
totalDeleted += batchDeleted;
}
}
else {
// Parallel processing - defer each batch as a task so concurrency is actually limited
const batches = this.createBatches(filters, batchSize);
const batchTasks = batches.map(batch => () => this.processBulkDeleteBatch(collection, batch));
// Process in chunks of `parallelism` batches at a time
for (let i = 0; i < batchTasks.length; i += parallelism) {
const chunk = batchTasks.slice(i, i + parallelism);
const chunkResults = await Promise.all(chunk.map(task => task()));
totalDeleted += chunkResults.reduce((sum, count) => sum + count, 0);
}
}
return { deletedCount: totalDeleted };
}
catch (error) {
throw new DocumentError(`Bulk delete failed: ${error.message}`, 'BULK_DELETE_ERROR');
}
}
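/*
* Example (illustrative sketch; `manager`, the 'sessions' collection and the
* filters are assumptions; MongoDB-style query operators are assumed to be
* supported by the underlying storage):
*
*   const { deletedCount } = await manager.bulkDelete('sessions', [
*     { expiresAt: { $lt: new Date() } },
*     { userId: null }
*   ], { batchSize: 50, parallelism: 2, ordered: false });
*/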
/**
* Parallel document processing with worker-like pattern
*/
async parallelProcess(collection, filter, processor, options = {}) {
const batchSize = options.batchSize || 100;
const parallelism = options.parallelism || 4;
try {
// Get total count for progress tracking
const totalCount = await this.storage.countDocuments(collection, filter);
let processedCount = 0;
const results = [];
// Process documents in batches
let skip = 0;
while (skip < totalCount) {
const batch = await this.storage.find(collection, filter, {
skip,
limit: batchSize * parallelism
});
if (batch.length === 0)
break;
// Wrap each document in a deferred task so concurrency can actually be limited below
const batchTasks = batch.map((doc) => async () => {
try {
const result = await processor(doc);
processedCount++;
if (options.onProgress) {
options.onProgress(processedCount, totalCount);
}
return result;
}
catch (error) {
if (options.onError) {
options.onError(doc, error);
}
throw error;
}
});
// Run at most `parallelism` document tasks at a time
const batchResults = await this.limitConcurrency(batchTasks, parallelism);
results.push(...batchResults);
skip += batch.length;
}
return results;
}
catch (error) {
throw new DocumentError(`Parallel processing failed: ${error.message}`, 'PARALLEL_PROCESS_ERROR');
}
}
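/*
* Example (illustrative sketch; `manager`, the 'orders' collection and the
* processor are assumptions - the processor runs once per matching document):
*
*   const results = await manager.parallelProcess('orders', { status: 'pending' },
*     async (order) => {
*       // e.g. enrich or re-score each document
*       return { _id: order._id, itemCount: order.items.length };
*     },
*     {
*       batchSize: 100,
*       parallelism: 4,
*       onProgress: (done, total) => console.log(`${done}/${total}`),
*       onError: (doc, err) => console.warn('failed', doc._id, err.message)
*     });
*/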
/**
* Get active stream statistics
*/
getActiveStreams() {
return Array.from(this.activeStreams.values()).map(stream => ({
id: stream.id,
collection: stream.collection,
documentsProcessed: stream.stats.documentsProcessed,
bytesProcessed: stream.stats.bytesProcessed,
startTime: stream.stats.startTime,
isActive: stream.isActive
}));
}
/**
* Stop all active streams
*/
async stopAllStreams() {
const stopPromises = Array.from(this.activeStreams.values()).map(stream => stream.stop());
await Promise.all(stopPromises);
this.activeStreams.clear();
}
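/*
* Example (illustrative sketch; `manager` is an assumption):
*
*   for (const s of manager.getActiveStreams()) {
*     console.log(s.id, s.collection, s.documentsProcessed, s.bytesProcessed);
*   }
*   await manager.stopAllStreams(); // flushes and deactivates every open stream
*/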
// ===============================
// Private Methods
// ===============================
async executeOrderedBulkOperations(collection, operations, result, batchSize) {
for (let i = 0; i < operations.length; i += batchSize) {
const batch = operations.slice(i, i + batchSize);
for (let j = 0; j < batch.length; j++) {
const operation = batch[j];
const operationIndex = i + j;
try {
await this.executeOperation(collection, operation, operationIndex, result);
}
catch (error) {
// In ordered mode, stop on first error
throw new DocumentError(`Operation ${operationIndex} failed: ${error.message}`, 'BULK_OPERATION_ERROR');
}
}
}
}
async executeUnorderedBulkOperations(collection, operations, result, batchSize, parallelism) {
const batches = this.createBatches(operations, batchSize);
// Process batches with limited parallelism
for (let i = 0; i < batches.length; i += parallelism) {
const batchChunk = batches.slice(i, i + parallelism);
const batchPromises = batchChunk.map(async (batch, batchIndex) => {
const batchStartIndex = (i + batchIndex) * batchSize;
for (let j = 0; j < batch.length; j++) {
const operation = batch[j];
const operationIndex = batchStartIndex + j;
try {
await this.executeOperation(collection, operation, operationIndex, result);
}
catch (error) {
// In unordered mode, continue on errors
console.warn(`Operation ${operationIndex} failed:`, error.message);
}
}
});
await Promise.all(batchPromises);
}
}
async executeOperation(collection, operation, operationIndex, result) {
if (operation.insertOne) {
const insertResult = await this.storage.insertOne(collection, operation.insertOne.document);
result.insertedCount++;
result.insertedIds[operationIndex] = insertResult.insertedId;
}
else if (operation.updateOne) {
const updateResult = await this.storage.updateOne(collection, operation.updateOne.filter, operation.updateOne.update, {
upsert: operation.updateOne.upsert,
arrayFilters: operation.updateOne.arrayFilters
});
result.matchedCount += updateResult.matchedCount;
result.modifiedCount += updateResult.modifiedCount;
if (updateResult.upsertedId) {
result.upsertedCount++;
result.upsertedIds[operationIndex] = updateResult.upsertedId;
}
}
else if (operation.updateMany) {
// updateMany is emulated via updateOne with multi: true until DocumentStorage provides a native updateMany
const updateResult = await this.storage.updateOne(collection, operation.updateMany.filter, operation.updateMany.update, {
upsert: operation.updateMany.upsert,
arrayFilters: operation.updateMany.arrayFilters,
multi: true
});
result.matchedCount += updateResult.matchedCount;
result.modifiedCount += updateResult.modifiedCount;
if (updateResult.upsertedId) {
result.upsertedCount++;
result.upsertedIds[operationIndex] = updateResult.upsertedId;
}
}
else if (operation.replaceOne) {
// Replace is similar to update but replaces entire document
const updateResult = await this.storage.updateOne(collection, operation.replaceOne.filter, operation.replaceOne.replacement, { upsert: operation.replaceOne.upsert });
result.matchedCount += updateResult.matchedCount;
result.modifiedCount += updateResult.modifiedCount;
if (updateResult.upsertedId) {
result.upsertedCount++;
result.upsertedIds[operationIndex] = updateResult.upsertedId;
}
}
else if (operation.deleteOne) {
const deleteResult = await this.storage.deleteOne(collection, operation.deleteOne.filter);
result.deletedCount += deleteResult.deletedCount;
}
else if (operation.deleteMany) {
const deleteResult = await this.storage.deleteMany(collection, operation.deleteMany.filter);
result.deletedCount += deleteResult.deletedCount;
}
}
async processBatch(collection, batch, options) {
const batchResult = {
insertedCount: 0,
failedCount: 0,
errors: []
};
if (options.ordered !== false) {
// Sequential insertion
for (const document of batch) {
try {
await this.storage.insertOne(collection, document);
batchResult.insertedCount++;
}
catch (error) {
batchResult.failedCount++;
batchResult.errors.push({ document, error: error.message });
if (options.onError) {
options.onError(error, document);
}
}
}
}
else {
// Unordered insertion via a single insertMany call
try {
const insertResult = await this.storage.insertMany(collection, batch);
batchResult.insertedCount = insertResult.insertedCount;
}
catch (error) {
// Fall back to individual inserts to identify specific failures
for (const document of batch) {
try {
await this.storage.insertOne(collection, document);
batchResult.insertedCount++;
}
catch (docError) {
batchResult.failedCount++;
batchResult.errors.push({ document, error: docError.message });
if (options.onError) {
options.onError(docError, document);
}
}
}
}
}
return batchResult;
}
async processBulkUpdateBatch(collection, updates) {
const result = {
matchedCount: 0,
modifiedCount: 0,
upsertedCount: 0,
upsertedIds: []
};
for (const update of updates) {
try {
const updateResult = await this.storage.updateOne(collection, update.filter, update.update, { upsert: update.upsert, multi: update.multi });
result.matchedCount += updateResult.matchedCount;
result.modifiedCount += updateResult.modifiedCount;
if (updateResult.upsertedId) {
result.upsertedCount++;
result.upsertedIds.push(updateResult.upsertedId);
}
}
catch (error) {
console.warn('Update operation failed:', error.message);
}
}
return result;
}
async processBulkDeleteBatch(collection, filters) {
let deletedCount = 0;
for (const filter of filters) {
try {
const deleteResult = await this.storage.deleteMany(collection, filter);
deletedCount += deleteResult.deletedCount;
}
catch (error) {
console.warn('Delete operation failed:', error.message);
}
}
return deletedCount;
}
updateStats(stats, batchResult) {
stats.insertedCount += batchResult.insertedCount;
stats.failedCount += batchResult.failedCount;
stats.errors.push(...batchResult.errors);
}
mergeBulkUpdateResult(target, source) {
target.matchedCount += source.matchedCount;
target.modifiedCount += source.modifiedCount;
target.upsertedCount += source.upsertedCount;
target.upsertedIds.push(...source.upsertedIds);
}
createBatches(items, batchSize) {
const batches = [];
for (let i = 0; i < items.length; i += batchSize) {
batches.push(items.slice(i, i + batchSize));
}
return batches;
}
// Runs deferred tasks (functions returning promises) with at most `limit` in flight per chunk
async limitConcurrency(tasks, limit) {
const results = [];
for (let i = 0; i < tasks.length; i += limit) {
const chunk = tasks.slice(i, i + limit);
const chunkResults = await Promise.all(chunk.map(task => task()));
results.push(...chunkResults);
}
return results;
}
getEmptyBulkResult() {
return {
acknowledged: true,
insertedCount: 0,
insertedIds: {},
matchedCount: 0,
modifiedCount: 0,
deletedCount: 0,
upsertedCount: 0,
upsertedIds: {}
};
}
generateStreamId() {
return `stream_${Date.now()}_${Math.random().toString(36).substring(2, 15)}`;
}
}
/**
* Document stream for real-time processing
*/
export class DocumentStream {
id;
collection;
config;
storage;
buffer = [];
flushTimer = null;
stats = {
documentsProcessed: 0,
bytesProcessed: 0,
startTime: new Date(),
lastFlush: new Date()
};
_isActive = true;
constructor(id, collection, config, storage) {
this.id = id;
this.collection = collection;
this.config = config;
this.storage = storage;
this.startFlushTimer();
}
get isActive() {
return this._isActive;
}
/**
* Add document to stream
*/
async write(document) {
if (!this._isActive) {
throw new DocumentError('Stream is not active', 'STREAM_INACTIVE');
}
// Apply transformation if configured
const processedDoc = this.config.transform ? this.config.transform(document) : document;
this.buffer.push(processedDoc);
this.stats.bytesProcessed += JSON.stringify(processedDoc).length;
if (this.buffer.length >= this.config.batchSize) {
await this.flush();
}
}
/**
* Flush buffered documents
*/
async flush() {
if (this.buffer.length === 0) {
return;
}
// Snapshot the buffer first so documents written during the insert are kept for the next flush
const batch = [...this.buffer];
this.buffer = [];
try {
const insertResult = await this.storage.insertMany(this.collection, batch);
this.stats.documentsProcessed += insertResult.insertedCount;
this.stats.lastFlush = new Date();
}
catch (error) {
if (this.config.errorHandler) {
// Report the batch that failed; this.buffer has already been cleared at this point
this.config.errorHandler(error, batch);
}
else {
throw error;
}
}
}
/**
* Stop the stream and flush remaining documents
*/
async stop() {
this._isActive = false;
if (this.flushTimer) {
clearInterval(this.flushTimer);
this.flushTimer = null;
}
await this.flush();
}
startFlushTimer() {
this.flushTimer = setInterval(async () => {
try {
await this.flush();
}
catch (error) {
if (this.config.errorHandler) {
this.config.errorHandler(error, []);
}
else {
console.error('Stream flush error:', error);
}
}
}, this.config.flushInterval);
}
}
//# sourceMappingURL=bulk-operations.js.map