UNPKG

@andrejs1979/document

MongoDB-compatible document database for NoSQL

555 lines 21.5 kB
/**
 * NoSQL - Bulk Operations and Streaming
 * High-performance bulk document operations with streaming support
 */
import { DocumentError } from '../types';

/**
 * Bulk operations manager with streaming capabilities
 */
export class BulkOperationsManager {
  storage;
  config;
  activeStreams = new Map();

  constructor(storage, config) {
    this.storage = storage;
    this.config = config;
  }

  /**
   * Execute bulk write operations
   */
  async bulkWrite(collection, operations, options = {}) {
    if (operations.length === 0) {
      return this.getEmptyBulkResult();
    }
    const startTime = Date.now();
    const result = {
      acknowledged: true,
      insertedCount: 0,
      insertedIds: {},
      matchedCount: 0,
      modifiedCount: 0,
      deletedCount: 0,
      upsertedCount: 0,
      upsertedIds: {}
    };
    try {
      const batchSize = this.config.bulkWriteBatchSize || 1000;
      const parallelism = this.config.bulkWriteParallelism || 4;
      if (options.ordered !== false) {
        // Ordered execution - process sequentially
        await this.executeOrderedBulkOperations(collection, operations, result, batchSize);
      } else {
        // Unordered execution - process in parallel batches
        await this.executeUnorderedBulkOperations(collection, operations, result, batchSize, parallelism);
      }
      console.log(`Bulk write completed in ${Date.now() - startTime}ms: ${operations.length} operations`);
      return result;
    } catch (error) {
      throw new DocumentError(`Bulk write failed: ${error.message}`, 'BULK_WRITE_ERROR');
    }
  }

  /**
   * Stream insert large datasets
   */
  async streamInsert(collection, documentStream, options = {}) {
    const streamId = this.generateStreamId();
    const batchSize = options.batchSize || this.config.bulkWriteBatchSize || 1000;
    const stats = {
      insertedCount: 0,
      failedCount: 0,
      errors: []
    };
    try {
      let batch = [];
      let totalProcessed = 0;
      for await (const document of documentStream) {
        batch.push(document);
        if (batch.length >= batchSize) {
          const batchResult = await this.processBatch(collection, batch, options);
          this.updateStats(stats, batchResult);
          totalProcessed += batch.length;
          if (options.onProgress) {
            options.onProgress(stats.insertedCount + stats.failedCount, totalProcessed);
          }
          batch = [];
        }
      }
      // Process remaining documents
      if (batch.length > 0) {
        const batchResult = await this.processBatch(collection, batch, options);
        this.updateStats(stats, batchResult);
      }
      console.log(`Stream insert completed: ${stats.insertedCount} inserted, ${stats.failedCount} failed`);
      return stats;
    } catch (error) {
      throw new DocumentError(`Stream insert failed: ${error.message}`, 'STREAM_INSERT_ERROR');
    } finally {
      this.activeStreams.delete(streamId);
    }
  }

  /**
   * Create a document stream for real-time processing
   */
  createDocumentStream(collection, config) {
    const streamId = this.generateStreamId();
    const stream = new DocumentStream(streamId, collection, config, this.storage);
    this.activeStreams.set(streamId, stream);
    return stream;
  }

  /**
   * Bulk update with streaming
   */
  async bulkUpdate(collection, updates, options = {}) {
    const batchSize = options.batchSize || 100;
    const parallelism = options.parallelism || 4;
    const result = {
      matchedCount: 0,
      modifiedCount: 0,
      upsertedCount: 0,
      upsertedIds: []
    };
    try {
      if (options.ordered !== false) {
        // Sequential processing
        for (let i = 0; i < updates.length; i += batchSize) {
          const batch = updates.slice(i, i + batchSize);
          const batchResult = await this.processBulkUpdateBatch(collection, batch);
          this.mergeBulkUpdateResult(result, batchResult);
        }
      } else {
        // Parallel processing
        const batches = this.createBatches(updates, batchSize);
        const batchPromises = batches.map(batch => this.processBulkUpdateBatch(collection, batch));
        // Process in chunks to limit concurrency
        for (let i = 0; i < batchPromises.length; i += parallelism) {
          const chunk = batchPromises.slice(i, i + parallelism);
          const chunkResults = await Promise.all(chunk);
          for (const batchResult of chunkResults) {
            this.mergeBulkUpdateResult(result, batchResult);
          }
        }
      }
      return result;
    } catch (error) {
      throw new DocumentError(`Bulk update failed: ${error.message}`, 'BULK_UPDATE_ERROR');
    }
  }

  /**
   * Bulk delete with streaming
   */
  async bulkDelete(collection, filters, options = {}) {
    const batchSize = options.batchSize || 100;
    const parallelism = options.parallelism || 4;
    let totalDeleted = 0;
    try {
      if (options.ordered !== false) {
        // Sequential processing
        for (let i = 0; i < filters.length; i += batchSize) {
          const batch = filters.slice(i, i + batchSize);
          const batchDeleted = await this.processBulkDeleteBatch(collection, batch);
          totalDeleted += batchDeleted;
        }
      } else {
        // Parallel processing
        const batches = this.createBatches(filters, batchSize);
        const batchPromises = batches.map(batch => this.processBulkDeleteBatch(collection, batch));
        // Process in chunks to limit concurrency
        for (let i = 0; i < batchPromises.length; i += parallelism) {
          const chunk = batchPromises.slice(i, i + parallelism);
          const chunkResults = await Promise.all(chunk);
          totalDeleted += chunkResults.reduce((sum, count) => sum + count, 0);
        }
      }
      return { deletedCount: totalDeleted };
    } catch (error) {
      throw new DocumentError(`Bulk delete failed: ${error.message}`, 'BULK_DELETE_ERROR');
    }
  }

  /**
   * Parallel document processing with worker-like pattern
   */
  async parallelProcess(collection, filter, processor, options = {}) {
    const batchSize = options.batchSize || 100;
    const parallelism = options.parallelism || 4;
    try {
      // Get total count for progress tracking
      const totalCount = await this.storage.countDocuments(collection, filter);
      let processedCount = 0;
      const results = [];
      // Process documents in batches
      let skip = 0;
      while (skip < totalCount) {
        const batch = await this.storage.find(collection, filter, { skip, limit: batchSize * parallelism });
        if (batch.length === 0) break;
        // Process batch in parallel
        const batchPromises = batch.map(async (doc) => {
          try {
            const result = await processor(doc);
            processedCount++;
            if (options.onProgress) {
              options.onProgress(processedCount, totalCount);
            }
            return result;
          } catch (error) {
            if (options.onError) {
              options.onError(doc, error);
            }
            throw error;
          }
        });
        // Limit concurrency
        const batchResults = await this.limitConcurrency(batchPromises, parallelism);
        results.push(...batchResults);
        skip += batch.length;
      }
      return results;
    } catch (error) {
      throw new DocumentError(`Parallel processing failed: ${error.message}`, 'PARALLEL_PROCESS_ERROR');
    }
  }

  /**
   * Get active stream statistics
   */
  getActiveStreams() {
    return Array.from(this.activeStreams.values()).map(stream => ({
      id: stream.id,
      collection: stream.collection,
      documentsProcessed: stream.stats.documentsProcessed,
      bytesProcessed: stream.stats.bytesProcessed,
      startTime: stream.stats.startTime,
      isActive: stream.isActive
    }));
  }

  /**
   * Stop all active streams
   */
  async stopAllStreams() {
    const stopPromises = Array.from(this.activeStreams.values()).map(stream => stream.stop());
    await Promise.all(stopPromises);
    this.activeStreams.clear();
  }

  // ===============================
  // Private Methods
  // ===============================

  async executeOrderedBulkOperations(collection, operations, result, batchSize) {
    for (let i = 0; i < operations.length; i += batchSize) {
      const batch = operations.slice(i, i + batchSize);
      for (let j = 0; j < batch.length; j++) {
        const operation = batch[j];
        const operationIndex = i + j;
        try {
          await this.executeOperation(collection, operation, operationIndex, result);
        } catch (error) {
          // In ordered mode, stop on first error
          throw new DocumentError(`Operation ${operationIndex} failed: ${error.message}`, 'BULK_OPERATION_ERROR');
        }
      }
    }
  }

  async executeUnorderedBulkOperations(collection, operations, result, batchSize, parallelism) {
    const batches = this.createBatches(operations, batchSize);
    // Process batches with limited parallelism
    for (let i = 0; i < batches.length; i += parallelism) {
      const batchChunk = batches.slice(i, i + parallelism);
      const batchPromises = batchChunk.map(async (batch, batchIndex) => {
        const batchStartIndex = (i + batchIndex) * batchSize;
        for (let j = 0; j < batch.length; j++) {
          const operation = batch[j];
          const operationIndex = batchStartIndex + j;
          try {
            await this.executeOperation(collection, operation, operationIndex, result);
          } catch (error) {
            // In unordered mode, continue on errors
            console.warn(`Operation ${operationIndex} failed:`, error.message);
          }
        }
      });
      await Promise.all(batchPromises);
    }
  }

  async executeOperation(collection, operation, operationIndex, result) {
    if (operation.insertOne) {
      const insertResult = await this.storage.insertOne(collection, operation.insertOne.document);
      result.insertedCount++;
      result.insertedIds[operationIndex] = insertResult.insertedId;
    } else if (operation.updateOne) {
      const updateResult = await this.storage.updateOne(collection, operation.updateOne.filter, operation.updateOne.update, {
        upsert: operation.updateOne.upsert,
        arrayFilters: operation.updateOne.arrayFilters
      });
      result.matchedCount += updateResult.matchedCount;
      result.modifiedCount += updateResult.modifiedCount;
      if (updateResult.upsertedId) {
        result.upsertedCount++;
        result.upsertedIds[operationIndex] = updateResult.upsertedId;
      }
    } else if (operation.updateMany) {
      // For updateMany, we need to implement this in DocumentStorage
      const updateResult = await this.storage.updateOne(collection, operation.updateMany.filter, operation.updateMany.update, {
        upsert: operation.updateMany.upsert,
        arrayFilters: operation.updateMany.arrayFilters,
        multi: true
      });
      result.matchedCount += updateResult.matchedCount;
      result.modifiedCount += updateResult.modifiedCount;
      if (updateResult.upsertedId) {
        result.upsertedCount++;
        result.upsertedIds[operationIndex] = updateResult.upsertedId;
      }
    } else if (operation.replaceOne) {
      // Replace is similar to update but replaces entire document
      const updateResult = await this.storage.updateOne(collection, operation.replaceOne.filter, operation.replaceOne.replacement, {
        upsert: operation.replaceOne.upsert
      });
      result.matchedCount += updateResult.matchedCount;
      result.modifiedCount += updateResult.modifiedCount;
      if (updateResult.upsertedId) {
        result.upsertedCount++;
        result.upsertedIds[operationIndex] = updateResult.upsertedId;
      }
    } else if (operation.deleteOne) {
      const deleteResult = await this.storage.deleteOne(collection, operation.deleteOne.filter);
      result.deletedCount += deleteResult.deletedCount;
    } else if (operation.deleteMany) {
      const deleteResult = await this.storage.deleteMany(collection, operation.deleteMany.filter);
      result.deletedCount += deleteResult.deletedCount;
    }
  }

  async processBatch(collection, batch, options) {
    const batchResult = {
      insertedCount: 0,
      failedCount: 0,
      errors: []
    };
    if (options.ordered !== false) {
      // Sequential insertion
      for (const document of batch) {
        try {
          await this.storage.insertOne(collection, document);
          batchResult.insertedCount++;
        } catch (error) {
          batchResult.failedCount++;
          batchResult.errors.push({ document, error: error.message });
          if (options.onError) {
            options.onError(error, document);
          }
        }
      }
    } else {
      // Parallel insertion using insertMany
      try {
        const insertResult = await this.storage.insertMany(collection, batch);
        batchResult.insertedCount = insertResult.insertedCount;
      } catch (error) {
        // Fall back to individual inserts to identify specific failures
        for (const document of batch) {
          try {
            await this.storage.insertOne(collection, document);
            batchResult.insertedCount++;
          } catch (docError) {
            batchResult.failedCount++;
            batchResult.errors.push({ document, error: docError.message });
            if (options.onError) {
              options.onError(docError, document);
            }
          }
        }
      }
    }
    return batchResult;
  }

  async processBulkUpdateBatch(collection, updates) {
    const result = {
      matchedCount: 0,
      modifiedCount: 0,
      upsertedCount: 0,
      upsertedIds: []
    };
    for (const update of updates) {
      try {
        const updateResult = await this.storage.updateOne(collection, update.filter, update.update, {
          upsert: update.upsert,
          multi: update.multi
        });
        result.matchedCount += updateResult.matchedCount;
        result.modifiedCount += updateResult.modifiedCount;
        if (updateResult.upsertedId) {
          result.upsertedCount++;
          result.upsertedIds.push(updateResult.upsertedId);
        }
      } catch (error) {
        console.warn('Update operation failed:', error.message);
      }
    }
    return result;
  }

  async processBulkDeleteBatch(collection, filters) {
    let deletedCount = 0;
    for (const filter of filters) {
      try {
        const deleteResult = await this.storage.deleteMany(collection, filter);
        deletedCount += deleteResult.deletedCount;
      } catch (error) {
        console.warn('Delete operation failed:', error.message);
      }
    }
    return deletedCount;
  }

  updateStats(stats, batchResult) {
    stats.insertedCount += batchResult.insertedCount;
    stats.failedCount += batchResult.failedCount;
    stats.errors.push(...batchResult.errors);
  }

  mergeBulkUpdateResult(target, source) {
    target.matchedCount += source.matchedCount;
    target.modifiedCount += source.modifiedCount;
    target.upsertedCount += source.upsertedCount;
    target.upsertedIds.push(...source.upsertedIds);
  }

  createBatches(items, batchSize) {
    const batches = [];
    for (let i = 0; i < items.length; i += batchSize) {
      batches.push(items.slice(i, i + batchSize));
    }
    return batches;
  }

  async limitConcurrency(promises, limit) {
    const results = [];
    for (let i = 0; i < promises.length; i += limit) {
      const chunk = promises.slice(i, i + limit);
      const chunkResults = await Promise.all(chunk);
      results.push(...chunkResults);
    }
    return results;
  }

  getEmptyBulkResult() {
    return {
      acknowledged: true,
      insertedCount: 0,
      insertedIds: {},
      matchedCount: 0,
      modifiedCount: 0,
      deletedCount: 0,
      upsertedCount: 0,
      upsertedIds: {}
    };
  }

  generateStreamId() {
    return `stream_${Date.now()}_${Math.random().toString(36).substring(2, 15)}`;
  }
}

/**
 * Document stream for real-time processing
 */
export class DocumentStream {
  id;
  collection;
  config;
  storage;
  buffer = [];
  flushTimer = null;
  stats = {
    documentsProcessed: 0,
    bytesProcessed: 0,
    startTime: new Date(),
    lastFlush: new Date()
  };
  _isActive = true;

  constructor(id, collection, config, storage) {
    this.id = id;
    this.collection = collection;
    this.config = config;
    this.storage = storage;
    this.startFlushTimer();
  }

  get isActive() {
    return this._isActive;
  }

  /**
   * Add document to stream
   */
  async write(document) {
    if (!this._isActive) {
      throw new DocumentError('Stream is not active', 'STREAM_INACTIVE');
    }
    // Apply transformation if configured
    const processedDoc = this.config.transform ? this.config.transform(document) : document;
    this.buffer.push(processedDoc);
    this.stats.bytesProcessed += JSON.stringify(processedDoc).length;
    if (this.buffer.length >= this.config.batchSize) {
      await this.flush();
    }
  }

  /**
   * Flush buffered documents
   */
  async flush() {
    if (this.buffer.length === 0) {
      return;
    }
    try {
      const batch = [...this.buffer];
      this.buffer = [];
      const insertResult = await this.storage.insertMany(this.collection, batch);
      this.stats.documentsProcessed += insertResult.insertedCount;
      this.stats.lastFlush = new Date();
    } catch (error) {
      if (this.config.errorHandler) {
        this.config.errorHandler(error, this.buffer);
      } else {
        throw error;
      }
    }
  }

  /**
   * Stop the stream and flush remaining documents
   */
  async stop() {
    this._isActive = false;
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
    await this.flush();
  }

  startFlushTimer() {
    this.flushTimer = setInterval(async () => {
      try {
        await this.flush();
      } catch (error) {
        if (this.config.errorHandler) {
          this.config.errorHandler(error, []);
        } else {
          console.error('Stream flush error:', error);
        }
      }
    }, this.config.flushInterval);
  }
}
//# sourceMappingURL=bulk-operations.js.map
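
A minimal usage sketch of the two exported classes, based only on the methods called in this file. The `createStorage()` helper is hypothetical: constructing the underlying DocumentStorage (with insertOne, insertMany, updateOne, deleteOne, deleteMany, find and countDocuments) happens elsewhere in the package, and the MongoDB-style `$set` update document is assumed from the package's "MongoDB-compatible" description.

// Sketch only: createStorage() is a hypothetical stand-in for the package's storage backend.
import { BulkOperationsManager } from './bulk-operations';

const storage = createStorage(); // hypothetical; must provide the DocumentStorage methods used above
const manager = new BulkOperationsManager(storage, {
  bulkWriteBatchSize: 1000,   // config keys read by bulkWrite/streamInsert above
  bulkWriteParallelism: 4
});

// Mixed bulk write; ordered: false means failed operations are logged and skipped
const result = await manager.bulkWrite('users', [
  { insertOne: { document: { name: 'Ada' } } },
  { updateOne: { filter: { name: 'Ada' }, update: { $set: { active: true } }, upsert: true } },
  { deleteMany: { filter: { active: false } } }
], { ordered: false });
console.log(result.insertedCount, result.modifiedCount, result.deletedCount);

// Streaming writes: documents are buffered and flushed when batchSize is reached
// or when flushInterval (ms) elapses
const stream = manager.createDocumentStream('events', {
  batchSize: 500,
  flushInterval: 1000,
  transform: (doc) => ({ ...doc, receivedAt: new Date() })
});
await stream.write({ type: 'page_view' });
await stream.stop(); // stops the timer and flushes any remaining buffered documents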