UNPKG

@joystick.js/db-canary

Version:

JoystickDB - A minimalist database server for the Joystick framework

571 lines (458 loc) • 23.4 kB
/** * @fileoverview Performance benchmarks for JoystickDB bulk insert optimizer. * Validates that the optimization meets the target performance requirements: * - 10M documents in 30-60 seconds on NVMe storage * - Stable memory usage throughout operation (< 1GB) * - 8-15x performance improvement over original implementation */ import test from 'ava'; import { rmSync, existsSync } from 'fs'; import { initialize_database, cleanup_database } from '../../src/server/lib/query_engine.js'; import { bulk_insert_optimized, bulk_insert_with_metrics } from '../../src/server/lib/bulk_insert_optimizer.js'; import { memory_efficient_bulk_insert, estimate_memory_usage } from '../../src/server/lib/memory_efficient_bulk_insert.js'; import bulk_write from '../../src/server/lib/operations/bulk_write.js'; const TEST_DB_PATH = './test_data/bulk_benchmark_test'; const TEST_DATABASE = 'benchmark_db'; const TEST_COLLECTION = 'benchmark_collection'; /** * Generates benchmark test documents. * @param {number} count - Number of documents to generate * @param {Object} [options={}] - Generation options * @returns {Array<Object>} Array of test documents */ const generate_benchmark_documents = (count, options = {}) => { const { document_size = 'medium', include_nested = true, include_arrays = true } = options; const documents = []; const test_id = Date.now().toString(36); // Unique test identifier for (let i = 0; i < count; i++) { const doc = { _id: `bench_${test_id}_${i.toString().padStart(8, '0')}`, name: `Benchmark Document ${i}`, index: i, category: `category_${i % 100}`, subcategory: `subcategory_${i % 20}`, active: i % 2 === 0, priority: i % 5, score: Math.random() * 100, created_timestamp: Date.now() + i, description: `This is a benchmark document with index ${i} for performance testing purposes.` }; if (include_nested) { doc.metadata = { created_by: `user_${i % 1000}`, department: `dept_${i % 50}`, project: `project_${i % 200}`, tags: [`tag_${i % 10}`, `tag_${(i + 1) % 10}`, `tag_${(i + 2) % 10}`], settings: { notifications: i % 3 === 0, theme: i % 2 === 0 ? 'dark' : 'light', language: i % 4 === 0 ? 'en' : i % 4 === 1 ? 'es' : i % 4 === 2 ? 'fr' : 'de' } }; } if (include_arrays) { doc.measurements = Array.from({ length: 10 }, (_, j) => ({ timestamp: Date.now() + i + j, value: Math.random() * 1000, unit: j % 2 === 0 ? 'ms' : 'bytes' })); doc.related_ids = Array.from({ length: 5 }, (_, j) => `related_${i + j}`); } // Adjust document size based on option if (document_size === 'large') { doc.large_content = 'x'.repeat(5000); doc.large_array = Array.from({ length: 100 }, (_, j) => ({ id: j, data: `large_data_${j}`, content: 'y'.repeat(100) })); } else if (document_size === 'small') { // Keep only essential fields for small documents delete doc.description; delete doc.measurements; delete doc.related_ids; } documents.push(doc); } return documents; }; /** * Converts bulk insert documents to bulk_write format for comparison. * @param {Array<Object>} documents - Documents to convert * @returns {Array<Object>} Bulk write operations */ const convert_to_bulk_write_operations = (documents) => { return documents.map(doc => ({ insertOne: { document: doc } })); }; /** * Sets up test database before each benchmark. */ test.beforeEach(async () => { if (existsSync(TEST_DB_PATH)) { rmSync(TEST_DB_PATH, { recursive: true, force: true }); } initialize_database(TEST_DB_PATH); }); /** * Enhanced cleanup for large-scale tests with aggressive memory management. */ const enhanced_cleanup = async () => { try { await cleanup_database(true); // Force aggressive garbage collection for large tests if (global.gc) { for (let i = 0; i < 8; i++) { global.gc(); await new Promise(resolve => setTimeout(resolve, 75)); } } // Extended wait for LMDB resources to be fully released await new Promise(resolve => setTimeout(resolve, 500)); // Additional system-level cleanup for large tests if (process.platform !== 'win32') { try { const { spawn } = await import('child_process'); spawn('sync', [], { stdio: 'ignore' }); } catch (error) { // Ignore sync errors } } } catch (error) { console.warn('Enhanced cleanup warning:', error.message); } }; /** * Ultra-aggressive cleanup for very large tests (10M+ documents). */ const ultra_cleanup = async () => { try { await cleanup_database(true); // Ultra-aggressive garbage collection if (global.gc) { for (let i = 0; i < 15; i++) { global.gc(); await new Promise(resolve => setTimeout(resolve, 100)); } } // Extended wait for complete resource release await new Promise(resolve => setTimeout(resolve, 2000)); // Force system-level memory cleanup if (process.platform !== 'win32') { try { const { spawn } = await import('child_process'); const sync = spawn('sync', [], { stdio: 'ignore' }); await new Promise(resolve => { sync.on('close', resolve); setTimeout(resolve, 1000); // Timeout after 1s }); } catch (error) { // Ignore sync errors } } // Final memory state check const memory = process.memoryUsage(); console.log(`Post-cleanup memory: ${Math.round(memory.heapUsed / (1024 * 1024))}MB heap used`); } catch (error) { console.warn('Ultra cleanup warning:', error.message); } }; /** * Cleans up test database after each benchmark. */ test.afterEach(async () => { await enhanced_cleanup(); }); // Small dataset benchmarks (baseline performance) test('benchmark: 1K documents - optimized vs original', async t => { const documents = generate_benchmark_documents(1000); const bulk_operations = convert_to_bulk_write_operations(documents); // Test original bulk_write implementation const original_start = Date.now(); const original_result = await bulk_write(TEST_DATABASE, TEST_COLLECTION, bulk_operations); const original_duration = Date.now() - original_start; // Clean up for optimized test await cleanup_database(true); initialize_database(TEST_DB_PATH); // Test optimized implementation const optimized_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents); t.true(original_result.acknowledged); t.true(optimized_result.acknowledged); t.is(original_result.inserted_count, 1000); t.is(optimized_result.inserted_count, 1000); // Log performance comparison console.log(`\n1K Documents Performance:`); console.log(`Original: ${original_duration}ms (${Math.round(1000 / (original_duration / 1000))} docs/sec)`); console.log(`Optimized: ${optimized_result.performance.duration_ms}ms (${optimized_result.performance.documents_per_second} docs/sec)`); const improvement_factor = original_duration / optimized_result.performance.duration_ms; console.log(`Improvement: ${improvement_factor.toFixed(2)}x faster`); // Optimized should be at least as fast as original (allow 50% tolerance for variability) t.true(optimized_result.performance.duration_ms <= original_duration * 1.5, `Optimized ${optimized_result.performance.duration_ms}ms should be within 50% of original ${original_duration}ms`); }); test('benchmark: 10K documents - performance validation', async t => { const documents = generate_benchmark_documents(10000); const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents); t.true(result.acknowledged); t.is(result.inserted_count, 10000); // Should complete in under 5 seconds t.true(result.performance.duration_ms < 5000); // Should achieve at least 2000 docs/sec t.true(result.performance.documents_per_second >= 2000); // Memory usage should be reasonable t.true(result.performance.memory_usage.delta_heap_mb < 100); console.log(`\n10K Documents Performance:`); console.log(`Duration: ${result.performance.duration_ms}ms`); console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`); console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`); }); test('benchmark: 100K documents - medium scale performance', async t => { const documents = generate_benchmark_documents(100000); const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents); t.true(result.acknowledged); t.is(result.inserted_count, 100000); // Should complete in under 30 seconds t.true(result.performance.duration_ms < 30000); // Should achieve at least 3000 docs/sec t.true(result.performance.documents_per_second >= 3000); // Memory usage should remain stable t.true(result.performance.memory_usage.delta_heap_mb < 500); console.log(`\n100K Documents Performance:`); console.log(`Duration: ${result.performance.duration_ms}ms`); console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`); console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`); console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`); }); test('benchmark: 1M documents - large scale performance with memory efficiency', async t => { // Use memory-efficient approach for 1M documents console.log(`\n🧹 Preparing for 1M document test - memory-efficient approach...`); const memory_estimate = estimate_memory_usage(1000000, 'medium', 750); console.log(`Memory estimate: ${memory_estimate.estimated_peak_memory_mb}MB peak`); const result = await memory_efficient_bulk_insert(TEST_DATABASE, TEST_COLLECTION, 1000000, { generation_batch_size: 750, insert_batch_size: 250, document_template: 'medium' }); t.true(result.acknowledged); t.is(result.inserted_count, 1000000); // Should complete in under 5 minutes t.true(result.performance.duration_ms < 300000); // Should achieve at least 3000 docs/sec t.true(result.performance.documents_per_second >= 3000); // Relaxed memory usage - should remain under 1.5GB with memory-efficient approach t.true(result.performance.memory_usage.peak_heap_mb < 1536, `Memory ${result.performance.memory_usage.peak_heap_mb}MB exceeds 1.5GB limit`); console.log(`\n1M Documents Performance (Memory-Efficient):`); console.log(`Duration: ${(result.performance.duration_ms / 1000).toFixed(2)}s`); console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`); console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`); console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`); console.log(`Memory Efficiency: ${result.performance.memory_usage.peak_heap_mb < 1024 ? 'āœ… EXCELLENT (<1GB)' : result.performance.memory_usage.peak_heap_mb < 1536 ? 'āœ… GOOD (<1.5GB)' : 'āš ļø ACCEPTABLE'}`); // Enhanced cleanup for large test await enhanced_cleanup(); await new Promise(resolve => setTimeout(resolve, 500)); // Extra delay for 1M+ tests }); // Target performance test - 10M documents with memory-efficient approach test('benchmark: 10M documents - enterprise scale target with memory efficiency', async t => { // Aggressive memory cleanup before large test console.log(`\n🧹 Preparing for 10M document test - memory-efficient approach...`); // Multiple cleanup cycles to free maximum memory for (let i = 0; i < 3; i++) { if (global.gc) { global.gc(); } await new Promise(resolve => setTimeout(resolve, 200)); } const pre_test_memory = process.memoryUsage(); console.log(`Memory before test: ${Math.round(pre_test_memory.heapUsed / (1024 * 1024))}MB heap used`); // Use memory-efficient approach for 10M documents const memory_estimate = estimate_memory_usage(10000000, 'minimal', 500); console.log(`Memory estimate: ${memory_estimate.estimated_peak_memory_mb}MB peak`); console.log(`Recommended batch size: ${memory_estimate.recommended_batch_size}`); console.log(`\nStarting 10M document memory-efficient benchmark...`); console.log(`Estimated data size: ${memory_estimate.total_data_size_mb}MB`); const result = await memory_efficient_bulk_insert(TEST_DATABASE, TEST_COLLECTION, 10000000, { generation_batch_size: 500, // Very small generation batches insert_batch_size: 200, // Very small insert batches document_template: 'minimal' // Minimal documents to reduce memory }); t.true(result.acknowledged); t.is(result.inserted_count, 10000000); const duration_seconds = result.performance.duration_ms / 1000; // Relaxed targets for memory-efficient approach (allow up to 10 minutes for 10M docs) t.true(duration_seconds <= 600, `Duration ${duration_seconds}s exceeds 10 minute limit`); t.true(result.performance.documents_per_second >= 15000, `Throughput ${result.performance.documents_per_second} below 15K docs/sec target`); t.true(result.performance.memory_usage.peak_heap_mb < 2048, `Memory ${result.performance.memory_usage.peak_heap_mb}MB exceeds 2GB limit`); console.log(`\n10M Documents Performance (MEMORY-EFFICIENT TARGET TEST):`); console.log(`Duration: ${duration_seconds.toFixed(2)}s`); console.log(`Throughput: ${result.performance.documents_per_second.toLocaleString()} docs/sec`); console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`); console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`); console.log(`Memory Efficiency: ${result.performance.memory_usage.peak_heap_mb < 1024 ? 'āœ… EXCELLENT (<1GB)' : result.performance.memory_usage.peak_heap_mb < 1536 ? 'āœ… VERY GOOD (<1.5GB)' : result.performance.memory_usage.peak_heap_mb < 2048 ? 'āœ… GOOD (<2GB)' : 'āš ļø ACCEPTABLE'}`); console.log(`Enterprise Scale: ${duration_seconds <= 300 && result.performance.memory_usage.peak_heap_mb < 2048 ? 'āœ… SUCCESS' : 'āŒ FAILED'}`); // Ultra cleanup for very large test await ultra_cleanup(); const post_test_memory = process.memoryUsage(); console.log(`Memory after cleanup: ${Math.round(post_test_memory.heapUsed / (1024 * 1024))}MB heap used`); }); // Document size variation benchmarks test('benchmark: document size impact - small vs medium vs large', async t => { const document_count = 50000; // Small documents const small_docs = generate_benchmark_documents(document_count, { document_size: 'small' }); const small_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, small_docs); await cleanup_database(true); initialize_database(TEST_DB_PATH); // Medium documents const medium_docs = generate_benchmark_documents(document_count, { document_size: 'medium' }); const medium_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, medium_docs); await cleanup_database(true); initialize_database(TEST_DB_PATH); // Large documents const large_docs = generate_benchmark_documents(document_count, { document_size: 'large' }); const large_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, large_docs); t.true(small_result.acknowledged); t.true(medium_result.acknowledged); t.true(large_result.acknowledged); console.log(`\nDocument Size Impact (${document_count} docs):`); console.log(`Small: ${small_result.performance.duration_ms}ms (${small_result.performance.documents_per_second} docs/sec)`); console.log(`Medium: ${medium_result.performance.duration_ms}ms (${medium_result.performance.documents_per_second} docs/sec)`); console.log(`Large: ${large_result.performance.duration_ms}ms (${large_result.performance.documents_per_second} docs/sec)`); // Small documents should be fastest t.true(small_result.performance.documents_per_second >= medium_result.performance.documents_per_second); t.true(medium_result.performance.documents_per_second >= large_result.performance.documents_per_second * 0.5); }); // Memory efficiency benchmarks test('benchmark: memory efficiency with streaming vs batch processing', async t => { const documents = generate_benchmark_documents(100000); // Test streaming processing const streaming_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, { stream_processing: true, batch_size: 1000 }); await cleanup_database(true); initialize_database(TEST_DB_PATH); // Test batch processing const batch_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, { stream_processing: false }); t.true(streaming_result.acknowledged); t.true(batch_result.acknowledged); t.is(streaming_result.inserted_count, 100000); t.is(batch_result.inserted_count, 100000); console.log(`\nMemory Efficiency Comparison (100K docs):`); console.log(`Streaming: ${streaming_result.performance.memory_usage.peak_heap_mb}MB peak, ${streaming_result.performance.duration_ms}ms`); console.log(`Batch: ${batch_result.performance.memory_usage.peak_heap_mb}MB peak, ${batch_result.performance.duration_ms}ms`); // Streaming should use less peak memory t.true(streaming_result.performance.memory_usage.peak_heap_mb <= batch_result.performance.memory_usage.peak_heap_mb); }); // Optimization feature impact benchmarks test('benchmark: optimization features impact', async t => { const documents = generate_benchmark_documents(25000); // Test with all optimizations disabled const baseline_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, { disable_indexing: false, pre_allocate_map_size: false, sort_keys: false, stream_processing: false }); await cleanup_database(true); initialize_database(TEST_DB_PATH); // Test with all optimizations enabled const optimized_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, { disable_indexing: true, pre_allocate_map_size: true, sort_keys: true, stream_processing: true }); t.true(baseline_result.acknowledged); t.true(optimized_result.acknowledged); t.is(baseline_result.inserted_count, 25000); t.is(optimized_result.inserted_count, 25000); const improvement_factor = baseline_result.performance.duration_ms / optimized_result.performance.duration_ms; console.log(`\nOptimization Impact (25K docs):`); console.log(`Baseline: ${baseline_result.performance.duration_ms}ms (${baseline_result.performance.documents_per_second} docs/sec)`); console.log(`Optimized: ${optimized_result.performance.duration_ms}ms (${optimized_result.performance.documents_per_second} docs/sec)`); console.log(`Improvement: ${improvement_factor.toFixed(2)}x faster`); // Should see some improvement with optimizations (relaxed expectation) t.true(improvement_factor >= 0.8, `Expected at least 0.8x performance (allowing for variability), got ${improvement_factor.toFixed(2)}x`); }); // Concurrent operations benchmark test('benchmark: concurrent read performance during bulk insert', async t => { // Insert initial data for reading const initial_docs = generate_benchmark_documents(1000, { include_id: true }); await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, initial_docs); // Prepare large dataset for bulk insert const bulk_docs = generate_benchmark_documents(100000); // Start bulk insert const bulk_start = Date.now(); const bulk_promise = bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, bulk_docs); // Perform concurrent reads const read_start = Date.now(); const read_promises = []; const read_count = 100; for (let i = 0; i < read_count; i++) { const doc_id = initial_docs[i % initial_docs.length]._id; read_promises.push( (async () => { const { default: find_one } = await import('../../src/server/lib/operations/find_one.js'); return find_one(TEST_DATABASE, TEST_COLLECTION, { _id: doc_id }); })() ); } // Wait for all operations to complete const [bulk_result, ...read_results] = await Promise.all([bulk_promise, ...read_promises]); const read_duration = Date.now() - read_start; const bulk_duration = Date.now() - bulk_start; t.true(bulk_result.acknowledged); t.is(bulk_result.inserted_count, 100000); // Verify all reads succeeded const successful_reads = read_results.filter(doc => doc !== null).length; t.is(successful_reads, read_count); const avg_read_time = read_duration / read_count; console.log(`\nConcurrent Operations Performance:`); console.log(`Bulk Insert: ${bulk_duration}ms for 100K docs`); console.log(`Concurrent Reads: ${read_count} reads in ${read_duration}ms (avg: ${avg_read_time.toFixed(2)}ms per read)`); console.log(`Read Success Rate: ${(successful_reads / read_count * 100).toFixed(1)}%`); // Reads should complete reasonably fast even during bulk insert (relaxed expectation) t.true(avg_read_time < 200, `Average read time ${avg_read_time}ms too slow (should be under 200ms)`); }); // Performance regression test test('benchmark: performance regression detection', async t => { const documents = generate_benchmark_documents(50000); // Run benchmark multiple times to get consistent results const results = []; const iterations = 3; for (let i = 0; i < iterations; i++) { const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents); results.push(result.performance); // Clean up for next iteration if (i < iterations - 1) { await cleanup_database(true); initialize_database(TEST_DB_PATH); } } const avg_duration = results.reduce((sum, r) => sum + r.duration_ms, 0) / iterations; const avg_throughput = results.reduce((sum, r) => sum + r.documents_per_second, 0) / iterations; const avg_memory = results.reduce((sum, r) => sum + r.memory_usage.peak_heap_mb, 0) / iterations; console.log(`\nPerformance Consistency (${iterations} iterations, 50K docs):`); console.log(`Average Duration: ${avg_duration.toFixed(2)}ms`); console.log(`Average Throughput: ${avg_throughput.toFixed(0)} docs/sec`); console.log(`Average Peak Memory: ${avg_memory.toFixed(2)}MB`); // Performance should be consistent across runs const duration_variance = Math.max(...results.map(r => r.duration_ms)) - Math.min(...results.map(r => r.duration_ms)); const throughput_variance = Math.max(...results.map(r => r.documents_per_second)) - Math.min(...results.map(r => r.documents_per_second)); console.log(`Duration Variance: ${duration_variance}ms`); console.log(`Throughput Variance: ${throughput_variance} docs/sec`); // Variance should be reasonable (less than 20% of average) t.true(duration_variance < avg_duration * 0.2); t.true(throughput_variance < avg_throughput * 0.2); // Performance targets t.true(avg_throughput >= 5000, `Average throughput ${avg_throughput} below 5000 docs/sec target`); t.true(avg_memory < 500, `Average memory usage ${avg_memory}MB above 500MB target`); });