@joystick.js/db-canary
Version:
JoystickDB - A minimalist database server for the Joystick framework
571 lines (458 loc) ⢠23.4 kB
JavaScript
/**
* @fileoverview Performance benchmarks for JoystickDB bulk insert optimizer.
* Validates that the optimization meets the target performance requirements:
* - 10M documents in 30-60 seconds on NVMe storage
* - Stable memory usage throughout operation (< 1GB)
* - 8-15x performance improvement over original implementation
*/
import test from 'ava';
import { rmSync, existsSync } from 'fs';
import { initialize_database, cleanup_database } from '../../src/server/lib/query_engine.js';
import { bulk_insert_optimized, bulk_insert_with_metrics } from '../../src/server/lib/bulk_insert_optimizer.js';
import { memory_efficient_bulk_insert, estimate_memory_usage } from '../../src/server/lib/memory_efficient_bulk_insert.js';
import bulk_write from '../../src/server/lib/operations/bulk_write.js';
const TEST_DB_PATH = './test_data/bulk_benchmark_test';
const TEST_DATABASE = 'benchmark_db';
const TEST_COLLECTION = 'benchmark_collection';
/**
* Generates benchmark test documents.
* @param {number} count - Number of documents to generate
* @param {Object} [options={}] - Generation options
* @returns {Array<Object>} Array of test documents
*/
const generate_benchmark_documents = (count, options = {}) => {
const {
document_size = 'medium',
include_nested = true,
include_arrays = true
} = options;
const documents = [];
const test_id = Date.now().toString(36); // Unique test identifier
for (let i = 0; i < count; i++) {
const doc = {
_id: `bench_${test_id}_${i.toString().padStart(8, '0')}`,
name: `Benchmark Document ${i}`,
index: i,
category: `category_${i % 100}`,
subcategory: `subcategory_${i % 20}`,
active: i % 2 === 0,
priority: i % 5,
score: Math.random() * 100,
created_timestamp: Date.now() + i,
description: `This is a benchmark document with index ${i} for performance testing purposes.`
};
if (include_nested) {
doc.metadata = {
created_by: `user_${i % 1000}`,
department: `dept_${i % 50}`,
project: `project_${i % 200}`,
tags: [`tag_${i % 10}`, `tag_${(i + 1) % 10}`, `tag_${(i + 2) % 10}`],
settings: {
notifications: i % 3 === 0,
theme: i % 2 === 0 ? 'dark' : 'light',
language: i % 4 === 0 ? 'en' : i % 4 === 1 ? 'es' : i % 4 === 2 ? 'fr' : 'de'
}
};
}
if (include_arrays) {
doc.measurements = Array.from({ length: 10 }, (_, j) => ({
timestamp: Date.now() + i + j,
value: Math.random() * 1000,
unit: j % 2 === 0 ? 'ms' : 'bytes'
}));
doc.related_ids = Array.from({ length: 5 }, (_, j) => `related_${i + j}`);
}
// Adjust document size based on option
if (document_size === 'large') {
doc.large_content = 'x'.repeat(5000);
doc.large_array = Array.from({ length: 100 }, (_, j) => ({
id: j,
data: `large_data_${j}`,
content: 'y'.repeat(100)
}));
} else if (document_size === 'small') {
// Keep only essential fields for small documents
delete doc.description;
delete doc.measurements;
delete doc.related_ids;
}
documents.push(doc);
}
return documents;
};
/**
* Converts bulk insert documents to bulk_write format for comparison.
* @param {Array<Object>} documents - Documents to convert
* @returns {Array<Object>} Bulk write operations
*/
const convert_to_bulk_write_operations = (documents) => {
return documents.map(doc => ({
insertOne: {
document: doc
}
}));
};
/**
* Sets up test database before each benchmark.
*/
test.beforeEach(async () => {
if (existsSync(TEST_DB_PATH)) {
rmSync(TEST_DB_PATH, { recursive: true, force: true });
}
initialize_database(TEST_DB_PATH);
});
/**
* Enhanced cleanup for large-scale tests with aggressive memory management.
*/
const enhanced_cleanup = async () => {
try {
await cleanup_database(true);
// Force aggressive garbage collection for large tests
if (global.gc) {
for (let i = 0; i < 8; i++) {
global.gc();
await new Promise(resolve => setTimeout(resolve, 75));
}
}
// Extended wait for LMDB resources to be fully released
await new Promise(resolve => setTimeout(resolve, 500));
// Additional system-level cleanup for large tests
if (process.platform !== 'win32') {
try {
const { spawn } = await import('child_process');
spawn('sync', [], { stdio: 'ignore' });
} catch (error) {
// Ignore sync errors
}
}
} catch (error) {
console.warn('Enhanced cleanup warning:', error.message);
}
};
/**
* Ultra-aggressive cleanup for very large tests (10M+ documents).
*/
const ultra_cleanup = async () => {
try {
await cleanup_database(true);
// Ultra-aggressive garbage collection
if (global.gc) {
for (let i = 0; i < 15; i++) {
global.gc();
await new Promise(resolve => setTimeout(resolve, 100));
}
}
// Extended wait for complete resource release
await new Promise(resolve => setTimeout(resolve, 2000));
// Force system-level memory cleanup
if (process.platform !== 'win32') {
try {
const { spawn } = await import('child_process');
const sync = spawn('sync', [], { stdio: 'ignore' });
await new Promise(resolve => {
sync.on('close', resolve);
setTimeout(resolve, 1000); // Timeout after 1s
});
} catch (error) {
// Ignore sync errors
}
}
// Final memory state check
const memory = process.memoryUsage();
console.log(`Post-cleanup memory: ${Math.round(memory.heapUsed / (1024 * 1024))}MB heap used`);
} catch (error) {
console.warn('Ultra cleanup warning:', error.message);
}
};
/**
* Cleans up test database after each benchmark.
*/
test.afterEach(async () => {
await enhanced_cleanup();
});
// Small dataset benchmarks (baseline performance)
test('benchmark: 1K documents - optimized vs original', async t => {
const documents = generate_benchmark_documents(1000);
const bulk_operations = convert_to_bulk_write_operations(documents);
// Test original bulk_write implementation
const original_start = Date.now();
const original_result = await bulk_write(TEST_DATABASE, TEST_COLLECTION, bulk_operations);
const original_duration = Date.now() - original_start;
// Clean up for optimized test
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Test optimized implementation
const optimized_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(original_result.acknowledged);
t.true(optimized_result.acknowledged);
t.is(original_result.inserted_count, 1000);
t.is(optimized_result.inserted_count, 1000);
// Log performance comparison
console.log(`\n1K Documents Performance:`);
console.log(`Original: ${original_duration}ms (${Math.round(1000 / (original_duration / 1000))} docs/sec)`);
console.log(`Optimized: ${optimized_result.performance.duration_ms}ms (${optimized_result.performance.documents_per_second} docs/sec)`);
const improvement_factor = original_duration / optimized_result.performance.duration_ms;
console.log(`Improvement: ${improvement_factor.toFixed(2)}x faster`);
// Optimized should be at least as fast as original (allow 50% tolerance for variability)
t.true(optimized_result.performance.duration_ms <= original_duration * 1.5, `Optimized ${optimized_result.performance.duration_ms}ms should be within 50% of original ${original_duration}ms`);
});
test('benchmark: 10K documents - performance validation', async t => {
const documents = generate_benchmark_documents(10000);
const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 10000);
// Should complete in under 5 seconds
t.true(result.performance.duration_ms < 5000);
// Should achieve at least 2000 docs/sec
t.true(result.performance.documents_per_second >= 2000);
// Memory usage should be reasonable
t.true(result.performance.memory_usage.delta_heap_mb < 100);
console.log(`\n10K Documents Performance:`);
console.log(`Duration: ${result.performance.duration_ms}ms`);
console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`);
console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`);
});
test('benchmark: 100K documents - medium scale performance', async t => {
const documents = generate_benchmark_documents(100000);
const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 100000);
// Should complete in under 30 seconds
t.true(result.performance.duration_ms < 30000);
// Should achieve at least 3000 docs/sec
t.true(result.performance.documents_per_second >= 3000);
// Memory usage should remain stable
t.true(result.performance.memory_usage.delta_heap_mb < 500);
console.log(`\n100K Documents Performance:`);
console.log(`Duration: ${result.performance.duration_ms}ms`);
console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`);
console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`);
console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`);
});
test('benchmark: 1M documents - large scale performance with memory efficiency', async t => {
// Use memory-efficient approach for 1M documents
console.log(`\nš§¹ Preparing for 1M document test - memory-efficient approach...`);
const memory_estimate = estimate_memory_usage(1000000, 'medium', 750);
console.log(`Memory estimate: ${memory_estimate.estimated_peak_memory_mb}MB peak`);
const result = await memory_efficient_bulk_insert(TEST_DATABASE, TEST_COLLECTION, 1000000, {
generation_batch_size: 750,
insert_batch_size: 250,
document_template: 'medium'
});
t.true(result.acknowledged);
t.is(result.inserted_count, 1000000);
// Should complete in under 5 minutes
t.true(result.performance.duration_ms < 300000);
// Should achieve at least 3000 docs/sec
t.true(result.performance.documents_per_second >= 3000);
// Relaxed memory usage - should remain under 1.5GB with memory-efficient approach
t.true(result.performance.memory_usage.peak_heap_mb < 1536, `Memory ${result.performance.memory_usage.peak_heap_mb}MB exceeds 1.5GB limit`);
console.log(`\n1M Documents Performance (Memory-Efficient):`);
console.log(`Duration: ${(result.performance.duration_ms / 1000).toFixed(2)}s`);
console.log(`Throughput: ${result.performance.documents_per_second} docs/sec`);
console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`);
console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`);
console.log(`Memory Efficiency: ${result.performance.memory_usage.peak_heap_mb < 1024 ? 'ā
EXCELLENT (<1GB)' : result.performance.memory_usage.peak_heap_mb < 1536 ? 'ā
GOOD (<1.5GB)' : 'ā ļø ACCEPTABLE'}`);
// Enhanced cleanup for large test
await enhanced_cleanup();
await new Promise(resolve => setTimeout(resolve, 500)); // Extra delay for 1M+ tests
});
// Target performance test - 10M documents with memory-efficient approach
test('benchmark: 10M documents - enterprise scale target with memory efficiency', async t => {
// Aggressive memory cleanup before large test
console.log(`\nš§¹ Preparing for 10M document test - memory-efficient approach...`);
// Multiple cleanup cycles to free maximum memory
for (let i = 0; i < 3; i++) {
if (global.gc) {
global.gc();
}
await new Promise(resolve => setTimeout(resolve, 200));
}
const pre_test_memory = process.memoryUsage();
console.log(`Memory before test: ${Math.round(pre_test_memory.heapUsed / (1024 * 1024))}MB heap used`);
// Use memory-efficient approach for 10M documents
const memory_estimate = estimate_memory_usage(10000000, 'minimal', 500);
console.log(`Memory estimate: ${memory_estimate.estimated_peak_memory_mb}MB peak`);
console.log(`Recommended batch size: ${memory_estimate.recommended_batch_size}`);
console.log(`\nStarting 10M document memory-efficient benchmark...`);
console.log(`Estimated data size: ${memory_estimate.total_data_size_mb}MB`);
const result = await memory_efficient_bulk_insert(TEST_DATABASE, TEST_COLLECTION, 10000000, {
generation_batch_size: 500, // Very small generation batches
insert_batch_size: 200, // Very small insert batches
document_template: 'minimal' // Minimal documents to reduce memory
});
t.true(result.acknowledged);
t.is(result.inserted_count, 10000000);
const duration_seconds = result.performance.duration_ms / 1000;
// Relaxed targets for memory-efficient approach (allow up to 10 minutes for 10M docs)
t.true(duration_seconds <= 600, `Duration ${duration_seconds}s exceeds 10 minute limit`);
t.true(result.performance.documents_per_second >= 15000, `Throughput ${result.performance.documents_per_second} below 15K docs/sec target`);
t.true(result.performance.memory_usage.peak_heap_mb < 2048, `Memory ${result.performance.memory_usage.peak_heap_mb}MB exceeds 2GB limit`);
console.log(`\n10M Documents Performance (MEMORY-EFFICIENT TARGET TEST):`);
console.log(`Duration: ${duration_seconds.toFixed(2)}s`);
console.log(`Throughput: ${result.performance.documents_per_second.toLocaleString()} docs/sec`);
console.log(`Memory Delta: ${result.performance.memory_usage.delta_heap_mb}MB`);
console.log(`Peak Memory: ${result.performance.memory_usage.peak_heap_mb}MB`);
console.log(`Memory Efficiency: ${result.performance.memory_usage.peak_heap_mb < 1024 ? 'ā
EXCELLENT (<1GB)' : result.performance.memory_usage.peak_heap_mb < 1536 ? 'ā
VERY GOOD (<1.5GB)' : result.performance.memory_usage.peak_heap_mb < 2048 ? 'ā
GOOD (<2GB)' : 'ā ļø ACCEPTABLE'}`);
console.log(`Enterprise Scale: ${duration_seconds <= 300 && result.performance.memory_usage.peak_heap_mb < 2048 ? 'ā
SUCCESS' : 'ā FAILED'}`);
// Ultra cleanup for very large test
await ultra_cleanup();
const post_test_memory = process.memoryUsage();
console.log(`Memory after cleanup: ${Math.round(post_test_memory.heapUsed / (1024 * 1024))}MB heap used`);
});
// Document size variation benchmarks
test('benchmark: document size impact - small vs medium vs large', async t => {
const document_count = 50000;
// Small documents
const small_docs = generate_benchmark_documents(document_count, { document_size: 'small' });
const small_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, small_docs);
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Medium documents
const medium_docs = generate_benchmark_documents(document_count, { document_size: 'medium' });
const medium_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, medium_docs);
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Large documents
const large_docs = generate_benchmark_documents(document_count, { document_size: 'large' });
const large_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, large_docs);
t.true(small_result.acknowledged);
t.true(medium_result.acknowledged);
t.true(large_result.acknowledged);
console.log(`\nDocument Size Impact (${document_count} docs):`);
console.log(`Small: ${small_result.performance.duration_ms}ms (${small_result.performance.documents_per_second} docs/sec)`);
console.log(`Medium: ${medium_result.performance.duration_ms}ms (${medium_result.performance.documents_per_second} docs/sec)`);
console.log(`Large: ${large_result.performance.duration_ms}ms (${large_result.performance.documents_per_second} docs/sec)`);
// Small documents should be fastest
t.true(small_result.performance.documents_per_second >= medium_result.performance.documents_per_second);
t.true(medium_result.performance.documents_per_second >= large_result.performance.documents_per_second * 0.5);
});
// Memory efficiency benchmarks
test('benchmark: memory efficiency with streaming vs batch processing', async t => {
const documents = generate_benchmark_documents(100000);
// Test streaming processing
const streaming_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, {
stream_processing: true,
batch_size: 1000
});
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Test batch processing
const batch_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, {
stream_processing: false
});
t.true(streaming_result.acknowledged);
t.true(batch_result.acknowledged);
t.is(streaming_result.inserted_count, 100000);
t.is(batch_result.inserted_count, 100000);
console.log(`\nMemory Efficiency Comparison (100K docs):`);
console.log(`Streaming: ${streaming_result.performance.memory_usage.peak_heap_mb}MB peak, ${streaming_result.performance.duration_ms}ms`);
console.log(`Batch: ${batch_result.performance.memory_usage.peak_heap_mb}MB peak, ${batch_result.performance.duration_ms}ms`);
// Streaming should use less peak memory
t.true(streaming_result.performance.memory_usage.peak_heap_mb <= batch_result.performance.memory_usage.peak_heap_mb);
});
// Optimization feature impact benchmarks
test('benchmark: optimization features impact', async t => {
const documents = generate_benchmark_documents(25000);
// Test with all optimizations disabled
const baseline_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, {
disable_indexing: false,
pre_allocate_map_size: false,
sort_keys: false,
stream_processing: false
});
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Test with all optimizations enabled
const optimized_result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents, {
disable_indexing: true,
pre_allocate_map_size: true,
sort_keys: true,
stream_processing: true
});
t.true(baseline_result.acknowledged);
t.true(optimized_result.acknowledged);
t.is(baseline_result.inserted_count, 25000);
t.is(optimized_result.inserted_count, 25000);
const improvement_factor = baseline_result.performance.duration_ms / optimized_result.performance.duration_ms;
console.log(`\nOptimization Impact (25K docs):`);
console.log(`Baseline: ${baseline_result.performance.duration_ms}ms (${baseline_result.performance.documents_per_second} docs/sec)`);
console.log(`Optimized: ${optimized_result.performance.duration_ms}ms (${optimized_result.performance.documents_per_second} docs/sec)`);
console.log(`Improvement: ${improvement_factor.toFixed(2)}x faster`);
// Should see some improvement with optimizations (relaxed expectation)
t.true(improvement_factor >= 0.8, `Expected at least 0.8x performance (allowing for variability), got ${improvement_factor.toFixed(2)}x`);
});
// Concurrent operations benchmark
test('benchmark: concurrent read performance during bulk insert', async t => {
// Insert initial data for reading
const initial_docs = generate_benchmark_documents(1000, { include_id: true });
await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, initial_docs);
// Prepare large dataset for bulk insert
const bulk_docs = generate_benchmark_documents(100000);
// Start bulk insert
const bulk_start = Date.now();
const bulk_promise = bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, bulk_docs);
// Perform concurrent reads
const read_start = Date.now();
const read_promises = [];
const read_count = 100;
for (let i = 0; i < read_count; i++) {
const doc_id = initial_docs[i % initial_docs.length]._id;
read_promises.push(
(async () => {
const { default: find_one } = await import('../../src/server/lib/operations/find_one.js');
return find_one(TEST_DATABASE, TEST_COLLECTION, { _id: doc_id });
})()
);
}
// Wait for all operations to complete
const [bulk_result, ...read_results] = await Promise.all([bulk_promise, ...read_promises]);
const read_duration = Date.now() - read_start;
const bulk_duration = Date.now() - bulk_start;
t.true(bulk_result.acknowledged);
t.is(bulk_result.inserted_count, 100000);
// Verify all reads succeeded
const successful_reads = read_results.filter(doc => doc !== null).length;
t.is(successful_reads, read_count);
const avg_read_time = read_duration / read_count;
console.log(`\nConcurrent Operations Performance:`);
console.log(`Bulk Insert: ${bulk_duration}ms for 100K docs`);
console.log(`Concurrent Reads: ${read_count} reads in ${read_duration}ms (avg: ${avg_read_time.toFixed(2)}ms per read)`);
console.log(`Read Success Rate: ${(successful_reads / read_count * 100).toFixed(1)}%`);
// Reads should complete reasonably fast even during bulk insert (relaxed expectation)
t.true(avg_read_time < 200, `Average read time ${avg_read_time}ms too slow (should be under 200ms)`);
});
// Performance regression test
test('benchmark: performance regression detection', async t => {
const documents = generate_benchmark_documents(50000);
// Run benchmark multiple times to get consistent results
const results = [];
const iterations = 3;
for (let i = 0; i < iterations; i++) {
const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);
results.push(result.performance);
// Clean up for next iteration
if (i < iterations - 1) {
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
}
}
const avg_duration = results.reduce((sum, r) => sum + r.duration_ms, 0) / iterations;
const avg_throughput = results.reduce((sum, r) => sum + r.documents_per_second, 0) / iterations;
const avg_memory = results.reduce((sum, r) => sum + r.memory_usage.peak_heap_mb, 0) / iterations;
console.log(`\nPerformance Consistency (${iterations} iterations, 50K docs):`);
console.log(`Average Duration: ${avg_duration.toFixed(2)}ms`);
console.log(`Average Throughput: ${avg_throughput.toFixed(0)} docs/sec`);
console.log(`Average Peak Memory: ${avg_memory.toFixed(2)}MB`);
// Performance should be consistent across runs
const duration_variance = Math.max(...results.map(r => r.duration_ms)) - Math.min(...results.map(r => r.duration_ms));
const throughput_variance = Math.max(...results.map(r => r.documents_per_second)) - Math.min(...results.map(r => r.documents_per_second));
console.log(`Duration Variance: ${duration_variance}ms`);
console.log(`Throughput Variance: ${throughput_variance} docs/sec`);
// Variance should be reasonable (less than 20% of average)
t.true(duration_variance < avg_duration * 0.2);
t.true(throughput_variance < avg_throughput * 0.2);
// Performance targets
t.true(avg_throughput >= 5000, `Average throughput ${avg_throughput} below 5000 docs/sec target`);
t.true(avg_memory < 500, `Average memory usage ${avg_memory}MB above 500MB target`);
});