@joystick.js/db-canary

JoystickDB - A minimalist database server for the Joystick framework

/**
 * @fileoverview Comprehensive tests for JoystickDB bulk insert optimizer.
 * Tests performance optimizations, safety guarantees, and concurrent read capabilities.
 */

import test from 'ava';
import { rmSync, existsSync } from 'fs';
import { initialize_database, cleanup_database } from '../../../src/server/lib/query_engine.js';
import {
  bulk_insert_optimized,
  bulk_insert_with_metrics,
  non_blocking_bulk_insert,
  calculate_average_document_size,
  calculate_bulk_map_size,
  create_size_based_batches,
  sort_documents_by_key,
  pre_encode_documents
} from '../../../src/server/lib/bulk_insert_optimizer.js';
import find_one from '../../../src/server/lib/operations/find_one.js';
import find from '../../../src/server/lib/operations/find.js';

const TEST_DB_PATH = './test_data/bulk_optimizer_test';
const TEST_DATABASE = 'test_db';
const TEST_COLLECTION = 'test_collection';

/**
 * Generates test documents for bulk insert testing.
 * @param {number} count - Number of documents to generate
 * @param {Object} [options={}] - Generation options
 * @returns {Array<Object>} Array of test documents
 */
const generate_test_documents = (count, options = {}) => {
  const { include_id = false, base_size = 100, variable_size = false } = options;
  const documents = [];

  for (let i = 0; i < count; i++) {
    const doc = {
      name: `Test Document ${i}`,
      index: i,
      category: `category_${i % 10}`,
      active: i % 2 === 0,
      metadata: {
        created_by: 'test_user',
        tags: [`tag_${i % 5}`, `tag_${(i + 1) % 5}`],
        priority: i % 3
      }
    };

    if (include_id) {
      doc._id = `test_doc_${i.toString().padStart(6, '0')}`;
    }

    // Add variable size content for testing
    if (variable_size) {
      const extra_content_size = Math.floor(Math.random() * base_size);
      doc.content = 'x'.repeat(extra_content_size);
    }

    documents.push(doc);
  }

  return documents;
};

/**
 * Sets up test database before each test.
 */
test.beforeEach(async () => {
  // Clean up any existing test database
  if (existsSync(TEST_DB_PATH)) {
    rmSync(TEST_DB_PATH, { recursive: true, force: true });
  }

  // Initialize fresh database
  initialize_database(TEST_DB_PATH);
});

/**
 * Cleans up test database after each test.
 */
test.afterEach(async () => {
  await cleanup_database(true);
});

// Utility function tests
test('calculate_average_document_size should calculate correct average', t => {
  const documents = [
    { name: 'doc1', data: 'small' },
    { name: 'doc2', data: 'medium_sized_content' },
    { name: 'doc3', data: 'large_content_with_more_data' }
  ];

  const avg_size = calculate_average_document_size(documents);

  t.true(avg_size > 0);
  t.true(typeof avg_size === 'number');
});

test('calculate_bulk_map_size should return appropriate map size', t => {
  const document_count = 100000;
  const avg_document_size = 500;

  const map_size = calculate_bulk_map_size(document_count, avg_document_size);

  // Should be at least 2x the estimated size
  const estimated_size = document_count * avg_document_size;
  t.true(map_size >= estimated_size * 2);

  // Should be at least 10GB minimum
  const minimum_size = 1024 * 1024 * 1024 * 10;
  t.true(map_size >= minimum_size);
});

test('create_size_based_batches should create appropriate batches', t => {
  const documents = generate_test_documents(1000);
  const target_size = 50 * 1024; // 50KB target

  const batches = create_size_based_batches(documents, target_size);

  t.true(batches.length > 0);
  t.true(Array.isArray(batches));

  // Verify all documents are included
  const total_docs = batches.reduce((sum, batch) => sum + batch.length, 0);
  t.is(total_docs, documents.length);
});

test('sort_documents_by_key should sort documents correctly', t => {
  const documents = generate_test_documents(100);

  const sorted_docs = sort_documents_by_key(documents, TEST_DATABASE, TEST_COLLECTION);

  t.is(sorted_docs.length, documents.length);

  // Verify all documents have IDs
  sorted_docs.forEach(doc => {
    t.truthy(doc._id);
  });

  // Verify sorting (keys should be in ascending order)
  for (let i = 1; i < sorted_docs.length; i++) {
    const key_a = `${TEST_DATABASE}:${TEST_COLLECTION}:${sorted_docs[i - 1]._id}`;
    const key_b = `${TEST_DATABASE}:${TEST_COLLECTION}:${sorted_docs[i]._id}`;
    t.true(key_a.localeCompare(key_b) <= 0);
  }
});

test('pre_encode_documents should encode documents correctly', t => {
  const documents = generate_test_documents(10, { include_id: true });

  const encoded_docs = pre_encode_documents(documents, TEST_DATABASE, TEST_COLLECTION);

  t.is(encoded_docs.length, documents.length);

  encoded_docs.forEach((encoded, index) => {
    t.truthy(encoded.key);
    t.truthy(encoded.value);
    t.truthy(encoded.document_id);
    t.is(encoded.document_id, documents[index]._id);

    // Verify key format
    t.true(encoded.key.startsWith(`${TEST_DATABASE}:${TEST_COLLECTION}:`));

    // Verify value is valid JSON
    const parsed = JSON.parse(encoded.value);
    t.truthy(parsed._created_at);
    t.truthy(parsed._updated_at);
  });
});

// Basic bulk insert tests
test('bulk_insert_optimized should insert small dataset successfully', async t => {
  const documents = generate_test_documents(100);

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 100);
  t.is(result.inserted_ids.length, 100);
  t.truthy(result.performance);
  t.true(result.performance.duration_ms > 0);
  t.true(result.performance.documents_per_second > 0);
});

test('bulk_insert_optimized should insert medium dataset successfully', async t => {
  const documents = generate_test_documents(10000);

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 10000);
  t.is(result.inserted_ids.length, 10000);
  t.truthy(result.performance);

  // Performance should be reasonable
  t.true(result.performance.documents_per_second > 1000);
});

test('bulk_insert_optimized should handle large dataset efficiently', async t => {
  const documents = generate_test_documents(100000);

  const start_time = Date.now();
  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);
  const duration = Date.now() - start_time;

  t.true(result.acknowledged);
  t.is(result.inserted_count, 100000);
  t.is(result.inserted_ids.length, 100000);

  // Should complete in reasonable time (less than 30 seconds)
  t.true(duration < 30000);

  // Should achieve good throughput
  t.true(result.performance.documents_per_second > 3000);
});

// Performance optimization tests
test('bulk_insert_optimized with streaming should handle memory efficiently', async t => {
  const documents = generate_test_documents(50000, { variable_size: true, base_size: 1000 });

  const start_memory = process.memoryUsage().heapUsed;

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
    stream_processing: true,
    batch_size: 1000
  });

  const end_memory = process.memoryUsage().heapUsed;
  const memory_delta = end_memory - start_memory;

  t.true(result.acknowledged);
  t.is(result.inserted_count, 50000);

  // Memory usage should be reasonable (less than 500MB delta)
  t.true(memory_delta < 500 * 1024 * 1024);
});

test('bulk_insert_optimized with key sorting should improve performance', async t => {
  const documents = generate_test_documents(10000);

  // Test with sorting enabled
  const sorted_result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
    sort_keys: true
  });

  // Clean up for second test
  await cleanup_database(true);
  initialize_database(TEST_DB_PATH);

  // Test with sorting disabled
  const unsorted_result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
    sort_keys: false
  });

  t.true(sorted_result.acknowledged);
  t.true(unsorted_result.acknowledged);
  t.is(sorted_result.inserted_count, 10000);
  t.is(unsorted_result.inserted_count, 10000);

  // Sorted version should generally be faster or similar
  // (This is a performance hint, not a strict requirement)
  t.true(sorted_result.performance.duration_ms <= unsorted_result.performance.duration_ms * 1.5);
});

// Error handling tests
test('bulk_insert_optimized should reject invalid parameters', async t => {
  await t.throwsAsync(
    () => bulk_insert_optimized('', TEST_COLLECTION, []),
    { message: /Database name and collection name are required/ }
  );

  await t.throwsAsync(
    () => bulk_insert_optimized(TEST_DATABASE, '', []),
    { message: /Database name and collection name are required/ }
  );

  await t.throwsAsync(
    () => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, []),
    { message: /Documents must be a non-empty array/ }
  );

  await t.throwsAsync(
    () => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, null),
    { message: /Documents must be a non-empty array/ }
  );
});

test('bulk_insert_optimized should handle duplicate IDs correctly', async t => {
  const documents = [
    { _id: 'duplicate_id', name: 'Document 1' },
    { _id: 'duplicate_id', name: 'Document 2' }
  ];

  await t.throwsAsync(
    () => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents),
    { message: /Document with _id duplicate_id already exists/ }
  );
});

// Data integrity tests
test('bulk_insert_optimized should preserve document data integrity', async t => {
  const original_documents = generate_test_documents(1000, { include_id: true });

  await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, original_documents);

  // Verify all documents were inserted correctly
  for (const original_doc of original_documents) {
    const retrieved_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { _id: original_doc._id });

    t.truthy(retrieved_doc);
    t.is(retrieved_doc.name, original_doc.name);
    t.is(retrieved_doc.index, original_doc.index);
    t.is(retrieved_doc.category, original_doc.category);
    t.is(retrieved_doc.active, original_doc.active);
    t.deepEqual(retrieved_doc.metadata, original_doc.metadata);
    t.truthy(retrieved_doc._created_at);
    t.truthy(retrieved_doc._updated_at);
  }
});

// Concurrent read safety tests
test('bulk_insert_optimized should allow concurrent reads', async t => {
  // Insert initial data
  const initial_docs = generate_test_documents(1000, { include_id: true });
  await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, initial_docs);

  // Start bulk insert of additional data
  const additional_docs = generate_test_documents(10000);
  const bulk_insert_promise = bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, additional_docs);

  // Perform concurrent reads while bulk insert is running
  const read_promises = [];
  for (let i = 0; i < 10; i++) {
    read_promises.push(
      find_one(TEST_DATABASE, TEST_COLLECTION, { _id: initial_docs[i]._id })
    );
  }

  // Wait for both bulk insert and reads to complete
  const [bulk_result, ...read_results] = await Promise.all([
    bulk_insert_promise,
    ...read_promises
  ]);

  // Verify bulk insert succeeded
  t.true(bulk_result.acknowledged);
  t.is(bulk_result.inserted_count, 10000);

  // Verify all reads succeeded and returned correct data
  read_results.forEach((doc, index) => {
    t.truthy(doc);
    t.is(doc._id, initial_docs[index]._id);
    t.is(doc.name, initial_docs[index].name);
  });
});

// Performance monitoring tests
test('bulk_insert_with_metrics should provide detailed performance metrics', async t => {
  const documents = generate_test_documents(5000);

  const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 5000);
  t.truthy(result.performance);
  t.truthy(result.performance.memory_usage);

  // Verify performance metrics structure
  t.true(typeof result.performance.duration_ms === 'number');
  t.true(typeof result.performance.documents_per_second === 'number');
  t.true(typeof result.performance.memory_usage.start_heap_mb === 'number');
  t.true(typeof result.performance.memory_usage.end_heap_mb === 'number');
  t.true(typeof result.performance.memory_usage.delta_heap_mb === 'number');
  t.true(typeof result.performance.memory_usage.peak_heap_mb === 'number');
});

// Non-blocking bulk insert tests
test('non_blocking_bulk_insert should process in chunks', async t => {
  const documents = generate_test_documents(25000);

  const result = await non_blocking_bulk_insert(TEST_DATABASE, TEST_COLLECTION, documents, {
    chunk_size: 5000
  });

  t.true(result.acknowledged);
  t.is(result.inserted_count, 25000);
  t.is(result.inserted_ids.length, 25000);
  t.truthy(result.performance);
  t.true(result.performance.duration_ms > 0);
  t.true(result.performance.documents_per_second > 0);
});

// Stress tests
test('bulk_insert_optimized should handle very large documents', async t => {
  const large_documents = [];

  for (let i = 0; i < 1000; i++) {
    large_documents.push({
      name: `Large Document ${i}`,
      large_content: 'x'.repeat(10000), // 10KB per document
      nested_data: {
        level1: {
          level2: {
            level3: {
              data: Array.from({ length: 100 }, (_, j) => ({ id: j, value: `value_${j}` }))
            }
          }
        }
      }
    });
  }

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, large_documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 1000);
  t.true(result.performance.documents_per_second > 100);
});

test('bulk_insert_optimized should maintain performance with mixed document sizes', async t => {
  const mixed_documents = [];

  // Small documents
  for (let i = 0; i < 5000; i++) {
    mixed_documents.push({
      type: 'small',
      index: i,
      data: 'small_content'
    });
  }

  // Medium documents
  for (let i = 0; i < 3000; i++) {
    mixed_documents.push({
      type: 'medium',
      index: i,
      data: 'x'.repeat(1000),
      metadata: Array.from({ length: 50 }, (_, j) => ({ key: `key_${j}`, value: `value_${j}` }))
    });
  }

  // Large documents
  for (let i = 0; i < 2000; i++) {
    mixed_documents.push({
      type: 'large',
      index: i,
      data: 'x'.repeat(5000),
      large_array: Array.from({ length: 200 }, (_, j) => ({ id: j, content: `content_${j}` }))
    });
  }

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, mixed_documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 10000);
  t.true(result.performance.documents_per_second > 2000);

  // Verify data integrity for different document types
  const small_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'small' });
  const medium_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'medium' });
  const large_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'large' });

  t.truthy(small_doc);
  t.is(small_doc.type, 'small');

  t.truthy(medium_doc);
  t.is(medium_doc.type, 'medium');
  t.is(medium_doc.metadata.length, 50);

  t.truthy(large_doc);
  t.is(large_doc.type, 'large');
  t.is(large_doc.large_array.length, 200);
});

// Edge case tests
test('bulk_insert_optimized should handle documents with special characters', async t => {
  const special_documents = [
    { name: 'Document with "quotes"', content: 'Content with \n newlines and \t tabs' },
    { name: 'Document with émojis 🚀', content: 'Unicode content: café, naïve, résumé' },
    { name: 'Document with JSON', content: '{"nested": "json", "array": [1, 2, 3]}' },
    { name: 'Document with HTML', content: '<div>HTML content</div>' },
    { name: 'Document with null values', nullable_field: null, undefined_field: undefined }
  ];

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, special_documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 5);

  // Verify special characters are preserved
  const docs = await find(TEST_DATABASE, TEST_COLLECTION, {});
  t.is(docs.length, 5);

  const emoji_doc = docs.find(doc => doc.name.includes('émojis'));
  t.truthy(emoji_doc);
  t.true(emoji_doc.content.includes('café'));
});

test('bulk_insert_optimized should handle empty and minimal documents', async t => {
  const minimal_documents = [
    {},
    { single_field: 'value' },
    { _id: 'custom_id_1' },
    { _id: 'custom_id_2', empty_object: {}, empty_array: [] }
  ];

  const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, minimal_documents);

  t.true(result.acknowledged);
  t.is(result.inserted_count, 4);

  // Verify all documents were inserted with proper timestamps
  const docs = await find(TEST_DATABASE, TEST_COLLECTION, {});
  t.is(docs.length, 4);

  docs.forEach(doc => {
    t.truthy(doc._id);
    t.truthy(doc._created_at);
    t.truthy(doc._updated_at);
  });
});