@joystick.js/db-canary
JoystickDB - A minimalist database server for the Joystick framework
/**
* @fileoverview Comprehensive tests for JoystickDB bulk insert optimizer.
* Tests performance optimizations, safety guarantees, and concurrent read capabilities.
*/
import test from 'ava';
import { rmSync, existsSync } from 'fs';
import { initialize_database, cleanup_database } from '../../../src/server/lib/query_engine.js';
import {
bulk_insert_optimized,
bulk_insert_with_metrics,
non_blocking_bulk_insert,
calculate_average_document_size,
calculate_bulk_map_size,
create_size_based_batches,
sort_documents_by_key,
pre_encode_documents
} from '../../../src/server/lib/bulk_insert_optimizer.js';
import find_one from '../../../src/server/lib/operations/find_one.js';
import find from '../../../src/server/lib/operations/find.js';
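// Functions under test (behavior inferred from the assertions in this file, not from separate docs):
// - bulk_insert_optimized / bulk_insert_with_metrics / non_blocking_bulk_insert: insert an array of
//   documents and resolve with { acknowledged, inserted_count, inserted_ids, performance }.
// - calculate_average_document_size, calculate_bulk_map_size, create_size_based_batches,
//   sort_documents_by_key, pre_encode_documents: sizing, batching, and encoding helpers.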
const TEST_DB_PATH = './test_data/bulk_optimizer_test';
const TEST_DATABASE = 'test_db';
const TEST_COLLECTION = 'test_collection';
/**
* Generates test documents for bulk insert testing.
* @param {number} count - Number of documents to generate
* @param {Object} [options={}] - Generation options
* @param {boolean} [options.include_id=false] - Assign a deterministic _id to each document
* @param {number} [options.base_size=100] - Upper bound on the random filler content length when variable_size is true
* @param {boolean} [options.variable_size=false] - Append random-length filler content to each document
* @returns {Array<Object>} Array of test documents
*/
const generate_test_documents = (count, options = {}) => {
const {
include_id = false,
base_size = 100,
variable_size = false
} = options;
const documents = [];
for (let i = 0; i < count; i++) {
const doc = {
name: `Test Document ${i}`,
index: i,
category: `category_${i % 10}`,
active: i % 2 === 0,
metadata: {
created_by: 'test_user',
tags: [`tag_${i % 5}`, `tag_${(i + 1) % 5}`],
priority: i % 3
}
};
if (include_id) {
doc._id = `test_doc_${i.toString().padStart(6, '0')}`;
}
// Add variable size content for testing
if (variable_size) {
const extra_content_size = Math.floor(Math.random() * base_size);
doc.content = 'x'.repeat(extra_content_size);
}
documents.push(doc);
}
return documents;
};
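// Illustrative shape of a generated document (derived from the generator above; _id is only
// present when include_id is true, content only when variable_size is true):
// {
//   name: 'Test Document 0',
//   index: 0,
//   category: 'category_0',
//   active: true,
//   metadata: { created_by: 'test_user', tags: ['tag_0', 'tag_1'], priority: 0 }
// }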
/**
* Sets up test database before each test.
*/
test.beforeEach(async () => {
// Clean up any existing test database
if (existsSync(TEST_DB_PATH)) {
rmSync(TEST_DB_PATH, { recursive: true, force: true });
}
// Initialize fresh database
initialize_database(TEST_DB_PATH);
});
/**
* Cleans up test database after each test.
*/
test.afterEach(async () => {
await cleanup_database(true);
});
// Utility function tests
test('calculate_average_document_size should calculate correct average', t => {
const documents = [
{ name: 'doc1', data: 'small' },
{ name: 'doc2', data: 'medium_sized_content' },
{ name: 'doc3', data: 'large_content_with_more_data' }
];
const avg_size = calculate_average_document_size(documents);
t.true(avg_size > 0);
t.true(typeof avg_size === 'number');
});
test('calculate_bulk_map_size should return appropriate map size', t => {
const document_count = 100000;
const avg_document_size = 500;
const map_size = calculate_bulk_map_size(document_count, avg_document_size);
// Should be at least 2x the estimated size
const estimated_size = document_count * avg_document_size;
t.true(map_size >= estimated_size * 2);
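// With 100,000 documents at ~500 bytes each, the doubled estimate is only ~95 MB,
// so the 10 GB floor below is the binding constraint for these inputs.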
// Should never fall below the 10 GB minimum map size
const minimum_size = 1024 * 1024 * 1024 * 10;
t.true(map_size >= minimum_size);
});
test('create_size_based_batches should create appropriate batches', t => {
const documents = generate_test_documents(1000);
const target_size = 50 * 1024; // 50KB target
const batches = create_size_based_batches(documents, target_size);
t.true(batches.length > 0);
t.true(Array.isArray(batches));
// Verify all documents are included
const total_docs = batches.reduce((sum, batch) => sum + batch.length, 0);
t.is(total_docs, documents.length);
});
test('sort_documents_by_key should sort documents correctly', t => {
const documents = generate_test_documents(100);
const sorted_docs = sort_documents_by_key(documents, TEST_DATABASE, TEST_COLLECTION);
t.is(sorted_docs.length, documents.length);
// Verify all documents have IDs
sorted_docs.forEach(doc => {
t.truthy(doc._id);
});
// Verify sorting (keys should be in ascending order)
for (let i = 1; i < sorted_docs.length; i++) {
const key_a = `${TEST_DATABASE}:${TEST_COLLECTION}:${sorted_docs[i-1]._id}`;
const key_b = `${TEST_DATABASE}:${TEST_COLLECTION}:${sorted_docs[i]._id}`;
t.true(key_a.localeCompare(key_b) <= 0);
}
});
test('pre_encode_documents should encode documents correctly', t => {
const documents = generate_test_documents(10, { include_id: true });
const encoded_docs = pre_encode_documents(documents, TEST_DATABASE, TEST_COLLECTION);
t.is(encoded_docs.length, documents.length);
encoded_docs.forEach((encoded, index) => {
t.truthy(encoded.key);
t.truthy(encoded.value);
t.truthy(encoded.document_id);
t.is(encoded.document_id, documents[index]._id);
// Verify key format
t.true(encoded.key.startsWith(`${TEST_DATABASE}:${TEST_COLLECTION}:`));
// Verify value is valid JSON
const parsed = JSON.parse(encoded.value);
t.truthy(parsed._created_at);
t.truthy(parsed._updated_at);
});
});
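// Illustrative shape of an encoded entry (inferred from the assertions above):
// {
//   key: 'test_db:test_collection:test_doc_000000',
//   value: '{"name":"Test Document 0",...,"_created_at":"...","_updated_at":"..."}',
//   document_id: 'test_doc_000000'
// }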
// Basic bulk insert tests
test('bulk_insert_optimized should insert small dataset successfully', async t => {
const documents = generate_test_documents(100);
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 100);
t.is(result.inserted_ids.length, 100);
t.truthy(result.performance);
t.true(result.performance.duration_ms > 0);
t.true(result.performance.documents_per_second > 0);
});
test('bulk_insert_optimized should insert medium dataset successfully', async t => {
const documents = generate_test_documents(10000);
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 10000);
t.is(result.inserted_ids.length, 10000);
t.truthy(result.performance);
// Throughput should be reasonable for a medium dataset (over 1,000 documents per second)
t.true(result.performance.documents_per_second > 1000);
});
test('bulk_insert_optimized should handle large dataset efficiently', async t => {
const documents = generate_test_documents(100000);
const start_time = Date.now();
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents);
const duration = Date.now() - start_time;
t.true(result.acknowledged);
t.is(result.inserted_count, 100000);
t.is(result.inserted_ids.length, 100000);
// Should complete in reasonable time (less than 30 seconds)
t.true(duration < 30000);
// Should sustain throughput above 3,000 documents per second
t.true(result.performance.documents_per_second > 3000);
});
// Performance optimization tests
test('bulk_insert_optimized with streaming should handle memory efficiently', async t => {
const documents = generate_test_documents(50000, { variable_size: true, base_size: 1000 });
const start_memory = process.memoryUsage().heapUsed;
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
stream_processing: true,
batch_size: 1000
});
const end_memory = process.memoryUsage().heapUsed;
const memory_delta = end_memory - start_memory;
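// Note: heapUsed is sensitive to garbage collection timing, so the delta below
// is a loose sanity bound rather than a precise measurement.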
t.true(result.acknowledged);
t.is(result.inserted_count, 50000);
// Memory usage should be reasonable (less than 500MB delta)
t.true(memory_delta < 500 * 1024 * 1024);
});
test('bulk_insert_optimized with key sorting should improve performance', async t => {
const documents = generate_test_documents(10000);
// Test with sorting enabled
const sorted_result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
sort_keys: true
});
// Clean up for second test
await cleanup_database(true);
initialize_database(TEST_DB_PATH);
// Test with sorting disabled
const unsorted_result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents, {
sort_keys: false
});
t.true(sorted_result.acknowledged);
t.true(unsorted_result.acknowledged);
t.is(sorted_result.inserted_count, 10000);
t.is(unsorted_result.inserted_count, 10000);
// Key-sorted inserts are expected to be at least comparable to unsorted inserts:
// allow up to 1.5x the unsorted duration to tolerate timing noise.
t.true(sorted_result.performance.duration_ms <= unsorted_result.performance.duration_ms * 1.5);
});
// Error handling tests
test('bulk_insert_optimized should reject invalid parameters', async t => {
await t.throwsAsync(
() => bulk_insert_optimized('', TEST_COLLECTION, []),
{ message: /Database name and collection name are required/ }
);
await t.throwsAsync(
() => bulk_insert_optimized(TEST_DATABASE, '', []),
{ message: /Database name and collection name are required/ }
);
await t.throwsAsync(
() => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, []),
{ message: /Documents must be a non-empty array/ }
);
await t.throwsAsync(
() => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, null),
{ message: /Documents must be a non-empty array/ }
);
});
test('bulk_insert_optimized should handle duplicate IDs correctly', async t => {
const documents = [
{ _id: 'duplicate_id', name: 'Document 1' },
{ _id: 'duplicate_id', name: 'Document 2' }
];
await t.throwsAsync(
() => bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, documents),
{ message: /Document with _id duplicate_id already exists/ }
);
});
// Data integrity tests
test('bulk_insert_optimized should preserve document data integrity', async t => {
const original_documents = generate_test_documents(1000, { include_id: true });
await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, original_documents);
// Verify all documents were inserted correctly
for (const original_doc of original_documents) {
const retrieved_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { _id: original_doc._id });
t.truthy(retrieved_doc);
t.is(retrieved_doc.name, original_doc.name);
t.is(retrieved_doc.index, original_doc.index);
t.is(retrieved_doc.category, original_doc.category);
t.is(retrieved_doc.active, original_doc.active);
t.deepEqual(retrieved_doc.metadata, original_doc.metadata);
t.truthy(retrieved_doc._created_at);
t.truthy(retrieved_doc._updated_at);
}
});
// Concurrent read safety tests
test('bulk_insert_optimized should allow concurrent reads', async t => {
// Insert initial data
const initial_docs = generate_test_documents(1000, { include_id: true });
await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, initial_docs);
// Start bulk insert of additional data
const additional_docs = generate_test_documents(10000);
const bulk_insert_promise = bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, additional_docs);
// Perform concurrent reads while bulk insert is running
const read_promises = [];
for (let i = 0; i < 10; i++) {
read_promises.push(
find_one(TEST_DATABASE, TEST_COLLECTION, { _id: initial_docs[i]._id })
);
}
// Wait for both bulk insert and reads to complete
const [bulk_result, ...read_results] = await Promise.all([
bulk_insert_promise,
...read_promises
]);
// Verify bulk insert succeeded
t.true(bulk_result.acknowledged);
t.is(bulk_result.inserted_count, 10000);
// Verify all reads succeeded and returned correct data
read_results.forEach((doc, index) => {
t.truthy(doc);
t.is(doc._id, initial_docs[index]._id);
t.is(doc.name, initial_docs[index].name);
});
});
// Performance monitoring tests
test('bulk_insert_with_metrics should provide detailed performance metrics', async t => {
const documents = generate_test_documents(5000);
const result = await bulk_insert_with_metrics(TEST_DATABASE, TEST_COLLECTION, documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 5000);
t.truthy(result.performance);
t.truthy(result.performance.memory_usage);
// Verify performance metrics structure
t.true(typeof result.performance.duration_ms === 'number');
t.true(typeof result.performance.documents_per_second === 'number');
t.true(typeof result.performance.memory_usage.start_heap_mb === 'number');
t.true(typeof result.performance.memory_usage.end_heap_mb === 'number');
t.true(typeof result.performance.memory_usage.delta_heap_mb === 'number');
t.true(typeof result.performance.memory_usage.peak_heap_mb === 'number');
});
// Non-blocking bulk insert tests
test('non_blocking_bulk_insert should process in chunks', async t => {
const documents = generate_test_documents(25000);
const result = await non_blocking_bulk_insert(TEST_DATABASE, TEST_COLLECTION, documents, {
chunk_size: 5000
});
t.true(result.acknowledged);
t.is(result.inserted_count, 25000);
t.is(result.inserted_ids.length, 25000);
t.truthy(result.performance);
t.true(result.performance.duration_ms > 0);
t.true(result.performance.documents_per_second > 0);
});
// Stress tests
test('bulk_insert_optimized should handle very large documents', async t => {
const large_documents = [];
for (let i = 0; i < 1000; i++) {
large_documents.push({
name: `Large Document ${i}`,
large_content: 'x'.repeat(10000), // 10KB per document
nested_data: {
level1: {
level2: {
level3: {
data: Array.from({ length: 100 }, (_, j) => ({ id: j, value: `value_${j}` }))
}
}
}
}
});
}
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, large_documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 1000);
t.true(result.performance.documents_per_second > 100);
});
test('bulk_insert_optimized should maintain performance with mixed document sizes', async t => {
const mixed_documents = [];
// Small documents
for (let i = 0; i < 5000; i++) {
mixed_documents.push({
type: 'small',
index: i,
data: 'small_content'
});
}
// Medium documents
for (let i = 0; i < 3000; i++) {
mixed_documents.push({
type: 'medium',
index: i,
data: 'x'.repeat(1000),
metadata: Array.from({ length: 50 }, (_, j) => ({ key: `key_${j}`, value: `value_${j}` }))
});
}
// Large documents
for (let i = 0; i < 2000; i++) {
mixed_documents.push({
type: 'large',
index: i,
data: 'x'.repeat(5000),
large_array: Array.from({ length: 200 }, (_, j) => ({ id: j, content: `content_${j}` }))
});
}
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, mixed_documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 10000);
t.true(result.performance.documents_per_second > 2000);
// Verify data integrity for different document types
const small_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'small' });
const medium_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'medium' });
const large_doc = await find_one(TEST_DATABASE, TEST_COLLECTION, { type: 'large' });
t.truthy(small_doc);
t.is(small_doc.type, 'small');
t.truthy(medium_doc);
t.is(medium_doc.type, 'medium');
t.is(medium_doc.metadata.length, 50);
t.truthy(large_doc);
t.is(large_doc.type, 'large');
t.is(large_doc.large_array.length, 200);
});
// Edge case tests
test('bulk_insert_optimized should handle documents with special characters', async t => {
const special_documents = [
{ name: 'Document with "quotes"', content: 'Content with \n newlines and \t tabs' },
{ name: 'Document with émojis 🚀', content: 'Unicode content: café, naïve, résumé' },
{ name: 'Document with JSON', content: '{"nested": "json", "array": [1, 2, 3]}' },
{ name: 'Document with HTML', content: '<div>HTML content</div>' },
{ name: 'Document with null values', nullable_field: null, undefined_field: undefined }
];
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, special_documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 5);
// Verify special characters are preserved
const docs = await find(TEST_DATABASE, TEST_COLLECTION, {});
t.is(docs.length, 5);
const emoji_doc = docs.find(doc => doc.name.includes('émojis'));
t.truthy(emoji_doc);
t.true(emoji_doc.content.includes('café'));
});
test('bulk_insert_optimized should handle empty and minimal documents', async t => {
const minimal_documents = [
{},
{ single_field: 'value' },
{ _id: 'custom_id_1' },
{ _id: 'custom_id_2', empty_object: {}, empty_array: [] }
];
const result = await bulk_insert_optimized(TEST_DATABASE, TEST_COLLECTION, minimal_documents);
t.true(result.acknowledged);
t.is(result.inserted_count, 4);
// Verify all documents were inserted with proper timestamps
const docs = await find(TEST_DATABASE, TEST_COLLECTION, {});
t.is(docs.length, 4);
docs.forEach(doc => {
t.truthy(doc._id);
t.truthy(doc._created_at);
t.truthy(doc._updated_at);
});
});