UNPKG

@andrejs1979/document

Version:

MongoDB-compatible document database for NoSQL

598 lines 25 kB
/**
 * NoSQL - Document Index Manager
 * Advanced indexing system for fast document queries
 */
import { DocumentError } from '../types';

/** Index key values that mark a field as geospatial. */
const GEO_INDEX_TYPES = ['2dsphere', '2d'];

/**
 * Document fields that are mirrored into dedicated `_idx_field_N` columns.
 * A Map (rather than an object literal) keeps inherited names such as
 * `constructor` or `toString` from being mistaken for mapped fields.
 */
const INDEXED_COLUMNS = new Map([
  ['status', '_idx_field_1'],
  ['type', '_idx_field_2'],
  ['category', '_idx_field_3'],
  ['userId', '_idx_field_4'],
  ['email', '_idx_field_5'],
  ['score', '_idx_field_6'],
  ['rating', '_idx_field_7'],
  ['price', '_idx_field_8'],
  ['views', '_idx_field_9'],
  ['likes', '_idx_field_10'],
]);

/** Characters that would split or truncate a statement handed to `d1.exec()`. */
const UNSAFE_SQL_CHARS = /[\0\r\n]/;

/** Reject values that cannot be embedded in a single-line SQL statement. */
function assertSqlSafe(value, what) {
  if (UNSAFE_SQL_CHARS.test(String(value))) {
    throw new DocumentError(
      `Invalid ${what}: line breaks and NUL characters are not allowed`,
      'INVALID_INDEX_SPEC',
    );
  }
}

/** Render `name` as a double-quoted SQL identifier with embedded quotes doubled. */
function quoteIdentifier(name) {
  assertSqlSafe(name, 'index name');
  return `"${String(name).replace(/"/g, '""')}"`;
}

/** Render `value` as a single-quoted SQL string literal with embedded quotes doubled. */
function quoteLiteral(value, what) {
  assertSqlSafe(value, what);
  return `'${String(value).replace(/'/g, "''")}'`;
}

/** SQL expression that reads `path` out of the `_data` JSON column. */
function jsonExtract(path) {
  return `JSON_EXTRACT(_data, ${quoteLiteral(`$.${path}`, 'field name')})`;
}

/** Partial-index predicate restricting an index to one collection's live documents. */
function scopeClause(database, collection) {
  const db = quoteLiteral(database, 'database name');
  const coll = quoteLiteral(collection, 'collection name');
  return `WHERE _database = ${db} AND _collection = ${coll} AND _deleted = FALSE`;
}

/**
 * Build the DocumentError thrown by the public methods. The message and code
 * are what callers have always seen; the original error is attached as
 * `cause` so inner codes such as INDEX_EXISTS remain reachable.
 */
function wrapError(prefix, code, error) {
  const detail = error?.message ?? String(error);
  const wrapped = new DocumentError(`${prefix}: ${detail}`, code);
  wrapped.cause = error;
  return wrapped;
}

/** Convert an `index_metadata` row into the index-info shape used by this class. */
function toIndexInfo(name, row) {
  const spec = JSON.parse(row.index_spec);
  const usage = row.usage_count || 0;
  return {
    name,
    key: spec.key,
    options: spec.options || {},
    size: row.size_bytes || 0,
    stats: {
      accesses: usage,
      // NOTE(review): the metadata rows read here carry no last-access
      // column, so the creation time is reported instead.
      lastAccessed: new Date(row.created_at),
      usage,
    },
  };
}

/** True for non-null, non-array objects (i.e. something with named fields). */
function isFieldContainer(value) {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Smart index manager with auto-indexing capabilities.
 *
 * Index definitions are stored in the `index_metadata` table and backed by
 * partial indexes on the `documents` table. Query patterns and field usage
 * are tracked in memory only (per instance).
 */
export class DocumentIndexManager {
  d1;
  kvStore;
  config;
  indexCache = new Map();
  queryPatterns = new Map(); // Track query patterns for auto-indexing
  fieldUsage = new Map(); // Track field usage

  /**
   * @param {object} config - Reads `d1Database`, `kvStore`,
   *   `enableAutoIndexing`, `autoIndexThreshold` and
   *   `vectorConfig.defaultDimensions`.
   */
  constructor(config) {
    this.config = config;
    // Presumably a Cloudflare D1 binding (prepare/bind/first/all/run/exec).
    this.d1 = config.d1Database;
    this.kvStore = config.kvStore;
  }

  /**
   * Create a new index.
   *
   * @param {string} database
   * @param {string} collection
   * @param {{key: object, options?: object}} indexSpec
   * @throws {DocumentError} code `INDEX_CREATION_ERROR`; the underlying
   *   error (e.g. code `INDEX_EXISTS`) is available as `cause`.
   */
  async createIndex(database, collection, indexSpec) {
    try {
      if (!isFieldContainer(indexSpec?.key) || Object.keys(indexSpec.key).length === 0) {
        throw new DocumentError(
          'Index specification must define at least one key field',
          'INVALID_INDEX_SPEC',
        );
      }
      const indexName = this.generateIndexName(database, collection, indexSpec);
      // Check if index already exists
      if (await this.indexExists(database, collection, indexName)) {
        throw new DocumentError(`Index ${indexName} already exists`, 'INDEX_EXISTS');
      }
      // Create the index based on type
      await this.createIndexByType(database, collection, indexName, indexSpec);
      // Store index metadata
      await this.storeIndexMetadata(database, collection, indexName, indexSpec);
      // Cache the index info
      this.cacheIndexInfo(database, collection, indexName, indexSpec);
      console.log(`Created index ${indexName} for ${database}.${collection}`);
    } catch (error) {
      throw wrapError('Failed to create index', 'INDEX_CREATION_ERROR', error);
    }
  }

  /**
   * Drop an existing index.
   *
   * @throws {DocumentError} code `INDEX_DROP_ERROR`; the underlying error
   *   (e.g. code `INDEX_NOT_FOUND`) is available as `cause`.
   */
  async dropIndex(database, collection, indexName) {
    try {
      // Get index info
      const indexInfo = await this.getIndexInfo(database, collection, indexName);
      if (!indexInfo) {
        throw new DocumentError(`Index ${indexName} does not exist`, 'INDEX_NOT_FOUND');
      }
      // Drop the index
      await this.dropIndexByType(database, collection, indexInfo);
      // Remove metadata
      await this.removeIndexMetadata(database, collection, indexName);
      // Remove from cache
      this.indexCache.delete(this.getIndexCacheKey(database, collection, indexName));
      console.log(`Dropped index ${indexName} for ${database}.${collection}`);
    } catch (error) {
      throw wrapError('Failed to drop index', 'INDEX_DROP_ERROR', error);
    }
  }

  /**
   * List all indexes for a collection, oldest first.
   *
   * @returns {Promise<Array<object>>} `{ name, key, options, size, stats }` entries.
   * @throws {DocumentError} code `INDEX_LIST_ERROR`.
   */
  async listIndexes(database, collection) {
    try {
      const result = await this.d1
        .prepare(
          `SELECT index_name, index_spec, created_at, size_bytes, usage_count
           FROM index_metadata
           WHERE database_name = ? AND collection_name = ?
           ORDER BY created_at ASC`,
        )
        .bind(database, collection)
        .all();
      return (result.results || []).map((row) => toIndexInfo(row.index_name, row));
    } catch (error) {
      throw wrapError('Failed to list indexes', 'INDEX_LIST_ERROR', error);
    }
  }

  /**
   * Get index recommendations based on query patterns.
   *
   * @returns {Promise<{recommended: object[], reasons: string[], priority: string}>}
   *   `priority` is 'low', 'medium' or 'high'.
   */
  async getIndexRecommendations(database, collection) {
    const recommendations = [];
    const reasons = [];

    // Load the existing indexes once instead of once per candidate.
    const existingIndexes = await this.listIndexes(database, collection);
    const recommend = (indexSpec, reason) => {
      const alreadyCovered = existingIndexes.some((index) =>
        this.indexSpecsEqual(index.key, indexSpec.key),
      );
      if (!alreadyCovered) {
        recommendations.push(indexSpec);
        reasons.push(reason);
      }
    };

    // Analyze query patterns
    const patternAnalysis = await this.analyzeQueryPatterns(database, collection);
    // Check field usage frequency
    const frequentFields = this.getFrequentlyUsedFields(database, collection);

    // Recommend single-field indexes for frequently queried fields
    const usageThreshold = this.config.autoIndexThreshold || 100;
    for (const field of frequentFields) {
      if (field.usage > usageThreshold) {
        recommend(
          { key: { [field.name]: 1 } },
          `Field '${field.name}' is frequently queried (${field.usage} times)`,
        );
      }
    }

    // Recommend compound indexes for common query patterns
    for (const pattern of patternAnalysis) {
      if (pattern.frequency > 50 && pattern.fields.length > 1) {
        recommend(
          { key: Object.fromEntries(pattern.fields.map((field) => [field, 1])) },
          `Compound index for common query pattern: ${pattern.fields.join(', ')}`,
        );
      }
    }

    // Recommend text indexes for text search patterns
    const textSearchFields = this.getTextSearchFields(database, collection);
    if (textSearchFields.length > 0) {
      recommend(
        {
          key: Object.fromEntries(textSearchFields.map((field) => [field, 'text'])),
          options: { name: `text_index_${collection}` },
        },
        `Text index for full-text search on: ${textSearchFields.join(', ')}`,
      );
    }

    // Recommend vector indexes if vector operations are detected
    if (await this.hasVectorOperations(database, collection)) {
      recommend(
        {
          key: { _vector: 'vector' },
          options: {
            name: `vector_index_${collection}`,
            vectorOptions: {
              dimensions: this.config.vectorConfig?.defaultDimensions || 1536,
              similarity: 'cosine',
              type: 'hnsw',
            },
          },
        },
        'Vector index for similarity search operations',
      );
    }

    // Determine priority from the query volume of THIS collection only;
    // traffic on unrelated collections says nothing about the impact here.
    let priority = 'low';
    if (recommendations.length > 0) {
      const prefix = `${database}.${collection}:`;
      let collectionQueryCount = 0;
      for (const [pattern, count] of this.queryPatterns) {
        if (pattern.startsWith(prefix)) {
          collectionQueryCount += count;
        }
      }
      if (collectionQueryCount > 1000) {
        priority = 'high';
      } else if (collectionQueryCount > 100) {
        priority = 'medium';
      }
    }

    return { recommended: recommendations, reasons, priority };
  }

  /**
   * Auto-create indexes based on query patterns.
   * Does nothing unless `config.enableAutoIndexing` is set. Failures are
   * logged with `console.warn` and never thrown (best effort).
   */
  async autoCreateIndexes(database, collection) {
    if (!this.config.enableAutoIndexing) {
      return;
    }
    try {
      const recommendations = await this.getIndexRecommendations(database, collection);
      for (const indexSpec of recommendations.recommended) {
        try {
          await this.createIndex(database, collection, indexSpec);
          console.log(`Auto-created index for ${database}.${collection}:`, indexSpec);
        } catch (error) {
          console.warn(`Failed to auto-create index:`, error.message);
        }
      }
    } catch (error) {
      console.warn(`Auto-indexing failed for ${database}.${collection}:`, error.message);
    }
  }

  /**
   * Track query pattern for auto-indexing.
   *
   * @param {string} database
   * @param {string} collection
   * @param {object} filter - Query filter whose field names are recorded.
   * @param {object} [options] - Currently unused.
   */
  trackQueryPattern(database, collection, filter, options = {}) {
    if (!this.config.enableAutoIndexing) {
      return;
    }
    // Extract fields from query (sorted so that field order does not matter)
    const fields = this.extractQueryFields(filter).sort();
    const pattern = `${database}.${collection}:${fields.join(',')}`;

    // Update pattern frequency
    const patternCount = (this.queryPatterns.get(pattern) || 0) + 1;
    this.queryPatterns.set(pattern, patternCount);

    // Update field usage
    for (const field of fields) {
      const key = `${database}.${collection}.${field}`;
      const previousCount = this.fieldUsage.get(key)?.count || 0;
      this.fieldUsage.set(key, { count: previousCount + 1, lastUsed: new Date() });
    }

    // Trigger auto-indexing each time the pattern count reaches a multiple
    // of the threshold
    if (patternCount % (this.config.autoIndexThreshold || 100) === 0) {
      // Run auto-indexing asynchronously
      this.autoCreateIndexes(database, collection).catch(console.error);
    }
  }

  /**
   * Get optimal index for a query.
   *
   * @returns {Promise<object|null>} The best-scoring index that covers at
   *   least one filter or sort field, or null when none qualifies.
   */
  async getOptimalIndex(database, collection, filter, sort) {
    const indexes = await this.listIndexes(database, collection);
    // Extract query fields
    const queryFields = this.extractQueryFields(filter);
    const sortFields = sort ? Object.keys(sort) : [];
    const relevantFields = new Set([...queryFields, ...sortFields]);

    let bestIndex = null;
    let bestScore = 0;
    for (const index of indexes) {
      // An index sharing no field with the query cannot serve it, however
      // often it is used; without this guard the usage bonus alone could
      // make an unrelated index win.
      const coversQuery = Object.keys(index.key).some((field) => relevantFields.has(field));
      if (!coversQuery) {
        continue;
      }
      const score = this.calculateIndexScore(index, queryFields, sortFields);
      if (score > bestScore) {
        bestScore = score;
        bestIndex = index;
      }
    }
    return bestIndex;
  }

  /**
   * Rebuild index (refresh statistics and optimize).
   * Implemented as drop + create; the two steps are not atomic, so a
   * failure while recreating leaves the index dropped.
   *
   * @throws {DocumentError} code `INDEX_REBUILD_ERROR`.
   */
  async rebuildIndex(database, collection, indexName) {
    try {
      const indexInfo = await this.getIndexInfo(database, collection, indexName);
      if (!indexInfo) {
        throw new DocumentError(`Index ${indexName} not found`, 'INDEX_NOT_FOUND');
      }
      const indexSpec = { key: indexInfo.key, options: indexInfo.options };
      // Drop the old index
      await this.dropIndex(database, collection, indexName);
      // Recreate the index
      await this.createIndex(database, collection, indexSpec);
      console.log(`Rebuilt index ${indexName} for ${database}.${collection}`);
    } catch (error) {
      throw wrapError('Failed to rebuild index', 'INDEX_REBUILD_ERROR', error);
    }
  }

  /**
   * Get index usage statistics.
   *
   * @returns {Promise<{totalIndexes: number, totalSize: number, usageStats: object, recommendations: string[]}>}
   */
  async getIndexStats(database, collection) {
    const indexes = await this.listIndexes(database, collection);
    const usageOf = (index) => index.stats?.usage || 0;
    const stats = {
      totalIndexes: indexes.length,
      totalSize: indexes.reduce((sum, index) => sum + index.size, 0),
      usageStats: Object.fromEntries(indexes.map((index) => [index.name, usageOf(index)])),
      recommendations: [],
    };

    // Generate recommendations (fewer than 10 recorded uses counts as unused)
    const unusedIndexes = indexes.filter((index) => usageOf(index) < 10);
    if (unusedIndexes.length > 0) {
      stats.recommendations.push(
        `Consider dropping unused indexes: ${unusedIndexes.map((index) => index.name).join(', ')}`,
      );
    }
    const recommendations = await this.getIndexRecommendations(database, collection);
    if (recommendations.recommended.length > 0) {
      stats.recommendations.push(
        `Consider creating ${recommendations.recommended.length} recommended indexes`,
      );
    }
    return stats;
  }

  // ===============================
  // Private Methods
  // ===============================

  /** Dispatch to the creator matching the index types found in `indexSpec.key`. */
  async createIndexByType(database, collection, indexName, indexSpec) {
    const indexTypes = Object.values(indexSpec.key);
    // Determine index type
    if (indexTypes.includes('text')) {
      await this.createTextIndex(database, collection, indexName, indexSpec);
    } else if (indexTypes.includes('vector')) {
      await this.createVectorIndex(database, collection, indexName, indexSpec);
    } else if (indexTypes.some((type) => GEO_INDEX_TYPES.includes(type))) {
      await this.createGeospatialIndex(database, collection, indexName, indexSpec);
    } else {
      await this.createBTreeIndex(database, collection, indexName, indexSpec);
    }
  }

  /**
   * Create a single-field or compound index. A direction of -1 sorts DESC,
   * anything else ASC. Fields with a dedicated column use it; all other
   * fields are indexed through JSON_EXTRACT on `_data`.
   *
   * Every DDL statement in this class is built on ONE line with escaped
   * values: D1's `exec()` takes no bound parameters and treats a newline as
   * a statement separator.
   */
  async createBTreeIndex(database, collection, indexName, indexSpec) {
    const indexColumns = Object.entries(indexSpec.key).map(([field, direction]) => {
      const order = direction === -1 ? 'DESC' : 'ASC';
      const column = this.getIndexColumn(field) || jsonExtract(field);
      return `${column} ${order}`;
    });
    await this.d1.exec(
      `CREATE INDEX ${quoteIdentifier(indexName)} ON documents(${indexColumns.join(', ')}) ${scopeClause(database, collection)}`,
    );
  }

  /**
   * Create a text index. No full-text virtual table is involved: the index
   * always covers the `_searchText` column, whatever fields `indexSpec.key` names.
   */
  async createTextIndex(database, collection, indexName, indexSpec) {
    await this.d1.exec(
      `CREATE INDEX ${quoteIdentifier(indexName)} ON documents(_searchText) ${scopeClause(database, collection)} AND _searchText IS NOT NULL`,
    );
  }

  /**
   * Create a vector index.
   * Vector indexes are handled by the vector storage system; for now this
   * only creates an index on the vector presence.
   */
  async createVectorIndex(database, collection, indexName, indexSpec) {
    await this.d1.exec(
      `CREATE INDEX ${quoteIdentifier(indexName)} ON documents(_vector) ${scopeClause(database, collection)} AND _vector IS NOT NULL`,
    );
  }

  /**
   * Simplified geospatial indexing: one index on `<field>.lat` and one on
   * `<field>.lng` for every geo-typed field of the key. Non-geo fields of a
   * mixed key are skipped, since they have no lat/lng members to index.
   */
  async createGeospatialIndex(database, collection, indexName, indexSpec) {
    const scope = scopeClause(database, collection);
    for (const [field, type] of Object.entries(indexSpec.key)) {
      if (!GEO_INDEX_TYPES.includes(type)) {
        continue;
      }
      for (const axis of ['lat', 'lng']) {
        await this.d1.exec(
          `CREATE INDEX ${quoteIdentifier(`${indexName}_${field}_${axis}`)} ON documents(${jsonExtract(`${field}.${axis}`)}) ${scope}`,
        );
      }
    }
  }

  /** Drop the SQL index behind `indexInfo` plus any per-field geospatial indexes. */
  async dropIndexByType(database, collection, indexInfo) {
    // Drop the main index
    await this.d1.exec(`DROP INDEX IF EXISTS ${quoteIdentifier(indexInfo.name)}`);
    // Drop any related indexes (for geospatial, etc.). Every key field is
    // tried, not only geo-typed ones; IF EXISTS makes the extra drops harmless.
    const indexTypes = Object.values(indexInfo.key);
    if (indexTypes.some((type) => GEO_INDEX_TYPES.includes(type))) {
      for (const field of Object.keys(indexInfo.key)) {
        await this.d1.exec(
          `DROP INDEX IF EXISTS ${quoteIdentifier(`${indexInfo.name}_${field}_lat`)}`,
        );
        await this.d1.exec(
          `DROP INDEX IF EXISTS ${quoteIdentifier(`${indexInfo.name}_${field}_lng`)}`,
        );
      }
    }
  }

  /**
   * Return `options.name` verbatim when given; otherwise derive
   * `idx_<database>_<collection>_<fields>_<types>` with every character
   * outside [a-zA-Z0-9_] replaced by `_`.
   */
  generateIndexName(database, collection, indexSpec) {
    if (indexSpec.options?.name) {
      return indexSpec.options.name;
    }
    const fields = Object.keys(indexSpec.key).join('_');
    const types = Object.values(indexSpec.key).join('_');
    return `idx_${database}_${collection}_${fields}_${types}`.replace(/[^a-zA-Z0-9_]/g, '_');
  }

  /** True when `index_metadata` already holds a row for this index. */
  async indexExists(database, collection, indexName) {
    const result = await this.d1
      .prepare(
        `SELECT 1 FROM index_metadata
         WHERE database_name = ? AND collection_name = ? AND index_name = ?`,
      )
      .bind(database, collection, indexName)
      .first();
    return !!result;
  }

  /** Insert the metadata row for a new index (size and usage start at 0). */
  async storeIndexMetadata(database, collection, indexName, indexSpec) {
    await this.d1
      .prepare(
        `INSERT INTO index_metadata (
           database_name, collection_name, index_name, index_spec,
           created_at, size_bytes, usage_count
         ) VALUES (?, ?, ?, ?, ?, ?, ?)`,
      )
      .bind(
        database,
        collection,
        indexName,
        JSON.stringify(indexSpec),
        new Date().toISOString(),
        0,
        0,
      )
      .run();
  }

  /** Delete the metadata row of an index. */
  async removeIndexMetadata(database, collection, indexName) {
    await this.d1
      .prepare(
        `DELETE FROM index_metadata
         WHERE database_name = ? AND collection_name = ? AND index_name = ?`,
      )
      .bind(database, collection, indexName)
      .run();
  }

  /**
   * Look up one index, consulting the in-memory cache first.
   *
   * @returns {Promise<object|null>} Index info, or null when no metadata row exists.
   */
  async getIndexInfo(database, collection, indexName) {
    const cacheKey = this.getIndexCacheKey(database, collection, indexName);
    if (this.indexCache.has(cacheKey)) {
      return this.indexCache.get(cacheKey);
    }
    const row = await this.d1
      .prepare(
        `SELECT index_spec, created_at, size_bytes, usage_count
         FROM index_metadata
         WHERE database_name = ? AND collection_name = ? AND index_name = ?`,
      )
      .bind(database, collection, indexName)
      .first();
    if (!row) {
      return null;
    }
    const indexInfo = toIndexInfo(indexName, row);
    this.indexCache.set(cacheKey, indexInfo);
    return indexInfo;
  }

  /** Seed the cache with a freshly created index (zero size and usage). */
  cacheIndexInfo(database, collection, indexName, indexSpec) {
    const cacheKey = this.getIndexCacheKey(database, collection, indexName);
    this.indexCache.set(cacheKey, {
      name: indexName,
      key: indexSpec.key,
      options: indexSpec.options || {},
      size: 0,
      stats: { accesses: 0, lastAccessed: new Date(), usage: 0 },
    });
  }

  /** Cache key of the form `<database>.<collection>.<indexName>`. */
  getIndexCacheKey(database, collection, indexName) {
    return `${database}.${collection}.${indexName}`;
  }

  /**
   * Collect the distinct field paths referenced by a query filter.
   * Keys starting with `$` are operators: their object operands (or object
   * items of array operands) are searched under the current path. Any other
   * key is a field; nested plain objects extend the dotted path.
   *
   * @param {object|null|undefined} filter
   * @returns {string[]} Field paths in first-seen order; empty for a
   *   missing or non-object filter.
   */
  extractQueryFields(filter) {
    const fields = new Set();
    const visit = (node, path) => {
      for (const [key, value] of Object.entries(node)) {
        if (key.startsWith('$')) {
          // Operator - check its value
          const operands = Array.isArray(value) ? value : [value];
          for (const operand of operands) {
            // Skips primitives, null and nested arrays (e.g. $in value lists).
            if (isFieldContainer(operand)) {
              visit(operand, path);
            }
          }
        } else {
          // Field name
          const fieldPath = path ? `${path}.${key}` : key;
          fields.add(fieldPath);
          if (isFieldContainer(value)) {
            visit(value, fieldPath);
          }
        }
      }
    };
    if (isFieldContainer(filter)) {
      visit(filter, '');
    }
    return [...fields];
  }

  /**
   * Map common fields to indexed columns.
   *
   * @returns {string|null} The dedicated column name, or null when the
   *   field has none.
   */
  getIndexColumn(field) {
    return INDEXED_COLUMNS.get(field) ?? null;
  }

  /**
   * Tracked query patterns of one collection, most frequent first.
   * Patterns without any field are left out.
   *
   * @returns {Promise<Array<{fields: string[], frequency: number}>>}
   */
  async analyzeQueryPatterns(database, collection) {
    const prefix = `${database}.${collection}:`;
    const patterns = [];
    for (const [pattern, frequency] of this.queryPatterns) {
      if (!pattern.startsWith(prefix)) {
        continue;
      }
      const fieldList = pattern.substring(prefix.length);
      if (fieldList) {
        patterns.push({ fields: fieldList.split(','), frequency });
      }
    }
    return patterns.sort((a, b) => b.frequency - a.frequency);
  }

  /**
   * Tracked fields of one collection, most used first.
   *
   * @returns {Array<{name: string, usage: number}>}
   */
  getFrequentlyUsedFields(database, collection) {
    const prefix = `${database}.${collection}.`;
    const fields = [];
    for (const [key, stats] of this.fieldUsage) {
      if (key.startsWith(prefix)) {
        fields.push({ name: key.substring(prefix.length), usage: stats.count });
      }
    }
    return fields.sort((a, b) => b.usage - a.usage);
  }

  /**
   * Common text search fields.
   * NOTE(review): this is a fixed list that ignores both arguments, so a
   * text index is recommended for every collection that lacks one — confirm
   * this is intended.
   */
  getTextSearchFields(database, collection) {
    return ['title', 'content', 'description', 'text', 'name', 'summary'];
  }

  /**
   * Check if any tracked query pattern of this collection involves a field
   * containing `_vector`. Only the field list is inspected, so database or
   * collection names that happen to contain `_vector` do not count.
   */
  async hasVectorOperations(database, collection) {
    const prefix = `${database}.${collection}:`;
    for (const pattern of this.queryPatterns.keys()) {
      if (pattern.startsWith(prefix) && pattern.substring(prefix.length).includes('_vector')) {
        return true;
      }
    }
    return false;
  }

  /** True when an existing index of the collection has the same key as `indexSpec`. */
  async hasEquivalentIndex(database, collection, indexSpec) {
    const indexes = await this.listIndexes(database, collection);
    return indexes.some((index) => this.indexSpecsEqual(index.key, indexSpec.key));
  }

  /**
   * Compare two index keys: same field names with strictly equal values,
   * regardless of field order.
   */
  indexSpecsEqual(key1, key2) {
    const keys1 = Object.keys(key1).sort();
    const keys2 = Object.keys(key2).sort();
    if (keys1.length !== keys2.length) {
      return false;
    }
    return keys1.every((name, i) => name === keys2[i] && key1[name] === key2[name]);
  }

  /**
   * Score an index for a query: +10 per filter field it contains, +5 per
   * sort field it contains, -1 per indexed field (prefer more specific
   * indexes) and +0.1 per recorded use (prefer frequently used indexes).
   */
  calculateIndexScore(index, queryFields, sortFields) {
    const indexFields = Object.keys(index.key);
    let score = 0;
    // Score based on field overlap
    for (const queryField of queryFields) {
      if (indexFields.includes(queryField)) {
        score += 10;
      }
    }
    // Score based on sort field overlap
    for (const sortField of sortFields) {
      if (indexFields.includes(sortField)) {
        score += 5;
      }
    }
    // Prefer more specific indexes (fewer fields)
    score -= indexFields.length;
    // Prefer frequently used indexes
    score += (index.stats?.usage || 0) * 0.1;
    return score;
  }
}
//# sourceMappingURL=index-manager.js.map