@andrejs1979/document

MongoDB-compatible NoSQL document database

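Before the source, a minimal usage sketch, assuming a Cloudflare Worker environment. The `env.DB` and `env.DOCS` binding names, the root import specifier, and the field values are illustrative assumptions, not part of the package's documented API; what the source below does guarantee is that the constructor requires `name` and a D1 instance (`d1Database`) and throws `MISSING_D1` without one.

// Hypothetical Cloudflare Worker using DocumentStorage.
// Assumed: the class is re-exported from the package root, and
// env.DB / env.DOCS are D1 and R2 bindings configured in wrangler.toml.
import { DocumentStorage } from '@andrejs1979/document';

export default {
    async fetch(request, env) {
        const storage = new DocumentStorage({
            name: 'app',         // logical database name
            d1Database: env.DB,  // required: a D1 database binding
            r2Bucket: env.DOCS,  // optional: offloads documents larger than 1MB
        });

        // insertOne generates an _id when none is supplied
        const { insertedId } = await storage.insertOne('users', {
            name: 'Ada',
            email: 'ada@example.com',
            status: 'active',
        });

        // Filters use MongoDB-style syntax, translated to SQL over JSON_EXTRACT
        const active = await storage.find('users', { status: 'active' }, {
            sort: { _createdAt: -1 },
            limit: 10,
        });

        return Response.json({ insertedId, count: active.length });
    },
};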
/**
 * NoSQL - Document Storage Engine
 * High-performance document storage with Cloudflare D1/R2 integration
 */
import { DocumentError, ValidationError, DuplicateKeyError } from '../types';

/**
 * Document storage with caching and optimization
 */
export class DocumentStorage {
    d1;
    kvStore;
    r2Bucket;
    config;
    documentCache = new Map();
    queryCache = new Map();
    initialized = false;

    constructor(config) {
        this.config = {
            name: config.name,
            d1Database: config.d1Database,
            kvStore: config.kvStore,
            r2Bucket: config.r2Bucket,
            maxDocumentSize: config.maxDocumentSize || 16 * 1024 * 1024, // 16MB
            queryTimeout: config.queryTimeout || 30000,
            batchSize: config.batchSize || 100,
            enableQueryCache: config.enableQueryCache ?? true,
            queryCacheTTL: config.queryCacheTTL || 300, // 5 minutes
            cacheSize: config.cacheSize || 100, // MB
            enableAutoIndexing: config.enableAutoIndexing ?? true,
            autoIndexThreshold: config.autoIndexThreshold || 1000,
            maxIndexedFields: config.maxIndexedFields || 20,
            vectorConfig: {
                enabled: config.vectorConfig?.enabled ?? true,
                defaultDimensions: config.vectorConfig?.defaultDimensions || 1536,
                defaultModel: config.vectorConfig?.defaultModel || 'text-embedding-ada-002',
                autoEmbedding: config.vectorConfig?.autoEmbedding ?? false,
                embeddingFields: config.vectorConfig?.embeddingFields || ['content', 'text', 'description'],
                ...config.vectorConfig
            },
            enableValidation: config.enableValidation ?? true,
            enableSchemaEvolution: config.enableSchemaEvolution ?? true,
            enableChangeStreams: config.enableChangeStreams ?? true,
            maxChangeStreamConnections: config.maxChangeStreamConnections || 1000,
            enableQueryLogging: config.enableQueryLogging ?? false,
            enablePerformanceMetrics: config.enablePerformanceMetrics ?? true,
            enableRelationships: config.enableRelationships ?? true,
            populateDepth: config.populateDepth || 3,
            bulkWriteBatchSize: config.bulkWriteBatchSize || 1000,
            bulkWriteParallelism: config.bulkWriteParallelism || 4
        };
        this.d1 = config.d1Database;
        this.kvStore = config.kvStore;
        this.r2Bucket = config.r2Bucket;
        if (!this.d1) {
            throw new DocumentError('D1 database instance is required', 'MISSING_D1');
        }
    }

    /**
     * Initialize storage with schema creation
     */
    async initialize() {
        if (this.initialized)
            return;
        try {
            // Create core document table
            await this.d1.exec(`
                CREATE TABLE IF NOT EXISTS documents (
                    _id TEXT PRIMARY KEY,
                    _collection TEXT NOT NULL,
                    _database TEXT NOT NULL,
                    _data TEXT NOT NULL,   -- JSON document data
                    _searchText TEXT,      -- Full-text search index
                    _vector BLOB,          -- Vector embeddings
                    _vectorDims INTEGER,   -- Vector dimensions
                    _metadata TEXT,        -- JSON metadata
                    _version INTEGER DEFAULT 1,
                    _createdAt DATETIME DEFAULT CURRENT_TIMESTAMP,
                    _updatedAt DATETIME DEFAULT CURRENT_TIMESTAMP,
                    _deleted BOOLEAN DEFAULT FALSE,
                    -- Dynamic indexed fields
                    _idx_field_1 TEXT,
                    _idx_field_2 TEXT,
                    _idx_field_3 TEXT,
                    _idx_field_4 TEXT,
                    _idx_field_5 TEXT,
                    _idx_field_6 REAL,
                    _idx_field_7 REAL,
                    _idx_field_8 REAL,
                    _idx_field_9 INTEGER,
                    _idx_field_10 INTEGER
                )
            `);
            // Create core indexes
            await this.createCoreIndexes();
            // Create metadata tables
            await this.createMetadataTables();
            this.initialized = true;
            console.log(`Document storage initialized for database: ${this.config.name}`);
        }
        catch (error) {
            throw new DocumentError(`Storage initialization failed: ${error.message}`, 'INIT_ERROR');
        }
    }

    /**
     * Insert a single document
     */
    async insertOne(collection, document) {
        await this.ensureInitialized();
        if (!document._id) {
            document._id = this.generateObjectId();
        }
        // Validate document size
        const docSize = JSON.stringify(document).length;
        if (docSize > this.config.maxDocumentSize) {
            throw new ValidationError(`Document size ${docSize} exceeds maximum ${this.config.maxDocumentSize}`);
        }
        try {
            const now = new Date();
            document._createdAt = now;
            document._updatedAt = now;
            document._version = 1;
            // Prepare search text and metadata
            const searchText = this.extractSearchText(document);
            const metadata = this.extractMetadata(document);
            // Prepare vector data if present
            let vectorData = null;
            let vectorDims = null;
            if (document._vector) {
                vectorData = new Uint8Array(document._vector.data.buffer);
                vectorDims = document._vector.data.length;
            }
            // Extract dynamic index values
            const indexValues = this.extractIndexValues(document);
            // Insert into D1
            const result = await this.d1.prepare(`
                INSERT INTO documents (
                    _id, _collection, _database, _data, _searchText,
                    _vector, _vectorDims, _metadata, _version,
                    _createdAt, _updatedAt, _deleted,
                    _idx_field_1, _idx_field_2, _idx_field_3, _idx_field_4, _idx_field_5,
                    _idx_field_6, _idx_field_7, _idx_field_8, _idx_field_9, _idx_field_10
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `).bind(document._id, collection, this.config.name, JSON.stringify(document), searchText, vectorData, vectorDims, JSON.stringify(metadata), document._version, now.toISOString(), now.toISOString(), false, ...indexValues).run();
            if (!result.success) {
                throw new DocumentError('Failed to insert document', 'INSERT_ERROR');
            }
            // Cache the document
            this.cacheDocument(this.getDocumentKey(collection, document._id), document);
            // Store large documents in R2 if available
            if (docSize > 1024 * 1024 && this.r2Bucket) { // > 1MB
                await this.storeInR2(collection, document._id, document);
            }
            return { acknowledged: true, insertedId: document._id };
        }
        catch (error) {
            if (error.message?.includes('UNIQUE constraint failed')) {
                throw new DuplicateKeyError(`Document with _id '${document._id}' already exists`);
            }
            throw new DocumentError(`Insert failed: ${error.message}`, 'INSERT_ERROR');
        }
    }

    /**
     * Insert multiple documents
     */
    async insertMany(collection, documents) {
        await this.ensureInitialized();
        const insertedIds = [];
        const batchSize = this.config.bulkWriteBatchSize;
        try {
            // Process in batches
            for (let i = 0; i < documents.length; i += batchSize) {
                const batch = documents.slice(i, i + batchSize);
                // Prepare batch for transaction
                const statements = batch.map(doc => {
                    if (!doc._id) {
                        doc._id = this.generateObjectId();
                    }
                    const now = new Date();
                    doc._createdAt = now;
                    doc._updatedAt = now;
                    doc._version = 1;
                    const searchText = this.extractSearchText(doc);
                    const metadata = this.extractMetadata(doc);
                    const indexValues = this.extractIndexValues(doc);
                    let vectorData = null;
                    let vectorDims = null;
                    if (doc._vector) {
                        vectorData = new Uint8Array(doc._vector.data.buffer);
                        vectorDims = doc._vector.data.length;
                    }
                    return {
                        stmt: this.d1.prepare(`
                            INSERT INTO documents (
                                _id, _collection, _database, _data, _searchText,
                                _vector, _vectorDims, _metadata, _version,
                                _createdAt, _updatedAt, _deleted,
                                _idx_field_1, _idx_field_2, _idx_field_3, _idx_field_4, _idx_field_5,
                                _idx_field_6, _idx_field_7, _idx_field_8, _idx_field_9, _idx_field_10
                            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                        `),
                        bind: [
                            doc._id, collection, this.config.name, JSON.stringify(doc),
                            searchText, vectorData, vectorDims, JSON.stringify(metadata),
                            doc._version, now.toISOString(), now.toISOString(), false,
                            ...indexValues
                        ]
                    };
                });
                // Execute batch transaction
                const results = await this.d1.batch(statements.map(s => s.stmt.bind(...s.bind)));
                // Collect successful insertions
                for (let j = 0; j < results.length; j++) {
                    if (results[j].success) {
                        insertedIds.push(batch[j]._id);
                        // Cache the document
                        this.cacheDocument(this.getDocumentKey(collection, batch[j]._id), batch[j]);
                    }
                }
            }
            return { acknowledged: true, insertedCount: insertedIds.length, insertedIds };
        }
        catch (error) {
            throw new DocumentError(`Bulk insert failed: ${error.message}`, 'BULK_INSERT_ERROR');
        }
    }

    /**
     * Find documents by filter
     */
    async find(collection, filter, options = {}) {
        await this.ensureInitialized();
        try {
            // Check query cache first
            const cacheKey = this.getQueryCacheKey('find', collection, filter, options);
            if (this.config.enableQueryCache && this.queryCache.has(cacheKey)) {
                const cached = this.queryCache.get(cacheKey);
                if (Date.now() - cached.timestamp < this.config.queryCacheTTL * 1000) {
                    return cached.result;
                }
                this.queryCache.delete(cacheKey);
            }
            // Build SQL query from MongoDB filter
            const { sql, params } = this.buildQuery(collection, filter, options);
            // Execute query
            const result = await this.d1.prepare(sql).bind(...params).all();
            // Parse results
            const documents = result.results?.map((row) => {
                const doc = JSON.parse(row._data);
                // Add vector data if present
                if (row._vector && row._vectorDims) {
                    doc._vector = {
                        id: doc._id,
                        data: new Float32Array(new Uint8Array(row._vector).buffer),
                        metadata: doc._vector?.metadata || {}
                    };
                }
                return doc;
            }) || [];
            // Cache the result
            if (this.config.enableQueryCache) {
                this.queryCache.set(cacheKey, { result: documents, timestamp: Date.now() });
            }
            return documents;
        }
        catch (error) {
            throw new DocumentError(`Find operation failed: ${error.message}`, 'FIND_ERROR');
        }
    }

    /**
     * Find a single document
     */
    async findOne(collection, filter, options = {}) {
        const results = await this.find(collection, filter, { ...options, limit: 1 });
        return results.length > 0 ? results[0] : null;
    }

    /**
     * Update a single document
     */
    async updateOne(collection, filter, update, options = {}) {
        await this.ensureInitialized();
        try {
            // Find the document first
            const existingDoc = await this.findOne(collection, filter);
            if (!existingDoc && !options.upsert) {
                return { acknowledged: true, matchedCount: 0, modifiedCount: 0, upsertedCount: 0 };
            }
            let doc = existingDoc;
            let upsertedId;
            if (!doc && options.upsert) {
                // Create new document for upsert
                doc = { ...filter, _id: this.generateObjectId() };
                upsertedId = doc._id;
            }
            // Apply updates
            const updatedDoc = this.applyUpdateOperators(doc, update);
            updatedDoc._updatedAt = new Date();
            updatedDoc._version = (updatedDoc._version || 0) + 1;
            // Prepare for update
            const searchText = this.extractSearchText(updatedDoc);
            const metadata = this.extractMetadata(updatedDoc);
            const indexValues = this.extractIndexValues(updatedDoc);
            let vectorData = null;
            let vectorDims = null;
            if (updatedDoc._vector) {
                vectorData = new Uint8Array(updatedDoc._vector.data.buffer);
                vectorDims = updatedDoc._vector.data.length;
            }
            let result;
            if (existingDoc) {
                // Update existing document
                result = await this.d1.prepare(`
                    UPDATE documents SET
                        _data = ?, _searchText = ?, _vector = ?, _vectorDims = ?,
                        _metadata = ?, _version = ?, _updatedAt = ?,
                        _idx_field_1 = ?, _idx_field_2 = ?, _idx_field_3 = ?, _idx_field_4 = ?, _idx_field_5 = ?,
                        _idx_field_6 = ?, _idx_field_7 = ?, _idx_field_8 = ?, _idx_field_9 = ?, _idx_field_10 = ?
                    WHERE _id = ? AND _collection = ? AND _database = ? AND _deleted = FALSE
                `).bind(JSON.stringify(updatedDoc), searchText, vectorData, vectorDims, JSON.stringify(metadata), updatedDoc._version, updatedDoc._updatedAt.toISOString(), ...indexValues, updatedDoc._id, collection, this.config.name).run();
            }
            else {
                // Insert new document (upsert)
                result = await this.insertOne(collection, updatedDoc);
            }
            // Update cache
            this.cacheDocument(this.getDocumentKey(collection, updatedDoc._id), updatedDoc);
            return {
                acknowledged: true,
                matchedCount: existingDoc ? 1 : 0,
                modifiedCount: existingDoc ? 1 : 0,
                upsertedId,
                upsertedCount: upsertedId ? 1 : 0
            };
        }
        catch (error) {
            throw new DocumentError(`Update operation failed: ${error.message}`, 'UPDATE_ERROR');
        }
    }

    /**
     * Delete documents
     */
    async deleteMany(collection, filter) {
        await this.ensureInitialized();
        try {
            // Build query to find matching documents
            const { sql, params } = this.buildQuery(collection, filter, {});
            // Soft delete (mark as deleted). The replaced prefix must match the
            // SELECT list emitted by buildQuery() exactly, or the rewrite is a no-op.
            const deleteSql = sql.replace('SELECT _data, _vector, _vectorDims FROM documents', 'UPDATE documents SET _deleted = TRUE, _updatedAt = ?');
            const now = new Date().toISOString();
            const result = await this.d1.prepare(deleteSql).bind(now, ...params).run();
            // Clear from cache
            if (result.changes > 0) {
                this.clearQueryCache();
            }
            return { acknowledged: true, deletedCount: result.changes || 0 };
        }
        catch (error) {
            throw new DocumentError(`Delete operation failed: ${error.message}`, 'DELETE_ERROR');
        }
    }

    /**
     * Delete a single document
     */
    async deleteOne(collection, filter) {
        const doc = await this.findOne(collection, filter);
        if (!doc) {
            return { acknowledged: true, deletedCount: 0 };
        }
        const result = await this.deleteMany(collection, { _id: doc._id });
        return { acknowledged: true, deletedCount: Math.min(result.deletedCount, 1) };
    }

    /**
     * Count documents
     */
    async countDocuments(collection, filter = {}) {
        await this.ensureInitialized();
        try {
            const { sql, params } = this.buildQuery(collection, filter, {});
            // As in deleteMany, the replaced prefix must match buildQuery()'s SELECT list.
            const countSql = sql.replace('SELECT _data, _vector, _vectorDims FROM documents', 'SELECT COUNT(*) as count FROM documents');
            const result = await this.d1.prepare(countSql).bind(...params).first();
            return result?.count || 0;
        }
        catch (error) {
            throw new DocumentError(`Count operation failed: ${error.message}`, 'COUNT_ERROR');
        }
    }

    // ===============================
    // Private Methods
    // ===============================

    async ensureInitialized() {
        if (!this.initialized) {
            await this.initialize();
        }
    }

    generateObjectId() {
        // Simple ObjectId generation - in production use a proper library
        const timestamp = Math.floor(Date.now() / 1000).toString(16);
        const random = Math.random().toString(16).substring(2, 10);
        const counter = Math.random().toString(16).substring(2, 8);
        return timestamp + random + counter;
    }

    extractSearchText(document) {
        // Extract searchable text from document
        const searchableFields = ['title', 'content', 'description', 'text', 'name'];
        const texts = [];
        const extractText = (obj, depth = 0) => {
            if (depth > 3)
                return; // Prevent runaway recursion on deeply nested documents
            for (const [key, value] of Object.entries(obj)) {
                if (typeof value === 'string' && (searchableFields.includes(key) || key.includes('text'))) {
                    texts.push(value);
                }
                else if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
                    extractText(value, depth + 1);
                }
            }
        };
        extractText(document);
        return texts.join(' ').substring(0, 10000); // Limit to 10KB
    }

    extractMetadata(document) {
        return {
            tags: document.tags || [],
            category: document.category,
            source: document.source,
            confidence: document.confidence,
            lastProcessed: new Date(),
            processingVersion: '1.0.0',
            customFields: {}
        };
    }

    extractIndexValues(document) {
        // Extract up to 10 indexable values from the document. Each well-known
        // field maps to a fixed column slot, so idx advances even when a field
        // is absent (the slot stays null).
        const values = new Array(10).fill(null);
        // Common indexable fields
        const stringFields = ['status', 'type', 'category', 'userId', 'email'];
        const numberFields = ['score', 'rating', 'price', 'quantity', 'age'];
        const integerFields = ['views', 'likes', 'comments', 'shares', 'count'];
        let idx = 0;
        // String fields (fields 1-5)
        for (const field of stringFields) {
            if (idx >= 5)
                break;
            if (document[field] && typeof document[field] === 'string') {
                values[idx] = document[field];
            }
            idx++;
        }
        // Number fields (fields 6-8)
        idx = 5;
        for (const field of numberFields) {
            if (idx >= 8)
                break;
            if (document[field] && typeof document[field] === 'number') {
                values[idx] = document[field];
            }
            idx++;
        }
        // Integer fields (fields 9-10)
        idx = 8;
        for (const field of integerFields) {
            if (idx >= 10)
                break;
            if (document[field] && Number.isInteger(document[field])) {
                values[idx] = document[field];
            }
            idx++;
        }
        return values;
    }

    buildQuery(collection, filter, options) {
        const params = [];
        const conditions = [];
        // Base conditions
        conditions.push('_collection = ?');
        params.push(collection);
        conditions.push('_database = ?');
        params.push(this.config.name);
        conditions.push('_deleted = FALSE');
        // Build filter conditions
        this.buildFilterConditions(filter, conditions, params);
        // Build SQL
        let sql = `SELECT _data, _vector, _vectorDims FROM documents WHERE ${conditions.join(' AND ')}`;
        // Add sorting
        if (options.sort) {
            const sortClauses = Object.entries(options.sort).map(([field, direction]) => {
                return `JSON_EXTRACT(_data, '$.${field}') ${direction === 1 ? 'ASC' : 'DESC'}`;
            });
            sql += ` ORDER BY ${sortClauses.join(', ')}`;
        }
        // Add limit and offset
        if (options.limit) {
            sql += ` LIMIT ${options.limit}`;
        }
        if (options.skip) {
            sql += ` OFFSET ${options.skip}`;
        }
        return { sql, params };
    }

    buildFilterConditions(filter, conditions, params) {
        for (const [key, value] of Object.entries(filter)) {
            if (key.startsWith('$')) {
                // Handle MongoDB operators
                this.handleOperator(key, value, conditions, params);
            }
            else {
                // Simple equality
                conditions.push(`JSON_EXTRACT(_data, '$.${key}') = ?`);
                params.push(value);
            }
        }
    }

    handleOperator(operator, value, conditions, params) {
        switch (operator) {
            case '$and': {
                const andConditions = [];
                for (const subFilter of value) {
                    const subConds = [];
                    this.buildFilterConditions(subFilter, subConds, params);
                    andConditions.push(`(${subConds.join(' AND ')})`);
                }
                conditions.push(`(${andConditions.join(' AND ')})`);
                break;
            }
            case '$or': {
                const orConditions = [];
                for (const subFilter of value) {
                    const subConds = [];
                    this.buildFilterConditions(subFilter, subConds, params);
                    orConditions.push(`(${subConds.join(' AND ')})`);
                }
                conditions.push(`(${orConditions.join(' OR ')})`);
                break;
            }
            // Add more operators as needed
        }
    }

    applyUpdateOperators(document, update) {
        const result = { ...document };
        if (this.isUpdateOperators(update)) {
            // Handle update operators
            if (update.$set) {
                Object.assign(result, update.$set);
            }
            if (update.$unset) {
                for (const field of Object.keys(update.$unset)) {
                    delete result[field];
                }
            }
            if (update.$inc) {
                for (const [field, increment] of Object.entries(update.$inc)) {
                    result[field] = (result[field] || 0) + increment;
                }
            }
            // Add more operators as needed
        }
        else {
            // Replace document
            Object.assign(result, update);
        }
        return result;
    }

    isUpdateOperators(update) {
        return Object.keys(update).some(key => key.startsWith('$'));
    }

    async createCoreIndexes() {
        const indexes = [
            'CREATE INDEX IF NOT EXISTS idx_collection ON documents(_collection)',
            'CREATE INDEX IF NOT EXISTS idx_database ON documents(_database)',
            'CREATE INDEX IF NOT EXISTS idx_created_at ON documents(_createdAt)',
            'CREATE INDEX IF NOT EXISTS idx_updated_at ON documents(_updatedAt)',
            'CREATE INDEX IF NOT EXISTS idx_deleted ON documents(_deleted)',
            'CREATE INDEX IF NOT EXISTS idx_collection_db ON documents(_collection, _database)',
            // Dynamic field indexes
            'CREATE INDEX IF NOT EXISTS idx_field_1 ON documents(_idx_field_1)',
            'CREATE INDEX IF NOT EXISTS idx_field_2 ON documents(_idx_field_2)',
            'CREATE INDEX IF NOT EXISTS idx_field_3 ON documents(_idx_field_3)',
            'CREATE INDEX IF NOT EXISTS idx_field_4 ON documents(_idx_field_4)',
            'CREATE INDEX IF NOT EXISTS idx_field_5 ON documents(_idx_field_5)',
            'CREATE INDEX IF NOT EXISTS idx_field_6 ON documents(_idx_field_6)',
            'CREATE INDEX IF NOT EXISTS idx_field_7 ON documents(_idx_field_7)',
            'CREATE INDEX IF NOT EXISTS idx_field_8 ON documents(_idx_field_8)',
            'CREATE INDEX IF NOT EXISTS idx_field_9 ON documents(_idx_field_9)',
            'CREATE INDEX IF NOT EXISTS idx_field_10 ON documents(_idx_field_10)'
        ];
        for (const indexSql of indexes) {
            await this.d1.exec(indexSql);
        }
    }

    async createMetadataTables() {
        // Collection metadata
        await this.d1.exec(`
            CREATE TABLE IF NOT EXISTS collection_metadata (
                database_name TEXT,
                collection_name TEXT,
                document_count INTEGER DEFAULT 0,
                total_size INTEGER DEFAULT 0,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (database_name, collection_name)
            )
        `);
        // Index mappings
        await this.d1.exec(`
            CREATE TABLE IF NOT EXISTS index_mappings (
                database_name TEXT,
                collection_name TEXT,
                field_path TEXT,
                index_column TEXT,
                data_type TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (database_name, collection_name, field_path)
            )
        `);
    }

    cacheDocument(key, document) {
        if (this.documentCache.size >= 10000) { // Max cache size
            // Remove oldest entries (simple LRU simulation)
            const firstKey = this.documentCache.keys().next().value;
            this.documentCache.delete(firstKey);
        }
        this.documentCache.set(key, document);
    }

    getDocumentKey(collection, id) {
        return `${this.config.name}:${collection}:${id}`;
    }

    getQueryCacheKey(operation, collection, filter, options) {
        return `${operation}:${collection}:${JSON.stringify(filter)}:${JSON.stringify(options)}`;
    }

    clearQueryCache() {
        this.queryCache.clear();
    }

    async storeInR2(collection, id, document) {
        if (!this.r2Bucket)
            return;
        try {
            const key = `documents/${this.config.name}/${collection}/${id}.json`;
            await this.r2Bucket.put(key, JSON.stringify(document));
        }
        catch (error) {
            console.warn('Failed to store document in R2:', error);
        }
    }
}
//# sourceMappingURL=document-storage.js.map
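For reference, a sketch of the MongoDB-to-SQL translation that buildQuery() performs, continuing the `storage` instance from the sketch above. The filter and values are illustrative, and the SQL in the comment is reflowed across lines for readability; the method emits it as a single string, with LIMIT inlined rather than parameterized.

// Illustrative input/output for buildQuery() (values are examples, not from the package)
const { sql, params } = storage.buildQuery('users', {
    status: 'active',
    $or: [{ age: 30 }, { age: 40 }],
}, { sort: { age: 1 }, limit: 5 });

// sql:
//   SELECT _data, _vector, _vectorDims FROM documents
//   WHERE _collection = ? AND _database = ? AND _deleted = FALSE
//     AND JSON_EXTRACT(_data, '$.status') = ?
//     AND ((JSON_EXTRACT(_data, '$.age') = ?) OR (JSON_EXTRACT(_data, '$.age') = ?))
//   ORDER BY JSON_EXTRACT(_data, '$.age') ASC LIMIT 5
//
// params: ['users', 'app', 'active', 30, 40]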