UNPKG

@andrejs1979/document

Version:

MongoDB-compatible document database for NoSQL

754 lines 29.9 kB
/**
 * NoSQL - MongoDB-Compatible Query Engine
 * Advanced query processing with optimization and indexing
 */
import { DocumentError } from '../types';

/** Columns selected for every document row. */
const DOCUMENT_COLUMNS = '_id, _data, _vector, _vectorDims, _metadata, _createdAt, _updatedAt';

/**
 * Document fields that are mirrored into dedicated index columns.
 * A Map (rather than an object literal) is used so that field names such as
 * "constructor" or "toString" never resolve to inherited prototype members.
 */
const INDEXED_FIELD_COLUMNS = new Map([
    ['status', '_idx_field_1'],
    ['type', '_idx_field_2'],
    ['category', '_idx_field_3'],
    ['userId', '_idx_field_4'],
    ['email', '_idx_field_5'],
    ['score', '_idx_field_6'],
    ['rating', '_idx_field_7'],
    ['price', '_idx_field_8'],
    ['views', '_idx_field_9'],
    ['likes', '_idx_field_10'],
]);

/**
 * Escape a value so it can be embedded inside a single-quoted SQL string literal.
 * @param {unknown} text - value to embed
 * @returns {string} the text with every single quote doubled
 */
function escapeSqlString(text) {
    return String(text).replace(/'/g, "''");
}

/**
 * Validate a LIMIT/OFFSET option before it is interpolated into SQL.
 * @param {unknown} value - caller-supplied option (number or numeric string)
 * @param {string} optionName - option name used in the error message
 * @returns {number} the value as a safe integer
 * @throws {DocumentError} when the value is not an integer
 */
function toSqlInteger(value, optionName) {
    const numeric = typeof value === 'string' ? Number(value) : value;
    if (!Number.isSafeInteger(numeric)) {
        throw new DocumentError(`Invalid ${optionName}: expected an integer`, 'INVALID_QUERY');
    }
    return numeric;
}

/**
 * Tell whether a filter value is an operator document such as `{ $gt: 5 }`.
 * @param {unknown} value - candidate filter value
 * @returns {boolean} true for a non-empty plain object whose keys all start with `$`
 */
function isOperatorObject(value) {
    if (typeof value !== 'object' || value === null || Array.isArray(value)) {
        return false;
    }
    const keys = Object.keys(value);
    return keys.length > 0 && keys.every((key) => key.startsWith('$'));
}

/**
 * Query engine for MongoDB-compatible operations
 */
export class MongoQueryEngine {
    d1;
    indexManager;
    queryMetrics = [];

    /**
     * @param d1 - database handle exposing `prepare(sql).bind(...params).all()`
     * @param indexManager - index manager; stored on the instance
     */
    constructor(d1, indexManager) {
        this.d1 = d1;
        this.indexManager = indexManager;
    }

    /**
     * Build SQL query from MongoDB filter
     *
     * @param {string} database - value bound against `_database`
     * @param {string} collection - value bound against `_collection`
     * @param {object} filter - MongoDB-style filter document
     * @param {object} [options] - optional `projection`, `sort`, `limit`, `skip`
     * @returns {{sql: string, params: Array, explanation: string}}
     * @throws {DocumentError} on unsupported operators or non-integer limit/skip
     */
    buildQuery(database, collection, filter, options = {}) {
        const params = [];
        const conditions = [];
        const explanation = [];

        // Base conditions
        conditions.push('_collection = ?');
        params.push(collection);
        explanation.push(`Collection filter: ${collection}`);
        conditions.push('_database = ?');
        params.push(database);
        explanation.push(`Database filter: ${database}`);
        conditions.push('_deleted = FALSE');
        explanation.push('Exclude deleted documents');

        // Build filter conditions
        this.buildFilterConditions(filter, conditions, params, explanation);

        // Select fields
        const selectFields = options.projection
            ? this.buildProjection(options.projection)
            : DOCUMENT_COLUMNS;

        // Build main query
        let sql = `SELECT ${selectFields} FROM documents WHERE ${conditions.join(' AND ')}`;

        // Add sorting
        if (options.sort) {
            const { orderClause, sortExplanation } = this.buildOrderBy(options.sort);
            sql += orderClause;
            explanation.push(sortExplanation);
        }

        // Add limit and offset. Both values are validated as integers because
        // they are interpolated into the SQL text rather than bound.
        const limit = options.limit ? toSqlInteger(options.limit, 'limit') : null;
        const skip = options.skip ? toSqlInteger(options.skip, 'skip') : null;
        if (limit !== null) {
            sql += ` LIMIT ${limit}`;
            explanation.push(`Limit: ${limit}`);
        } else if (skip !== null) {
            // SQLite only accepts OFFSET after LIMIT; a negative LIMIT means "no limit".
            sql += ' LIMIT -1';
        }
        if (skip !== null) {
            sql += ` OFFSET ${skip}`;
            explanation.push(`Skip: ${skip}`);
        }

        return { sql, params, explanation: explanation.join('; ') };
    }

    /**
     * Build filter conditions from MongoDB query
     *
     * Appends to `conditions`, `params` and `explanation` in place. Keys that
     * start with `$` are treated as logical operators, all others as fields.
     */
    buildFilterConditions(filter, conditions, params, explanation) {
        for (const [key, value] of Object.entries(filter ?? {})) {
            if (key.startsWith('$')) {
                this.handleLogicalOperator(key, value, conditions, params, explanation);
            } else {
                this.handleFieldFilter(key, value, conditions, params, explanation);
            }
        }
    }

    /**
     * Render one sub-filter as an AND-joined SQL expression.
     * An empty sub-filter yields `1=1` so the result is always valid SQL.
     */
    buildConjunction(filter, params, explanation) {
        const subConditions = [];
        this.buildFilterConditions(filter, subConditions, params, explanation);
        return subConditions.length > 0 ? subConditions.join(' AND ') : '1=1';
    }

    /**
     * Render a list of sub-filters joined by `joiner` (' AND ' or ' OR ').
     * `emptyResult` is the identity used when the list is empty.
     */
    combineSubFilters(subFilters, joiner, emptyResult, params, explanation) {
        if (subFilters.length === 0) {
            return `(${emptyResult})`;
        }
        const groups = subFilters.map(
            (subFilter) => `(${this.buildConjunction(subFilter, params, explanation)})`,
        );
        return `(${groups.join(joiner)})`;
    }

    /**
     * Handle logical operators ($and, $or, $not, etc.)
     *
     * Non-array values for `$and`/`$or`/`$nor` are ignored.
     * @throws {DocumentError} UNSUPPORTED_OPERATOR for unknown operators
     */
    handleLogicalOperator(operator, value, conditions, params, explanation) {
        switch (operator) {
            case '$and':
                if (Array.isArray(value)) {
                    conditions.push(this.combineSubFilters(value, ' AND ', '1=1', params, explanation));
                    explanation.push(`$and operator with ${value.length} conditions`);
                }
                break;
            case '$or':
                if (Array.isArray(value)) {
                    conditions.push(this.combineSubFilters(value, ' OR ', '1=0', params, explanation));
                    explanation.push(`$or operator with ${value.length} conditions`);
                }
                break;
            case '$not':
                conditions.push(`NOT (${this.buildConjunction(value, params, explanation)})`);
                explanation.push('$not operator');
                break;
            case '$nor':
                if (Array.isArray(value)) {
                    conditions.push(`NOT ${this.combineSubFilters(value, ' OR ', '1=0', params, explanation)}`);
                    explanation.push(`$nor operator with ${value.length} conditions`);
                }
                break;
            case '$text':
                this.handleTextSearch(value, conditions, params, explanation);
                break;
            case '$vectorSearch':
                this.handleVectorSearch(value, conditions, params, explanation);
                break;
            case '$hybridSearch':
                this.handleHybridSearch(value, conditions, params, explanation);
                break;
            default:
                throw new DocumentError(`Unsupported operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
        }
    }

    /**
     * Handle field-level filters
     *
     * A plain-object value is treated as a set of field operators; anything
     * else is an equality match.
     */
    handleFieldFilter(field, value, conditions, params, explanation) {
        // Check if value contains operators
        if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
            for (const [op, opValue] of Object.entries(value)) {
                this.handleFieldOperator(field, op, opValue, conditions, params, explanation);
            }
            return;
        }

        // Simple equality
        const indexColumn = this.getIndexColumn(field);
        const fieldRef = this.getFieldReference(field);
        explanation.push(
            indexColumn
                ? `Indexed field ${field} via ${indexColumn}`
                : `JSON extraction for field ${field}`,
        );
        if (value == null) {
            // `= NULL` is never true in SQL, so null equality must use IS NULL.
            conditions.push(`${fieldRef} IS NULL`);
        } else {
            conditions.push(`${fieldRef} = ?`);
            params.push(value);
        }
    }

    /**
     * Handle field operators ($eq, $ne, $gt, etc.)
     *
     * @throws {DocumentError} UNSUPPORTED_OPERATOR for unknown operators
     */
    handleFieldOperator(field, operator, value, conditions, params, explanation) {
        const fieldRef = this.getFieldReference(field);
        switch (operator) {
            case '$eq':
                if (value == null) {
                    conditions.push(`${fieldRef} IS NULL`);
                } else {
                    conditions.push(`${fieldRef} = ?`);
                    params.push(value);
                }
                explanation.push(`Equality on ${field}`);
                break;
            case '$ne':
                if (value == null) {
                    conditions.push(`${fieldRef} IS NOT NULL`);
                } else {
                    // Parenthesized so the OR cannot escape the surrounding AND chain.
                    conditions.push(`(${fieldRef} != ? OR ${fieldRef} IS NULL)`);
                    params.push(value);
                }
                explanation.push(`Not equal on ${field}`);
                break;
            case '$gt':
                conditions.push(`${fieldRef} > ?`);
                params.push(value);
                explanation.push(`Greater than on ${field}`);
                break;
            case '$gte':
                conditions.push(`${fieldRef} >= ?`);
                params.push(value);
                explanation.push(`Greater than or equal on ${field}`);
                break;
            case '$lt':
                conditions.push(`${fieldRef} < ?`);
                params.push(value);
                explanation.push(`Less than on ${field}`);
                break;
            case '$lte':
                conditions.push(`${fieldRef} <= ?`);
                params.push(value);
                explanation.push(`Less than or equal on ${field}`);
                break;
            case '$in':
                if (Array.isArray(value)) {
                    const placeholders = value.map(() => '?').join(', ');
                    conditions.push(`${fieldRef} IN (${placeholders})`);
                    params.push(...value);
                    explanation.push(`IN operator on ${field} with ${value.length} values`);
                }
                break;
            case '$nin':
                if (Array.isArray(value)) {
                    const placeholders = value.map(() => '?').join(', ');
                    conditions.push(`(${fieldRef} NOT IN (${placeholders}) OR ${fieldRef} IS NULL)`);
                    params.push(...value);
                    explanation.push(`NOT IN operator on ${field} with ${value.length} values`);
                }
                break;
            case '$exists':
                if (value) {
                    conditions.push(`${fieldRef} IS NOT NULL`);
                    explanation.push(`Exists check on ${field}`);
                } else {
                    conditions.push(`${fieldRef} IS NULL`);
                    explanation.push(`Not exists check on ${field}`);
                }
                break;
            case '$type': {
                // Some type checks are compound ("a AND b"); keep them grouped.
                const typeCheck = this.getTypeCheck(fieldRef, value);
                conditions.push(`(${typeCheck})`);
                explanation.push(`Type check on ${field} for type ${value}`);
                break;
            }
            case '$regex': {
                // SQLite doesn't have full regex support, use LIKE for basic patterns
                const pattern = this.convertRegexToLike(value);
                conditions.push(`${fieldRef} LIKE ?`);
                params.push(pattern);
                explanation.push(`Regex pattern on ${field}`);
                break;
            }
            case '$size':
                if (typeof value === 'number') {
                    conditions.push(`JSON_ARRAY_LENGTH(${fieldRef}) = ?`);
                    params.push(value);
                    explanation.push(`Array size check on ${field}`);
                }
                break;
            case '$all':
                if (Array.isArray(value)) {
                    for (const item of value) {
                        conditions.push(`EXISTS (SELECT 1 FROM JSON_EACH(${fieldRef}) WHERE JSON_EACH.value = ?)`);
                        params.push(item);
                    }
                    explanation.push(`Array contains all values on ${field}`);
                }
                break;
            case '$elemMatch': {
                // Complex array element matching - simplified implementation
                const elemCondition = this.buildConjunction(value, params, explanation);
                conditions.push(`EXISTS (SELECT 1 FROM JSON_EACH(${fieldRef}) WHERE ${elemCondition})`);
                explanation.push(`Element match on array ${field}`);
                break;
            }
            default:
                throw new DocumentError(`Unsupported field operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
        }
    }

    /**
     * Handle text search
     *
     * Each whitespace-separated term of `$search` must appear in `_searchText`.
     * Queries without any term add no condition.
     */
    handleTextSearch(textQuery, conditions, params, explanation) {
        if (typeof textQuery !== 'object' || textQuery === null || !textQuery.$search) {
            return;
        }
        // Use FTS if available, otherwise fallback to LIKE
        const searchTerms = String(textQuery.$search)
            .split(' ')
            .filter((term) => term.length > 0);
        if (searchTerms.length === 0) {
            return; // a blank search would otherwise emit "()"
        }
        const searchConditions = searchTerms.map(() => '_searchText LIKE ?');
        conditions.push(`(${searchConditions.join(' AND ')})`);
        params.push(...searchTerms.map((term) => `%${term}%`));
        explanation.push(`Full-text search for: ${textQuery.$search}`);
    }

    /**
     * Handle vector search
     */
    handleVectorSearch(vectorQuery, conditions, params, explanation) {
        if (Array.isArray(vectorQuery?.vector)) {
            // Vector search would typically use a vector index
            // For now, we'll add a placeholder condition
            conditions.push('_vector IS NOT NULL');
            explanation.push(`Vector search with ${vectorQuery.vector.length} dimensions`);
            // In a real implementation, this would use vector similarity functions
            // or delegate to a vector search service like Cloudflare Vectorize
        }
    }

    /**
     * Handle hybrid search (text + vector)
     */
    handleHybridSearch(hybridQuery, conditions, params, explanation) {
        const hybridConditions = [];
        if (hybridQuery?.text) {
            hybridConditions.push('_searchText LIKE ?');
            params.push(`%${hybridQuery.text}%`);
        }
        if (hybridQuery?.vector) {
            hybridConditions.push('_vector IS NOT NULL');
        }
        if (hybridConditions.length > 0) {
            conditions.push(`(${hybridConditions.join(' AND ')})`);
            explanation.push('Hybrid text and vector search');
        }
    }

    /**
     * Build ORDER BY clause
     *
     * @param {object} sort - field → direction map; `1` sorts ascending, anything else descending
     * @returns {{orderClause: string, sortExplanation: string}}
     */
    buildOrderBy(sort) {
        const sortClauses = [];
        const explanations = [];
        for (const [field, direction] of Object.entries(sort)) {
            if (field === '$textScore') {
                // Text score sorting - would need implementation
                sortClauses.push('_searchText IS NOT NULL DESC');
                explanations.push('Text score');
            } else if (field === '$vectorScore') {
                // Vector score sorting - would need implementation
                sortClauses.push('_vector IS NOT NULL DESC');
                explanations.push('Vector score');
            } else {
                const fieldRef = this.getFieldReference(field);
                const dir = direction === 1 ? 'ASC' : 'DESC';
                sortClauses.push(`${fieldRef} ${dir}`);
                explanations.push(`${field} ${dir}`);
            }
        }
        const orderClause = sortClauses.length > 0 ? ` ORDER BY ${sortClauses.join(', ')}` : '';
        const sortExplanation = `Sort by: ${explanations.join(', ')}`;
        return { orderClause, sortExplanation };
    }

    /**
     * Build projection (field selection)
     *
     * The projection is not pushed down into SQL: every inclusion or exclusion
     * spec selects the full document column set.
     *
     * @param {object} projection - MongoDB-style projection (currently unused)
     * @returns {string} comma-separated column list
     */
    buildProjection(projection) {
        return DOCUMENT_COLUMNS;
    }

    /**
     * Get field reference (indexed column or JSON extract)
     *
     * The field name is embedded in a SQL string literal, so single quotes
     * are escaped to keep caller-supplied names from terminating it.
     */
    getFieldReference(field) {
        const indexColumn = this.getIndexColumn(field);
        if (indexColumn) {
            return indexColumn;
        }
        return `JSON_EXTRACT(_data, '$.${escapeSqlString(field)}')`;
    }

    /**
     * Get indexed column for field (if available)
     *
     * @returns {string|null} the index column name, or null when the field is not indexed
     */
    getIndexColumn(field) {
        return INDEXED_FIELD_COLUMNS.get(field) ?? null;
    }

    /**
     * Get type check SQL
     *
     * @param {string} fieldRef - SQL expression for the field
     * @param {string|number} type - type alias or BSON type number
     * @returns {string} SQL boolean expression (may contain a top-level AND)
     */
    getTypeCheck(fieldRef, type) {
        if (typeof type === 'string') {
            switch (type) {
                case 'string':
                    return `typeof(${fieldRef}) = 'text'`;
                case 'number':
                    return `typeof(${fieldRef}) IN ('integer', 'real')`;
                case 'boolean':
                    return `typeof(${fieldRef}) = 'integer' AND ${fieldRef} IN (0, 1)`;
                case 'object':
                    return `typeof(${fieldRef}) = 'text' AND JSON_VALID(${fieldRef})`;
                case 'array':
                    return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'array'`;
                case 'null':
                    return `${fieldRef} IS NULL`;
                default:
                    // Unknown aliases are compared verbatim; escape to keep the literal closed.
                    return `typeof(${fieldRef}) = '${escapeSqlString(type)}'`;
            }
        }
        // BSON type numbers (simplified)
        switch (type) {
            case 1: // double
                return `typeof(${fieldRef}) = 'real'`;
            case 2: // string
                return `typeof(${fieldRef}) = 'text'`;
            case 3: // object
                return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'object'`;
            case 4: // array
                return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'array'`;
            case 8: // boolean
                return `typeof(${fieldRef}) = 'integer' AND ${fieldRef} IN (0, 1)`;
            case 10: // null
                return `${fieldRef} IS NULL`;
            case 16: // int32
            case 18: // int64
                return `typeof(${fieldRef}) = 'integer'`;
            default:
                return '1=1'; // Always true for unknown types
        }
    }

    /**
     * Convert regex to SQL LIKE pattern (simplified)
     *
     * A leading `^` or trailing unescaped `$` suppresses the corresponding
     * `%` wildcard so anchored patterns stay anchored.
     *
     * @param {RegExp|string} regex - pattern to translate
     * @returns {string} LIKE pattern
     */
    convertRegexToLike(regex) {
        let pattern = regex.toString();
        // Remove regex delimiters
        if (pattern.startsWith('/') && pattern.lastIndexOf('/') > 0) {
            pattern = pattern.slice(1, pattern.lastIndexOf('/'));
        }
        const anchoredStart = pattern.startsWith('^');
        const anchoredEnd = pattern.endsWith('$') && !pattern.endsWith('\\$');
        // Convert common regex patterns to LIKE patterns
        pattern = pattern
            .replace(/\.\*/g, '%') // .* becomes %
            .replace(/\./g, '_') // . becomes _
            .replace(/\^/g, '') // Remove start anchor
            .replace(/\$/g, '') // Remove end anchor
            .replace(/\[.*?\]/g, '_') // Character classes become _
            .replace(/\+/g, '%') // + becomes %
            .replace(/\?/g, '_'); // ? becomes _
        return `${anchoredStart ? '' : '%'}${pattern}${anchoredEnd ? '' : '%'}`;
    }

    /**
     * Execute aggregation pipeline
     *
     * Loads every non-deleted document of the collection, then applies each
     * pipeline stage in memory and records a metrics entry.
     *
     * @returns {Promise<Array>} documents produced by the final stage
     * @throws {DocumentError} AGGREGATION_ERROR wrapping any failure
     */
    async executeAggregation(database, collection, pipeline, options = {}) {
        const startTime = Date.now();
        let currentData = [];
        try {
            // Start with all documents from collection
            const { sql, params } = this.buildQuery(database, collection, {}, {});
            const result = await this.d1.prepare(sql).bind(...params).all();
            currentData = result.results?.map((row) => {
                const doc = JSON.parse(row._data);
                if (row._vector && row._vectorDims) {
                    doc._vector = {
                        id: doc._id,
                        data: new Float32Array(new Uint8Array(row._vector).buffer),
                    };
                }
                return doc;
            }) || [];

            // Process each stage of the pipeline
            for (const stage of pipeline) {
                currentData = await this.processAggregationStage(currentData, stage);
            }

            // Record metrics
            this.recordQueryMetrics({
                queryId: this.generateQueryId(),
                timestamp: new Date(),
                queryType: 'aggregate',
                latency: Date.now() - startTime,
                documentsExamined: result.results?.length || 0,
                documentsReturned: currentData.length,
                indexHits: 0,
                cacheHit: false,
            });
            return currentData;
        } catch (error) {
            throw new DocumentError(`Aggregation failed: ${error.message}`, 'AGGREGATION_ERROR');
        }
    }

    /**
     * Process single aggregation stage
     *
     * The stage name is the first key of the stage object.
     * @throws {DocumentError} UNSUPPORTED_STAGE for unknown stages
     */
    async processAggregationStage(data, stage) {
        const stageKey = Object.keys(stage)[0];
        const stageValue = stage[stageKey];
        switch (stageKey) {
            case '$match':
                return this.stageMatch(data, stageValue);
            case '$project':
                return this.stageProject(data, stageValue);
            case '$group':
                return this.stageGroup(data, stageValue);
            case '$sort':
                return this.stageSort(data, stageValue);
            case '$limit':
                return this.stageLimit(data, stageValue);
            case '$skip':
                return this.stageSkip(data, stageValue);
            case '$unwind':
                return this.stageUnwind(data, stageValue);
            case '$lookup':
                return await this.stageLookup(data, stageValue);
            case '$addFields':
                return this.stageAddFields(data, stageValue);
            case '$count':
                return [{ [stageValue]: data.length }];
            case '$sample':
                return this.stageSample(data, stageValue);
            default:
                throw new DocumentError(`Unsupported aggregation stage: ${stageKey}`, 'UNSUPPORTED_STAGE');
        }
    }

    // Aggregation stage implementations

    /** $match: keep the documents accepted by `matchesFilter`. */
    stageMatch(data, filter) {
        return data.filter((doc) => this.matchesFilter(doc, filter));
    }

    /** $project: map every document through `applyProjection`. */
    stageProject(data, projection) {
        return data.map((doc) => this.applyProjection(doc, projection));
    }

    /**
     * $group: bucket documents by the evaluated `_id` expression and apply
     * the accumulator of every other field to each bucket.
     */
    stageGroup(data, groupSpec) {
        const groups = new Map();
        // Group documents
        for (const doc of data) {
            // A missing key groups under null; JSON.stringify(undefined) is not a string.
            const groupKey = this.evaluateExpression(doc, groupSpec._id) ?? null;
            const keyStr = JSON.stringify(groupKey);
            if (!groups.has(keyStr)) {
                groups.set(keyStr, []);
            }
            groups.get(keyStr).push(doc);
        }

        // Apply group operations
        const results = [];
        for (const [key, groupDocs] of groups) {
            const result = { _id: JSON.parse(key) };
            for (const [field, operation] of Object.entries(groupSpec)) {
                if (field === '_id') {
                    continue;
                }
                result[field] = this.applyGroupOperation(groupDocs, operation);
            }
            results.push(result);
        }
        return results;
    }

    /** $sort: return a sorted copy; direction `1` is ascending, anything else descending. */
    stageSort(data, sortSpec) {
        const sortKeys = Object.entries(sortSpec);
        return [...data].sort((a, b) => {
            for (const [field, direction] of sortKeys) {
                const aVal = this.getFieldValue(a, field);
                const bVal = this.getFieldValue(b, field);
                if (aVal < bVal) {
                    return direction === 1 ? -1 : 1;
                }
                if (aVal > bVal) {
                    return direction === 1 ? 1 : -1;
                }
            }
            return 0;
        });
    }

    /** $limit: first `limit` documents. */
    stageLimit(data, limit) {
        return data.slice(0, limit);
    }

    /** $skip: everything after the first `skip` documents. */
    stageSkip(data, skip) {
        return data.slice(skip);
    }

    /**
     * $unwind: emit one document per element of the array at `path`.
     * Documents whose value is not an array pass through unchanged.
     */
    stageUnwind(data, unwindSpec) {
        const path = typeof unwindSpec === 'string' ? unwindSpec : unwindSpec.path;
        const field = path.startsWith('$') ? path.substring(1) : path;
        const results = [];
        for (const doc of data) {
            const arrayValue = this.getFieldValue(doc, field);
            if (Array.isArray(arrayValue)) {
                for (const item of arrayValue) {
                    results.push(this.withFieldValue(doc, field, item));
                }
            } else {
                results.push(doc);
            }
        }
        return results;
    }

    /** $lookup: placeholder that attaches an empty array under `lookupSpec.as`. */
    async stageLookup(data, lookupSpec) {
        // Simplified lookup implementation
        // In a real implementation, this would query the foreign collection
        return data.map((doc) => ({
            ...doc,
            [lookupSpec.as]: [], // Empty array for now
        }));
    }

    /** $addFields: shallow-copy each document and set the evaluated expressions. */
    stageAddFields(data, fieldsSpec) {
        const additions = Object.entries(fieldsSpec);
        return data.map((doc) => {
            const newDoc = { ...doc };
            for (const [field, expression] of additions) {
                newDoc[field] = this.evaluateExpression(doc, expression);
            }
            return newDoc;
        });
    }

    /**
     * $sample: pick `sampleSpec.size` (default 1) documents at random.
     * Uses a partial Fisher-Yates shuffle on a copy of the input.
     */
    stageSample(data, sampleSpec) {
        const size = sampleSpec.size || 1;
        const pool = [...data];
        const picks = Math.min(size, pool.length);
        for (let i = 0; i < picks; i++) {
            const j = i + Math.floor(Math.random() * (pool.length - i));
            [pool[i], pool[j]] = [pool[j], pool[i]];
        }
        return pool.slice(0, size);
    }

    // Helper methods

    /**
     * In-memory filter matching used by $match.
     *
     * Supports `$and` / `$or` / `$nor` with array values, comparison operator
     * documents (see `matchesOperators`) and strict equality for everything else.
     */
    matchesFilter(document, filter) {
        for (const [key, value] of Object.entries(filter)) {
            if (Array.isArray(value) && (key === '$and' || key === '$or' || key === '$nor')) {
                const outcomes = value.map((subFilter) => this.matchesFilter(document, subFilter));
                const accepted = key === '$and'
                    ? outcomes.every(Boolean)
                    : outcomes.some(Boolean) === (key === '$or');
                if (!accepted) {
                    return false;
                }
                continue;
            }
            const docValue = this.getFieldValue(document, key);
            if (isOperatorObject(value)) {
                if (!this.matchesOperators(docValue, value)) {
                    return false;
                }
            } else if (docValue !== value) {
                return false;
            }
        }
        return true;
    }

    /**
     * Evaluate an operator document such as `{ $gte: 1, $lt: 5 }` against a value.
     * Operators that are not implemented here never match.
     */
    matchesOperators(docValue, operators) {
        for (const [op, operand] of Object.entries(operators)) {
            let accepted;
            switch (op) {
                case '$eq':
                    accepted = docValue === operand;
                    break;
                case '$ne':
                    accepted = docValue !== operand;
                    break;
                case '$gt':
                    accepted = docValue > operand;
                    break;
                case '$gte':
                    accepted = docValue >= operand;
                    break;
                case '$lt':
                    accepted = docValue < operand;
                    break;
                case '$lte':
                    accepted = docValue <= operand;
                    break;
                case '$in':
                    accepted = Array.isArray(operand) && operand.includes(docValue);
                    break;
                case '$nin':
                    accepted = Array.isArray(operand) && !operand.includes(docValue);
                    break;
                case '$exists':
                    accepted = (docValue !== undefined) === Boolean(operand);
                    break;
                default:
                    accepted = false;
            }
            if (!accepted) {
                return false;
            }
        }
        return true;
    }

    /**
     * Build a new document containing only the truthy projection entries.
     * Object-valued entries are evaluated as expressions.
     */
    applyProjection(document, projection) {
        const result = {};
        for (const [field, include] of Object.entries(projection)) {
            if (!include) {
                continue;
            }
            result[field] = typeof include === 'object'
                ? this.evaluateExpression(document, include) // Expression
                : this.getFieldValue(document, field); // Simple field inclusion
        }
        return result;
    }

    /**
     * Resolve `"$path"` strings to the document's field value; any other
     * expression is returned as-is.
     */
    evaluateExpression(document, expression) {
        if (typeof expression === 'string' && expression.startsWith('$')) {
            return this.getFieldValue(document, expression.substring(1));
        }
        return expression;
    }

    /**
     * Apply a $group accumulator (`$sum`, `$avg`, `$min`, `$max`, `$first`,
     * `$last`, `$push`) to the documents of one group.
     *
     * `$sum` and `$avg` consider numeric values only; `$avg` yields null when
     * there are none. Unknown accumulators yield null; non-object operations
     * are returned unchanged.
     */
    applyGroupOperation(docs, operation) {
        if (typeof operation !== 'object' || operation === null) {
            return operation;
        }
        const op = Object.keys(operation)[0];
        const value = operation[op];
        const evaluated = () => docs.map((doc) => this.evaluateExpression(doc, value));
        const numeric = () => evaluated().filter((item) => typeof item === 'number');
        const total = (numbers) => numbers.reduce((sum, item) => sum + item, 0);
        switch (op) {
            case '$sum':
                if (value === 1) {
                    return docs.length;
                }
                return total(numeric());
            case '$avg': {
                const numbers = numeric();
                return numbers.length > 0 ? total(numbers) / numbers.length : null;
            }
            case '$min':
                return Math.min(...evaluated());
            case '$max':
                return Math.max(...evaluated());
            case '$first':
                return docs.length > 0 ? this.evaluateExpression(docs[0], value) : null;
            case '$last':
                return docs.length > 0 ? this.evaluateExpression(docs[docs.length - 1], value) : null;
            case '$push':
                return evaluated();
            default:
                return null;
        }
    }

    /**
     * Read a dotted path from a document.
     * @returns the value, or undefined when a segment cannot be traversed
     */
    getFieldValue(document, field) {
        let current = document;
        for (const segment of field.split('.')) {
            if (!current || typeof current !== 'object') {
                return undefined;
            }
            current = current[segment];
        }
        return current;
    }

    /**
     * Write a dotted path into a document in place, creating intermediate
     * objects where a segment is missing or not an object.
     */
    setFieldValue(document, field, value) {
        const path = field.split('.');
        let current = document;
        for (let i = 0; i < path.length - 1; i++) {
            const segment = path[i];
            if (!current[segment] || typeof current[segment] !== 'object') {
                current[segment] = {};
            }
            current = current[segment];
        }
        current[path[path.length - 1]] = value;
    }

    /**
     * Return a copy of `document` with `field` set to `value`.
     * Every container along the path is copied, so the source document and
     * sibling copies are never mutated.
     */
    withFieldValue(document, field, value) {
        const path = field.split('.');
        const root = { ...document };
        let current = root;
        for (const segment of path.slice(0, -1)) {
            const child = current[segment];
            if (Array.isArray(child)) {
                current[segment] = [...child];
            } else if (child && typeof child === 'object') {
                current[segment] = { ...child };
            } else {
                current[segment] = {};
            }
            current = current[segment];
        }
        current[path[path.length - 1]] = value;
        return root;
    }

    /** Append a metrics entry, retaining only the most recent 1000. */
    recordQueryMetrics(metrics) {
        this.queryMetrics.push(metrics);
        // Keep only last 1000 metrics
        if (this.queryMetrics.length > 1000) {
            this.queryMetrics = this.queryMetrics.slice(-1000);
        }
    }

    /** Generate a short random base-36 identifier for a metrics entry. */
    generateQueryId() {
        return Math.random().toString(36).substring(2, 15);
    }

    /**
     * Get query performance metrics
     * @returns {Array} a shallow copy of the recorded metrics
     */
    getQueryMetrics() {
        return [...this.queryMetrics];
    }

    /**
     * Explain query execution plan
     *
     * Runs `EXPLAIN QUERY PLAN` for the generated SQL and returns a
     * MongoDB-shaped explain document.
     */
    async explainQuery(database, collection, filter, options = {}) {
        const { sql, params } = this.buildQuery(database, collection, filter, options);
        // Get SQLite query plan
        // NOTE(review): the plan rows are fetched but not reflected in the
        // response below, which always reports a COLLSCAN with zeroed stats.
        await this.d1.prepare(`EXPLAIN QUERY PLAN ${sql}`).bind(...params).all();

        const emptyStats = () => ({
            totalExamined: 0,
            totalDocsReturned: 0,
            executionTimeMillis: 0,
            indexesUsed: [],
        });
        return {
            queryPlanner: {
                plannerVersion: 1,
                namespace: `${database}.${collection}`,
                indexFilterSet: false,
                parsedQuery: filter,
                winningPlan: {
                    stage: 'COLLSCAN',
                    executionStats: emptyStats(),
                },
                rejectedPlans: [],
            },
            executionStats: {
                stage: 'COLLSCAN',
                executionStats: emptyStats(),
            },
        };
    }
}
//# sourceMappingURL=query-engine.js.map