@andrejs1979/document
Version:
MongoDB-compatible document database for NoSQL
754 lines • 29.9 kB
JavaScript
/**
* NoSQL - MongoDB-Compatible Query Engine
* Advanced query processing with optimization and indexing
*/
import { DocumentError } from '../types';
/**
 * Query engine for MongoDB-compatible operations.
 *
 * Translates MongoDB-style filters into parameterised SQL against a single
 * `documents` table (document fields are read from the JSON `_data` column or
 * from a fixed set of `_idx_field_N` columns) and evaluates aggregation
 * pipelines in memory over the rows it fetches.
 */
export class MongoQueryEngine {
  /** Columns selected for every document row. */
  static SELECT_COLUMNS = '_id, _data, _vector, _vectorDims, _metadata, _createdAt, _updatedAt';

  /**
   * Document fields that are mirrored into dedicated columns.
   * A Map (rather than an object literal) so that field names such as
   * `constructor` or `toString` never resolve to prototype members.
   */
  static INDEXED_FIELDS = new Map([
    ['status', '_idx_field_1'],
    ['type', '_idx_field_2'],
    ['category', '_idx_field_3'],
    ['userId', '_idx_field_4'],
    ['email', '_idx_field_5'],
    ['score', '_idx_field_6'],
    ['rating', '_idx_field_7'],
    ['price', '_idx_field_8'],
    ['views', '_idx_field_9'],
    ['likes', '_idx_field_10'],
  ]);

  /** SQL operator and explanation label for each range comparison operator. */
  static COMPARISONS = {
    $gt: ['>', 'Greater than'],
    $gte: ['>=', 'Greater than or equal'],
    $lt: ['<', 'Less than'],
    $lte: ['<=', 'Less than or equal'],
  };

  /** Database handle; used as `d1.prepare(sql).bind(...params).all()`. */
  d1;
  /** Index manager supplied by the caller; stored but not read by this class. */
  indexManager;
  /** Most recent query metrics, capped at 1000 entries. */
  queryMetrics = [];

  /**
   * @param {object} d1 - Database handle exposing `prepare(sql).bind(...).all()`.
   * @param {object} indexManager - Index manager instance (kept for callers).
   */
  constructor(d1, indexManager) {
    this.d1 = d1;
    this.indexManager = indexManager;
  }

  /**
   * Build a parameterised SQL query from a MongoDB filter.
   *
   * @param {string} database - Value matched against the `_database` column.
   * @param {string} collection - Value matched against the `_collection` column.
   * @param {object} filter - MongoDB-style filter document.
   * @param {object} [options] - Optional `projection`, `sort`, `limit`, `skip`.
   * @returns {{sql: string, params: Array, explanation: string}}
   * @throws {DocumentError} On unsupported operators or invalid limit/skip.
   */
  buildQuery(database, collection, filter, options = {}) {
    const opts = options ?? {};
    const params = [];
    const conditions = [];
    const explanation = [];

    // Base conditions
    conditions.push('_collection = ?');
    params.push(collection);
    explanation.push(`Collection filter: ${collection}`);
    conditions.push('_database = ?');
    params.push(database);
    explanation.push(`Database filter: ${database}`);
    conditions.push('_deleted = FALSE');
    explanation.push('Exclude deleted documents');

    // Filter conditions
    this.buildFilterConditions(filter, conditions, params, explanation);

    const selectFields = opts.projection
      ? this.buildProjection(opts.projection)
      : MongoQueryEngine.SELECT_COLUMNS;
    let sql = `SELECT ${selectFields} FROM documents WHERE ${conditions.join(' AND ')}`;

    if (opts.sort) {
      const { orderClause, sortExplanation } = this.buildOrderBy(opts.sort);
      sql += orderClause;
      explanation.push(sortExplanation);
    }

    // limit/skip are validated as integers before being placed in the SQL text.
    const limit = this.normalizePagingValue(opts.limit, 'limit');
    const skip = this.normalizePagingValue(opts.skip, 'skip');
    if (limit > 0) {
      sql += ` LIMIT ${limit}`;
      explanation.push(`Limit: ${limit}`);
    }
    if (skip > 0) {
      if (limit === 0) {
        // SQLite only accepts OFFSET after LIMIT; a negative LIMIT means "no limit".
        sql += ' LIMIT -1';
      }
      sql += ` OFFSET ${skip}`;
      explanation.push(`Skip: ${skip}`);
    }

    return {
      sql,
      params,
      explanation: explanation.join('; '),
    };
  }

  /**
   * Validate a limit/skip option.
   * @returns {number} 0 when the option is absent/falsy, otherwise a non-negative integer.
   * @throws {DocumentError} When the value is not a non-negative integer.
   */
  normalizePagingValue(value, name) {
    if (!value) {
      return 0;
    }
    const count = Number(value);
    if (!Number.isInteger(count) || count < 0) {
      throw new DocumentError(`Invalid ${name}: ${value}`, 'INVALID_QUERY_OPTION');
    }
    return count;
  }

  /**
   * Append SQL conditions for every entry of a MongoDB filter.
   * Keys starting with `$` are logical/search operators; all others are fields.
   * `conditions`, `params` and `explanation` are mutated in place.
   */
  buildFilterConditions(filter, conditions, params, explanation) {
    for (const [key, value] of Object.entries(filter ?? {})) {
      if (key.startsWith('$')) {
        this.handleLogicalOperator(key, value, conditions, params, explanation);
      } else {
        this.handleFieldFilter(key, value, conditions, params, explanation);
      }
    }
  }

  /**
   * Handle top-level operators ($and, $or, $not, $nor, $text, $vectorSearch,
   * $hybridSearch).
   * @throws {DocumentError} For unknown operators or for $and/$or/$nor given
   *   anything other than a non-empty array.
   */
  handleLogicalOperator(operator, value, conditions, params, explanation) {
    switch (operator) {
      case '$and':
        conditions.push(this.combineSubFilters(operator, value, ' AND ', params, explanation));
        explanation.push(`$and operator with ${value.length} conditions`);
        break;
      case '$or':
        conditions.push(this.combineSubFilters(operator, value, ' OR ', params, explanation));
        explanation.push(`$or operator with ${value.length} conditions`);
        break;
      case '$not': {
        const notConditions = [];
        this.buildFilterConditions(value, notConditions, params, explanation);
        conditions.push(`NOT (${this.joinConditions(notConditions)})`);
        explanation.push('$not operator');
        break;
      }
      case '$nor':
        conditions.push(`NOT ${this.combineSubFilters(operator, value, ' OR ', params, explanation)}`);
        explanation.push(`$nor operator with ${value.length} conditions`);
        break;
      case '$text':
        this.handleTextSearch(value, conditions, params, explanation);
        break;
      case '$vectorSearch':
        this.handleVectorSearch(value, conditions, params, explanation);
        break;
      case '$hybridSearch':
        this.handleHybridSearch(value, conditions, params, explanation);
        break;
      default:
        throw new DocumentError(`Unsupported operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
    }
  }

  /**
   * Compile each sub-filter and join the parenthesised groups with `joiner`.
   * @returns {string} e.g. `((a AND b) OR (c))`.
   */
  combineSubFilters(operator, subFilters, joiner, params, explanation) {
    this.requireArray(operator, subFilters, false);
    const groups = subFilters.map((subFilter) => {
      const subConditions = [];
      this.buildFilterConditions(subFilter, subConditions, params, explanation);
      return `(${this.joinConditions(subConditions)})`;
    });
    return `(${groups.join(joiner)})`;
  }

  /** AND-join conditions; an empty list becomes the always-true `1=1` so the SQL stays valid. */
  joinConditions(conditionList) {
    return conditionList.length > 0 ? conditionList.join(' AND ') : '1=1';
  }

  /**
   * Ensure an operator received an array operand, instead of silently
   * dropping the condition (which would widen the result set).
   * @throws {DocumentError}
   */
  requireArray(operator, value, allowEmpty) {
    if (!Array.isArray(value) || (!allowEmpty && value.length === 0)) {
      const expected = allowEmpty ? 'an array' : 'a non-empty array';
      throw new DocumentError(`${operator} requires ${expected}`, 'INVALID_QUERY');
    }
  }

  /**
   * Handle a field-level filter: either an operator object (`{ $gt: 1 }`),
   * a RegExp, or a plain value compared for equality.
   *
   * @param {string} [fieldRef] - Optional SQL expression to use instead of the
   *   document-level reference for `field` (used for array element matching).
   */
  handleFieldFilter(field, value, conditions, params, explanation, fieldRef) {
    if (value instanceof RegExp) {
      // A RegExp has no enumerable keys; without this it would add no condition at all.
      this.handleFieldOperator(field, '$regex', value, conditions, params, explanation, fieldRef);
      return;
    }
    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
      for (const [op, opValue] of Object.entries(value)) {
        this.handleFieldOperator(field, op, opValue, conditions, params, explanation, fieldRef);
      }
      return;
    }
    // Simple equality
    const indexColumn = fieldRef === undefined ? this.getIndexColumn(field) : null;
    const reference = fieldRef ?? this.getFieldReference(field);
    this.pushEquality(reference, value, conditions, params, false);
    explanation.push(indexColumn
      ? `Indexed field ${field} via ${indexColumn}`
      : `JSON extraction for field ${field}`);
  }

  /**
   * Append an equality (or inequality) test.
   * NULL needs `IS [NOT] NULL` because `= NULL` / `!= NULL` are never true in SQL.
   * The inequality form is parenthesised so its OR cannot escape an AND chain.
   */
  pushEquality(fieldRef, value, conditions, params, negate) {
    if (value === null || value === undefined) {
      conditions.push(`${fieldRef} IS ${negate ? 'NOT ' : ''}NULL`);
      return;
    }
    conditions.push(negate
      ? `(${fieldRef} != ? OR ${fieldRef} IS NULL)`
      : `${fieldRef} = ?`);
    params.push(value);
  }

  /**
   * Handle field operators ($eq, $ne, $gt, $in, $regex, $elemMatch, ...).
   *
   * @param {string} [fieldRef] - SQL expression the operator applies to;
   *   defaults to the document-level reference for `field`.
   * @throws {DocumentError} For unsupported operators or malformed operands.
   */
  handleFieldOperator(field, operator, value, conditions, params, explanation, fieldRef = this.getFieldReference(field)) {
    switch (operator) {
      case '$eq':
        this.pushEquality(fieldRef, value, conditions, params, false);
        explanation.push(`Equality on ${field}`);
        break;
      case '$ne':
        this.pushEquality(fieldRef, value, conditions, params, true);
        explanation.push(`Not equal on ${field}`);
        break;
      case '$gt':
      case '$gte':
      case '$lt':
      case '$lte': {
        const [sqlOperator, label] = MongoQueryEngine.COMPARISONS[operator];
        conditions.push(`${fieldRef} ${sqlOperator} ?`);
        params.push(value);
        explanation.push(`${label} on ${field}`);
        break;
      }
      case '$in': {
        this.requireArray(operator, value, true);
        if (value.length === 0) {
          conditions.push('1=0'); // nothing can be a member of an empty list
        } else {
          const placeholders = value.map(() => '?').join(', ');
          conditions.push(`${fieldRef} IN (${placeholders})`);
          params.push(...value);
        }
        explanation.push(`IN operator on ${field} with ${value.length} values`);
        break;
      }
      case '$nin': {
        this.requireArray(operator, value, true);
        if (value.length === 0) {
          conditions.push('1=1');
        } else {
          const placeholders = value.map(() => '?').join(', ');
          conditions.push(`(${fieldRef} NOT IN (${placeholders}) OR ${fieldRef} IS NULL)`);
          params.push(...value);
        }
        explanation.push(`NOT IN operator on ${field} with ${value.length} values`);
        break;
      }
      case '$exists':
        // JSON_EXTRACT yields NULL for a missing path, so NULL-ness stands in for existence.
        if (value) {
          conditions.push(`${fieldRef} IS NOT NULL`);
          explanation.push(`Exists check on ${field}`);
        } else {
          conditions.push(`${fieldRef} IS NULL`);
          explanation.push(`Not exists check on ${field}`);
        }
        break;
      case '$type':
        conditions.push(this.getTypeCheck(fieldRef, value));
        explanation.push(`Type check on ${field} for type ${value}`);
        break;
      case '$regex':
        // Approximated with LIKE; the ESCAPE clause lets literal % and _ be matched.
        conditions.push(`${fieldRef} LIKE ? ESCAPE '\\'`);
        params.push(this.convertRegexToLike(value));
        explanation.push(`Regex pattern on ${field}`);
        break;
      case '$options':
        // Companion of $regex; flags are not translated to SQL, so nothing to add.
        break;
      case '$size':
        if (typeof value !== 'number') {
          throw new DocumentError('$size requires a number', 'INVALID_QUERY');
        }
        conditions.push(`JSON_ARRAY_LENGTH(${fieldRef}) = ?`);
        params.push(value);
        explanation.push(`Array size check on ${field}`);
        break;
      case '$all':
        this.requireArray(operator, value, true);
        for (const item of value) {
          conditions.push(`EXISTS (SELECT 1 FROM JSON_EACH(${fieldRef}) WHERE JSON_EACH.value = ?)`);
          params.push(item);
        }
        explanation.push(`Array contains all values on ${field}`);
        break;
      case '$elemMatch': {
        // Conditions must refer to the current array element (JSON_EACH.value),
        // not to the enclosing document.
        const elemConditions = [];
        for (const [key, subValue] of Object.entries(value ?? {})) {
          if (key.startsWith('$')) {
            this.handleFieldOperator(field, key, subValue, elemConditions, params, explanation, 'JSON_EACH.value');
          } else {
            const elemRef = this.jsonExtract('JSON_EACH.value', key);
            this.handleFieldFilter(key, subValue, elemConditions, params, explanation, elemRef);
          }
        }
        conditions.push(`EXISTS (SELECT 1 FROM JSON_EACH(${fieldRef}) WHERE ${this.joinConditions(elemConditions)})`);
        explanation.push(`Element match on array ${field}`);
        break;
      }
      default:
        throw new DocumentError(`Unsupported field operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
    }
  }

  /**
   * Handle `$text: { $search: '...' }` by requiring every whitespace-separated
   * term to appear in `_searchText` (LIKE-based; no FTS index is used here).
   */
  handleTextSearch(textQuery, conditions, params, explanation) {
    const search = textQuery?.$search;
    if (typeof search !== 'string' || search.length === 0) {
      return;
    }
    const searchTerms = search.split(/\s+/).filter((term) => term.length > 0);
    if (searchTerms.length === 0) {
      return;
    }
    const searchConditions = searchTerms.map(() => '_searchText LIKE ?');
    conditions.push(`(${searchConditions.join(' AND ')})`);
    params.push(...searchTerms.map((term) => `%${term}%`));
    explanation.push(`Full-text search for: ${search}`);
  }

  /**
   * Handle `$vectorSearch`. Placeholder: only restricts to rows that have a
   * vector; no similarity ranking is performed by this method.
   */
  handleVectorSearch(vectorQuery, conditions, params, explanation) {
    if (Array.isArray(vectorQuery?.vector)) {
      conditions.push('_vector IS NOT NULL');
      explanation.push(`Vector search with ${vectorQuery.vector.length} dimensions`);
    }
  }

  /**
   * Handle `$hybridSearch` (text + vector): a LIKE match on `_searchText`
   * and/or a "has a vector" check, AND-ed together.
   */
  handleHybridSearch(hybridQuery, conditions, params, explanation) {
    const hybridConditions = [];
    if (hybridQuery?.text) {
      hybridConditions.push('_searchText LIKE ?');
      params.push(`%${hybridQuery.text}%`);
    }
    if (hybridQuery?.vector) {
      hybridConditions.push('_vector IS NOT NULL');
    }
    if (hybridConditions.length > 0) {
      conditions.push(`(${hybridConditions.join(' AND ')})`);
      explanation.push('Hybrid text and vector search');
    }
  }

  /**
   * Build the ORDER BY clause from a `{ field: 1 | -1 }` sort document.
   * A direction of exactly `1` sorts ascending; anything else descending.
   * @returns {{orderClause: string, sortExplanation: string}}
   */
  buildOrderBy(sort) {
    const sortClauses = [];
    const explanations = [];
    for (const [field, direction] of Object.entries(sort)) {
      if (field === '$textScore') {
        // Placeholder ordering: rows with search text first.
        sortClauses.push('_searchText IS NOT NULL DESC');
        explanations.push('Text score');
      } else if (field === '$vectorScore') {
        // Placeholder ordering: rows with a vector first.
        sortClauses.push('_vector IS NOT NULL DESC');
        explanations.push('Vector score');
      } else {
        const fieldRef = this.getFieldReference(field);
        const dir = direction === 1 ? 'ASC' : 'DESC';
        sortClauses.push(`${fieldRef} ${dir}`);
        explanations.push(`${field} ${dir}`);
      }
    }
    const orderClause = sortClauses.length > 0 ? ` ORDER BY ${sortClauses.join(', ')}` : '';
    const sortExplanation = `Sort by: ${explanations.join(', ')}`;
    return { orderClause, sortExplanation };
  }

  /**
   * Build the SELECT column list for a projection.
   *
   * Document fields are stored inside the `_data` JSON column, so the column
   * list is the same whatever the projection asks for; this method does not
   * narrow the selection.
   */
  // eslint-disable-next-line no-unused-vars
  buildProjection(projection) {
    return MongoQueryEngine.SELECT_COLUMNS;
  }

  /** Escape a value for use inside a single-quoted SQL string literal. */
  escapeSqlString(text) {
    return String(text).replace(/'/g, "''");
  }

  /** Build `JSON_EXTRACT(<source>, '$.<field>')` with the path safely quoted. */
  jsonExtract(source, field) {
    return `JSON_EXTRACT(${source}, '$.${this.escapeSqlString(field)}')`;
  }

  /**
   * Get the SQL expression for a document field: its indexed column when one
   * is mapped, otherwise a JSON_EXTRACT on `_data`.
   */
  getFieldReference(field) {
    return this.getIndexColumn(field) ?? this.jsonExtract('_data', field);
  }

  /**
   * Get the indexed column for a field.
   * @returns {string|null} Column name, or null when the field is not mapped.
   */
  getIndexColumn(field) {
    return MongoQueryEngine.INDEXED_FIELDS.get(field) ?? null;
  }

  /**
   * Build the SQL predicate for `$type`.
   * @param {string} fieldRef - SQL expression for the field.
   * @param {string|number} type - Type name or BSON type number.
   */
  getTypeCheck(fieldRef, type) {
    if (typeof type === 'string') {
      switch (type) {
        case 'string':
          return `typeof(${fieldRef}) = 'text'`;
        case 'number':
          return `typeof(${fieldRef}) IN ('integer', 'real')`;
        case 'boolean':
          return `typeof(${fieldRef}) = 'integer' AND ${fieldRef} IN (0, 1)`;
        case 'object':
          return `typeof(${fieldRef}) = 'text' AND JSON_VALID(${fieldRef})`;
        case 'array':
          return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'array'`;
        case 'null':
          return `${fieldRef} IS NULL`;
        default:
          // Passed through as a storage-class name; quoted to keep it a literal.
          return `typeof(${fieldRef}) = '${this.escapeSqlString(type)}'`;
      }
    }
    // BSON type numbers (simplified)
    switch (type) {
      case 1: // double
        return `typeof(${fieldRef}) = 'real'`;
      case 2: // string
        return `typeof(${fieldRef}) = 'text'`;
      case 3: // object
        return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'object'`;
      case 4: // array
        return `typeof(${fieldRef}) = 'text' AND JSON_TYPE(${fieldRef}) = 'array'`;
      case 8: // boolean
        return `typeof(${fieldRef}) = 'integer' AND ${fieldRef} IN (0, 1)`;
      case 10: // null
        return `${fieldRef} IS NULL`;
      case 16: // int32
      case 18: // int64
        return `typeof(${fieldRef}) = 'integer'`;
      default:
        return '1=1'; // Always true for unknown types
    }
  }

  /** Backslash-escape a character that is special in a LIKE pattern. */
  escapeLikeLiteral(ch) {
    return ch === '%' || ch === '_' || ch === '\\' ? `\\${ch}` : ch;
  }

  /**
   * Convert a regular expression to an approximate SQL LIKE pattern.
   *
   * The result is meant to be used with `ESCAPE '\'`. Supported: `^`/`$`
   * anchors, `.`, `.*`, `.+`, character classes and `\d \w \s` (one arbitrary
   * character), escaped literals; bare `+`/`*` become `%` and `?` becomes `_`.
   *
   * @param {RegExp|string} regex - RegExp object or pattern source string.
   * @returns {string} LIKE pattern.
   */
  convertRegexToLike(regex) {
    // Only real RegExp objects carry /…/ delimiters; a string is already the source.
    const source = regex instanceof RegExp ? regex.source : String(regex);
    let like = '';
    let anchoredStart = false;
    let anchoredEnd = false;
    for (let i = 0; i < source.length; i++) {
      const ch = source[i];
      if (ch === '\\' && i + 1 < source.length) {
        i += 1;
        const escaped = source[i];
        like += 'dDwWsS'.includes(escaped) ? '_' : this.escapeLikeLiteral(escaped);
      } else if (ch === '^') {
        anchoredStart = anchoredStart || i === 0;
      } else if (ch === '$') {
        anchoredEnd = anchoredEnd || i === source.length - 1;
      } else if (ch === '.') {
        const quantifier = source[i + 1];
        if (quantifier === '*') {
          like += '%';
          i += 1;
        } else if (quantifier === '+') {
          like += '_%';
          i += 1;
        } else {
          like += '_';
        }
      } else if (ch === '[') {
        const close = source.indexOf(']', i + 1);
        if (close === -1) {
          like += ch;
        } else {
          like += '_';
          i = close;
        }
      } else if (ch === '+' || ch === '*') {
        like += '%';
      } else if (ch === '?') {
        like += '_';
      } else {
        like += this.escapeLikeLiteral(ch);
      }
    }
    // An unanchored side may be preceded/followed by anything.
    return `${anchoredStart ? '' : '%'}${like}${anchoredEnd ? '' : '%'}`;
  }

  /**
   * Execute an aggregation pipeline.
   *
   * Loads every non-deleted document of the collection, then applies each
   * stage in memory, and records a metrics entry.
   *
   * @returns {Promise<Array>} Documents produced by the final stage.
   * @throws {DocumentError} `AGGREGATION_ERROR` wrapping any failure (the
   *   original error is attached as `cause`).
   */
  // eslint-disable-next-line no-unused-vars
  async executeAggregation(database, collection, pipeline, options = {}) {
    const startTime = Date.now();
    let currentData = [];
    try {
      // Start with all documents from collection
      const { sql, params } = this.buildQuery(database, collection, {}, {});
      const result = await this.d1.prepare(sql).bind(...params).all();
      currentData = result.results?.map((row) => {
        const doc = JSON.parse(row._data);
        if (row._vector && row._vectorDims) {
          doc._vector = {
            id: doc._id,
            data: new Float32Array(new Uint8Array(row._vector).buffer),
          };
        }
        return doc;
      }) || [];

      for (const stage of pipeline) {
        currentData = await this.processAggregationStage(currentData, stage);
      }

      this.recordQueryMetrics({
        queryId: this.generateQueryId(),
        timestamp: new Date(),
        queryType: 'aggregate',
        latency: Date.now() - startTime,
        documentsExamined: result.results?.length || 0,
        documentsReturned: currentData.length,
        indexHits: 0,
        cacheHit: false,
      });
      return currentData;
    } catch (error) {
      const wrapped = new DocumentError(`Aggregation failed: ${error.message}`, 'AGGREGATION_ERROR');
      wrapped.cause = error;
      throw wrapped;
    }
  }

  /**
   * Process a single aggregation stage (the stage's first key selects it).
   * @throws {DocumentError} `UNSUPPORTED_STAGE` for unknown stages.
   */
  async processAggregationStage(data, stage) {
    const stageKey = Object.keys(stage)[0];
    const stageValue = stage[stageKey];
    switch (stageKey) {
      case '$match':
        return this.stageMatch(data, stageValue);
      case '$project':
        return this.stageProject(data, stageValue);
      case '$group':
        return this.stageGroup(data, stageValue);
      case '$sort':
        return this.stageSort(data, stageValue);
      case '$limit':
        return this.stageLimit(data, stageValue);
      case '$skip':
        return this.stageSkip(data, stageValue);
      case '$unwind':
        return this.stageUnwind(data, stageValue);
      case '$lookup':
        return await this.stageLookup(data, stageValue);
      case '$addFields':
        return this.stageAddFields(data, stageValue);
      case '$count':
        return [{ [stageValue]: data.length }];
      case '$sample':
        return this.stageSample(data, stageValue);
      default:
        throw new DocumentError(`Unsupported aggregation stage: ${stageKey}`, 'UNSUPPORTED_STAGE');
    }
  }

  // Aggregation stage implementations

  /** $match: keep documents satisfying `filter`. */
  stageMatch(data, filter) {
    return data.filter((doc) => this.matchesFilter(doc, filter));
  }

  /** $project: reshape every document according to `projection`. */
  stageProject(data, projection) {
    return data.map((doc) => this.applyProjection(doc, projection));
  }

  /**
   * $group: bucket documents by the evaluated `_id` expression and apply the
   * accumulators. Groups are returned in first-seen order; a missing key is
   * grouped under `null`.
   */
  stageGroup(data, groupSpec) {
    const groups = new Map();
    for (const doc of data) {
      const groupKey = this.evaluateExpression(doc, groupSpec._id);
      // JSON.stringify(undefined) is undefined, which JSON.parse cannot read back.
      const keyStr = JSON.stringify(groupKey) ?? 'null';
      if (!groups.has(keyStr)) {
        groups.set(keyStr, []);
      }
      groups.get(keyStr).push(doc);
    }

    const results = [];
    for (const [keyStr, groupDocs] of groups) {
      const result = { _id: JSON.parse(keyStr) };
      for (const [field, operation] of Object.entries(groupSpec)) {
        if (field === '_id') {
          continue;
        }
        result[field] = this.applyGroupOperation(groupDocs, operation);
      }
      results.push(result);
    }
    return results;
  }

  /** $sort: return a sorted copy; direction `1` is ascending, anything else descending. */
  stageSort(data, sortSpec) {
    const sortKeys = Object.entries(sortSpec);
    return [...data].sort((a, b) => {
      for (const [field, direction] of sortKeys) {
        const aVal = this.getFieldValue(a, field);
        const bVal = this.getFieldValue(b, field);
        if (aVal < bVal) {
          return direction === 1 ? -1 : 1;
        }
        if (aVal > bVal) {
          return direction === 1 ? 1 : -1;
        }
      }
      return 0;
    });
  }

  /** $limit: first `limit` documents. */
  stageLimit(data, limit) {
    return data.slice(0, limit);
  }

  /** $skip: everything after the first `skip` documents. */
  stageSkip(data, skip) {
    return data.slice(skip);
  }

  /**
   * $unwind: emit one document per element of the array at `path`.
   * Documents whose value is not an array are passed through unchanged.
   * Input documents are never mutated.
   */
  stageUnwind(data, unwindSpec) {
    const path = typeof unwindSpec === 'string' ? unwindSpec : unwindSpec.path;
    const field = path.startsWith('$') ? path.substring(1) : path;
    const results = [];
    for (const doc of data) {
      const arrayValue = this.getFieldValue(doc, field);
      if (!Array.isArray(arrayValue)) {
        results.push(doc);
        continue;
      }
      for (const item of arrayValue) {
        // Copy every container on the path so nested paths do not alias
        // the source document or sibling results.
        const { root, parent, leaf } = this.copyPath(doc, field, true);
        parent[leaf] = item;
        results.push(root);
      }
    }
    return results;
  }

  /**
   * $lookup: placeholder that does not query the foreign collection; every
   * document receives an empty array under `lookupSpec.as`.
   */
  async stageLookup(data, lookupSpec) {
    return data.map((doc) => ({
      ...doc,
      [lookupSpec.as]: [],
    }));
  }

  /** $addFields: copy each document and set the evaluated top-level fields. */
  stageAddFields(data, fieldsSpec) {
    const additions = Object.entries(fieldsSpec);
    return data.map((doc) => {
      const newDoc = { ...doc };
      for (const [field, expression] of additions) {
        newDoc[field] = this.evaluateExpression(doc, expression);
      }
      return newDoc;
    });
  }

  /**
   * $sample: pick `size` (default 1) documents uniformly at random without
   * replacement, using a partial Fisher-Yates shuffle.
   */
  stageSample(data, sampleSpec) {
    const size = sampleSpec?.size || 1;
    const pool = [...data];
    const count = Math.max(0, Math.min(size, pool.length));
    for (let i = 0; i < count; i++) {
      const j = i + Math.floor(Math.random() * (pool.length - i));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }
    return pool.slice(0, count);
  }

  // Helper methods

  /** True for non-null objects that are not arrays, Dates or RegExps. */
  isPlainObject(value) {
    return value !== null
      && typeof value === 'object'
      && !Array.isArray(value)
      && !(value instanceof Date)
      && !(value instanceof RegExp);
  }

  /** True when `value` is a non-empty object whose keys all start with `$`. */
  isOperatorExpression(value) {
    if (!this.isPlainObject(value)) {
      return false;
    }
    const keys = Object.keys(value);
    return keys.length > 0 && keys.every((key) => key.startsWith('$'));
  }

  /**
   * In-memory filter evaluation used by `$match`.
   * Supports equality, comparison/array operators and $and/$or/$nor/$not.
   * @throws {DocumentError} For unsupported operators.
   */
  matchesFilter(document, filter) {
    for (const [key, expected] of Object.entries(filter ?? {})) {
      const matched = key.startsWith('$')
        ? this.matchesLogical(document, key, expected)
        : this.matchesValue(this.getFieldValue(document, key), expected);
      if (!matched) {
        return false;
      }
    }
    return true;
  }

  /** Evaluate a top-level logical operator against a document. */
  matchesLogical(document, operator, operand) {
    switch (operator) {
      case '$and':
        return operand.every((sub) => this.matchesFilter(document, sub));
      case '$or':
        return operand.some((sub) => this.matchesFilter(document, sub));
      case '$nor':
        return !operand.some((sub) => this.matchesFilter(document, sub));
      case '$not':
        return !this.matchesFilter(document, operand);
      default:
        throw new DocumentError(`Unsupported operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
    }
  }

  /** Test a field value against either an operator object or a literal. */
  matchesValue(actual, expected) {
    if (this.isOperatorExpression(expected)) {
      return Object.entries(expected)
        .every(([op, operand]) => this.matchesOperator(actual, op, operand, expected));
    }
    return this.valuesEqual(actual, expected);
  }

  /**
   * Equality as used by filters: strict equality, `null` also matches a
   * missing field, a scalar matches an array containing it, and
   * objects/arrays compare by their JSON form.
   */
  valuesEqual(actual, expected) {
    if (actual === expected) {
      return true;
    }
    if (expected === null) {
      return actual === undefined;
    }
    if (Array.isArray(actual) && !Array.isArray(expected)) {
      return actual.some((item) => this.valuesEqual(item, expected));
    }
    if (actual !== null && typeof actual === 'object' && typeof expected === 'object') {
      return JSON.stringify(actual) === JSON.stringify(expected);
    }
    return false;
  }

  /** Evaluate one field operator in memory. `siblings` is the enclosing operator object. */
  matchesOperator(actual, operator, operand, siblings) {
    const present = actual !== null && actual !== undefined;
    switch (operator) {
      case '$eq':
        return this.valuesEqual(actual, operand);
      case '$ne':
        return !this.valuesEqual(actual, operand);
      case '$gt':
        return present && actual > operand;
      case '$gte':
        return present && actual >= operand;
      case '$lt':
        return present && actual < operand;
      case '$lte':
        return present && actual <= operand;
      case '$in':
        return Array.isArray(operand) && operand.some((candidate) => this.valuesEqual(actual, candidate));
      case '$nin':
        return !(Array.isArray(operand) && operand.some((candidate) => this.valuesEqual(actual, candidate)));
      case '$exists':
        return (actual !== undefined) === Boolean(operand);
      case '$size':
        return Array.isArray(actual) && actual.length === operand;
      case '$all':
        return Array.isArray(actual)
          && Array.isArray(operand)
          && operand.every((candidate) => actual.some((item) => this.valuesEqual(item, candidate)));
      case '$regex': {
        if (typeof actual !== 'string') {
          return false;
        }
        // A fresh RegExp without g/y flags avoids stateful lastIndex matching.
        const isRegExp = operand instanceof RegExp;
        const flags = siblings?.$options ?? (isRegExp ? operand.flags.replace(/[gy]/g, '') : '');
        return new RegExp(isRegExp ? operand.source : String(operand), flags).test(actual);
      }
      case '$options':
        return true; // consumed by $regex
      case '$not':
        return !this.matchesValue(actual, operand);
      case '$elemMatch':
        return Array.isArray(actual) && actual.some((item) => (
          this.isOperatorExpression(operand)
            ? this.matchesValue(item, operand)
            : item !== null && typeof item === 'object' && this.matchesFilter(item, operand)
        ));
      default:
        throw new DocumentError(`Unsupported field operator: ${operator}`, 'UNSUPPORTED_OPERATOR');
    }
  }

  /**
   * Apply a `$project` specification to one document.
   *
   * If any entry is truthy the projection is an inclusion: listed fields (or
   * evaluated expressions) are returned, plus `_id` unless the projection
   * mentions `_id` itself. Otherwise it is an exclusion: a copy of the
   * document without the listed fields is returned.
   */
  applyProjection(document, projection) {
    const entries = Object.entries(projection ?? {});
    const isInclusion = entries.some(([, spec]) => Boolean(spec));

    if (!isInclusion) {
      let remaining = { ...document };
      for (const [field] of entries) {
        const { root, parent, leaf } = this.copyPath(remaining, field, false);
        if (parent !== null) {
          delete parent[leaf];
        }
        remaining = root;
      }
      return remaining;
    }

    const result = {};
    const mentionsId = Object.prototype.hasOwnProperty.call(projection, '_id');
    if (!mentionsId && document._id !== undefined) {
      result._id = document._id;
    }
    for (const [field, include] of entries) {
      if (!include) {
        continue;
      }
      result[field] = typeof include === 'object'
        ? this.evaluateExpression(document, include)
        : this.getFieldValue(document, field);
    }
    return result;
  }

  /**
   * Evaluate an aggregation expression against a document.
   * `'$a.b'` reads a field path; an object without `$`-prefixed keys is
   * evaluated value by value; anything else is returned as a literal.
   */
  evaluateExpression(document, expression) {
    if (typeof expression === 'string' && expression.startsWith('$')) {
      return this.getFieldValue(document, expression.substring(1));
    }
    if (this.isPlainObject(expression)) {
      const keys = Object.keys(expression);
      if (keys.length > 0 && !keys.some((key) => key.startsWith('$'))) {
        return Object.fromEntries(
          keys.map((key) => [key, this.evaluateExpression(document, expression[key])]),
        );
      }
    }
    return expression;
  }

  /**
   * Apply a `$group` accumulator to the documents of one group.
   * Non-numeric values are ignored by $sum/$avg; null/missing values are
   * ignored by $min/$max; $avg/$min/$max return null when nothing qualifies.
   * Unknown accumulators yield null; non-object operations are returned as-is.
   */
  applyGroupOperation(docs, operation) {
    if (typeof operation !== 'object' || operation === null) {
      return operation;
    }
    const op = Object.keys(operation)[0];
    const operand = operation[op];
    const collect = () => docs.map((doc) => this.evaluateExpression(doc, operand));
    const numeric = () => collect().filter((v) => typeof v === 'number' && !Number.isNaN(v));
    const comparable = () => collect().filter((v) => v !== null && v !== undefined);

    switch (op) {
      case '$sum':
        return numeric().reduce((sum, v) => sum + v, 0);
      case '$avg': {
        const numbers = numeric();
        return numbers.length > 0
          ? numbers.reduce((sum, v) => sum + v, 0) / numbers.length
          : null;
      }
      case '$min': {
        const candidates = comparable();
        return candidates.length > 0
          ? candidates.reduce((best, v) => (v < best ? v : best))
          : null;
      }
      case '$max': {
        const candidates = comparable();
        return candidates.length > 0
          ? candidates.reduce((best, v) => (v > best ? v : best))
          : null;
      }
      case '$first':
        return docs.length > 0 ? this.evaluateExpression(docs[0], operand) : null;
      case '$last':
        return docs.length > 0 ? this.evaluateExpression(docs[docs.length - 1], operand) : null;
      case '$push':
        return collect();
      case '$addToSet': {
        const seen = new Set();
        return collect().filter((v) => {
          const key = JSON.stringify(v);
          if (seen.has(key)) {
            return false;
          }
          seen.add(key);
          return true;
        });
      }
      case '$count':
        return docs.length;
      default:
        return null;
    }
  }

  /**
   * Read a dot-separated path from a document.
   * @returns {*} The value, or undefined when any step is missing.
   */
  getFieldValue(document, field) {
    let current = document;
    for (const segment of field.split('.')) {
      if (!current || typeof current !== 'object') {
        return undefined;
      }
      current = current[segment];
    }
    return current;
  }

  /**
   * Set a dot-separated path on a document IN PLACE, creating (or replacing
   * non-object) intermediate containers as needed.
   */
  setFieldValue(document, field, value) {
    const path = field.split('.');
    let current = document;
    for (let i = 0; i < path.length - 1; i++) {
      const segment = path[i];
      if (!current[segment] || typeof current[segment] !== 'object') {
        current[segment] = {};
      }
      current = current[segment];
    }
    current[path[path.length - 1]] = value;
  }

  /**
   * Shallow-copy a document together with every container on the way to the
   * parent of `field`, so the leaf can be changed without touching the source.
   *
   * @param {boolean} create - Create missing intermediate objects when true.
   * @returns {{root: object, parent: (object|null), leaf: string}} `parent` is
   *   null when the path does not exist and `create` is false.
   */
  copyPath(document, field, create) {
    const segments = field.split('.');
    const leaf = segments[segments.length - 1];
    const root = { ...document };
    let parent = root;
    for (let i = 0; i < segments.length - 1; i++) {
      const segment = segments[i];
      const child = parent[segment];
      if (child !== null && typeof child === 'object') {
        parent[segment] = Array.isArray(child) ? [...child] : { ...child };
      } else if (create) {
        parent[segment] = {};
      } else {
        return { root, parent: null, leaf };
      }
      parent = parent[segment];
    }
    return { root, parent, leaf };
  }

  /** Append a metrics entry, keeping only the most recent 1000. */
  recordQueryMetrics(metrics) {
    this.queryMetrics.push(metrics);
    if (this.queryMetrics.length > 1000) {
      this.queryMetrics = this.queryMetrics.slice(-1000);
    }
  }

  /** Generate a short random identifier (not cryptographically secure). */
  generateQueryId() {
    return Math.random().toString(36).substring(2, 15);
  }

  /**
   * Get query performance metrics.
   * @returns {Array} A copy of the recorded metrics.
   */
  getQueryMetrics() {
    return [...this.queryMetrics];
  }

  /**
   * Explain the execution plan of a query.
   *
   * Runs `EXPLAIN QUERY PLAN` for the generated SQL and reports `IXSCAN` with
   * the index names when the plan's `detail` text mentions `USING [COVERING]
   * INDEX <name>`, otherwise `COLLSCAN`. Counters are always 0 because the
   * query itself is not executed.
   */
  async explainQuery(database, collection, filter, options = {}) {
    const { sql, params, explanation } = this.buildQuery(database, collection, filter, options);
    const explainSql = `EXPLAIN QUERY PLAN ${sql}`;
    const planResult = await this.d1.prepare(explainSql).bind(...params).all();

    const sqlitePlan = (planResult?.results ?? []).map((row) => String(row?.detail ?? ''));
    const indexNames = sqlitePlan.flatMap((detail) => {
      const match = /USING (?:COVERING )?INDEX (\S+)/.exec(detail);
      return match ? [match[1]] : [];
    });
    const indexesUsed = [...new Set(indexNames)];
    const stage = indexesUsed.length > 0 ? 'IXSCAN' : 'COLLSCAN';
    const buildStats = () => ({
      totalExamined: 0,
      totalDocsReturned: 0,
      executionTimeMillis: 0,
      indexesUsed: [...indexesUsed],
    });

    return {
      queryPlanner: {
        plannerVersion: 1,
        namespace: `${database}.${collection}`,
        indexFilterSet: false,
        parsedQuery: filter,
        winningPlan: {
          stage,
          executionStats: buildStats(),
        },
        rejectedPlans: [],
        sql,
        explanation,
        sqlitePlan,
      },
      executionStats: {
        stage,
        executionStats: buildStats(),
      },
    };
  }
}
//# sourceMappingURL=query-engine.js.map