UNPKG

mcp-context-engineering

Version:

The intelligent context optimization system for AI coding assistants. Built with Cole's PRP methodology, Context Portal knowledge graphs, and production-ready MongoDB architecture.

332 lines (331 loc) 11.8 kB
import { ObjectId } from 'mongodb';
import { z } from 'zod';

/**
 * Vector Embeddings Schema - MongoDB Atlas Vector Search Optimized
 *
 * Following best practices for:
 * - MongoDB Atlas Vector Search with hybrid search capabilities
 * - Voyage AI embeddings for English methodology descriptions
 * - Semantic search across context patterns and knowledge graphs
 * - Cross-workspace knowledge sharing and discovery
 */

// Individual embedding document
export const EmbeddingDocumentSchema = z.object({
  _id: z.instanceof(ObjectId).optional(),

  // Source identification
  workspace_id: z.string(),
  project_id: z.string(),
  item_type: z.enum([
    'prp_methodology',
    'context_pattern',
    'decision',
    'progress_entry',
    'system_pattern',
    'query',
    'product_context',
    'active_context'
  ]),
  item_id: z.string(),

  // Vector data (1024 dimensions for Voyage AI voyage-3-large)
  embedding: z.array(z.number()).length(1024),

  // Text content that was embedded
  text_content: z.string(),
  text_type: z.enum(['methodology_description', 'context_description', 'query_text', 'combined_text']),

  // Metadata for hybrid search
  metadata: z.object({
    // Core attributes
    title: z.string().optional(),
    description: z.string().optional(),
    tags: z.array(z.string()).default([]),
    category: z.string().optional(),

    // Technical metadata
    tech_stack: z.array(z.string()).default([]),
    complexity: z.enum(['low', 'medium', 'high']).optional(),
    pattern_type: z.string().optional(),

    // Quality and effectiveness
    effectiveness_score: z.number().min(0).max(10).optional(),
    usage_count: z.number().default(0),
    success_rate: z.number().min(0).max(1).optional(),

    // Temporal metadata
    created_at: z.date(),
    last_accessed: z.date().optional(),
    last_updated: z.date(),

    // Workspace isolation
    workspace_type: z.enum(['private', 'team', 'public']).default('private'),
    access_level: z.enum(['read', 'write', 'admin']).default('read')
  }),

  // Cross-reference links
  related_items: z.array(z.object({
    item_type: z.string(),
    item_id: z.string(),
    relationship: z.string(),
    strength: z.number().min(0).max(1)
  })).default([]),

  // Embedding generation metadata
  embedding_metadata: z.object({
    model: z.string().default('voyage-3-large'),
    model_version: z.string().optional(),
    generation_timestamp: z.date(),
    token_count: z.number().optional(),
    processing_time_ms: z.number().optional(),
    confidence_score: z.number().min(0).max(1).optional()
  }),

  // Version control
  version: z.number().default(1),
  superseded_by: z.instanceof(ObjectId).optional(),
  supersedes: z.instanceof(ObjectId).optional()
});

// Semantic search query schema
export const SemanticQuerySchema = z.object({
  // Query text
  query_text: z.string(),
  query_embedding: z.array(z.number()).length(1024).optional(),

  // Search parameters
  k: z.number().min(1).max(100).default(10), // Number of results
  min_score: z.number().min(0).max(1).default(0.7), // Minimum similarity score

  // Filters for hybrid search
  filters: z.object({
    workspace_ids: z.array(z.string()).optional(),
    project_ids: z.array(z.string()).optional(),
    item_types: z.array(z.string()).optional(),
    tech_stacks: z.array(z.string()).optional(),
    complexity: z.enum(['low', 'medium', 'high']).optional(),
    min_effectiveness: z.number().min(0).max(10).optional(),
    created_after: z.date().optional(),
    tags: z.array(z.string()).optional()
  }).optional(),

  // Search behavior
  include_metadata: z.boolean().default(true),
  include_text_content: z.boolean().default(false),
  cross_workspace: z.boolean().default(false)
});

// Search result schema
export const SearchResultSchema = z.object({
  document: EmbeddingDocumentSchema,
  similarity_score: z.number().min(0).max(1),
  rank: z.number(),
  explanation: z.string().optional()
});

// Batch embedding operation schema
export const BatchEmbeddingSchema = z.object({
  _id: z.instanceof(ObjectId).optional(),
  batch_id: z.string(),
  workspace_id: z.string(),
  project_id: z.string(),

  // Batch metadata
  status: z.enum(['pending', 'processing', 'completed', 'failed']),
  total_items: z.number(),
  processed_items: z.number().default(0),
  failed_items: z.number().default(0),

  // Processing details
  started_at: z.date().optional(),
  completed_at: z.date().optional(),
  processing_time_ms: z.number().optional(),

  // Items to embed
  items: z.array(z.object({
    item_id: z.string(),
    item_type: z.string(),
    text_content: z.string(),
    metadata: z.record(z.any()),
    status: z.enum(['pending', 'processing', 'completed', 'failed']).default('pending'),
    error_message: z.string().optional()
  })),

  // Configuration
  embedding_config: z.object({
    model: z.string().default('voyage-3-large'),
    batch_size: z.number().default(100),
    retry_attempts: z.number().default(3),
    timeout_ms: z.number().default(30000)
  }),

  created_at: z.date(),
  updated_at: z.date()
});

// Vector search index configuration
export const VectorIndexConfigSchema = z.object({
  index_name: z.string(),
  collection_name: z.string(),

  // Vector field configuration
  vector_field: z.string().default('embedding'),
  dimensions: z.number().default(1024),
  similarity: z.enum(['cosine', 'euclidean', 'dotProduct']).default('cosine'),

  // Index options
  index_options: z.object({
    type: z.literal('vector'),
    numLists: z.number().optional(), // For IVF indexes
    filters: z.array(z.string()).optional() // Filterable fields
  }),

  // Metadata
  created_at: z.date(),
  status: z.enum(['creating', 'ready', 'failed']),
  index_size_mb: z.number().optional(),
  document_count: z.number().optional()
});

// Embedding analytics and performance tracking
export const EmbeddingAnalyticsSchema = z.object({
  _id: z.instanceof(ObjectId).optional(),
  workspace_id: z.string(),
  analysis_period: z.object({
    start_date: z.date(),
    end_date: z.date()
  }),

  // Usage statistics
  usage_stats: z.object({
    total_embeddings: z.number(),
    new_embeddings: z.number(),
    updated_embeddings: z.number(),
    search_queries: z.number(),
    avg_search_latency_ms: z.number(),
    cache_hit_rate: z.number()
  }),

  // Quality metrics
  quality_metrics: z.object({
    avg_similarity_score: z.number(),
    search_satisfaction_rate: z.number(),
    false_positive_rate: z.number(),
    coverage_completeness: z.number()
  }),

  // Performance insights
  performance_insights: z.object({
    top_search_patterns: z.array(z.string()),
    most_effective_embeddings: z.array(z.string()),
    optimization_opportunities: z.array(z.string()),
    recommended_actions: z.array(z.string())
  }),

  // Cost tracking
  cost_tracking: z.object({
    embedding_api_calls: z.number(),
    estimated_cost_usd: z.number(),
    token_usage: z.number(),
    cost_per_search: z.number()
  }),

  analysis_date: z.date(),
  next_analysis_due: z.date()
});

/**
 * Vector Search Service Helper Class
 * Provides utilities for working with MongoDB Atlas Vector Search
 */
export class VectorSearchHelper {
  /**
   * Generate MongoDB Atlas Vector Search aggregation pipeline.
   *
   * Stages, in order: an optional `$vectorSearch` (only when the query
   * carries an embedding), an `$addFields` exposing the search score as
   * `similarity_score`, an optional `$match` on `min_score`, and a
   * `$project` restricted to the identifying fields plus whatever the
   * `include_*` flags request.
   *
   * @param {object} query - Object shaped like `SemanticQuerySchema` output;
   *   reads `query_embedding`, `k`, `min_score`, `filters`,
   *   `include_metadata` and `include_text_content`.
   * @param {object} [options] - Optional overrides.
   * @param {string} [options.indexName='vector_index'] - Search index name.
   * @param {string} [options.vectorPath='embedding'] - Field holding the vector.
   * @returns {object[]} Aggregation pipeline stages.
   */
  static createVectorSearchPipeline(query, { indexName = 'vector_index', vectorPath = 'embedding' } = {}) {
    const pipeline = [];

    // Vector search stage
    // NOTE(review): when `query.query_embedding` is absent no $vectorSearch
    // stage is emitted, yet the score stages below still reference the
    // `vectorSearchScore` metadata - confirm callers always supply an embedding.
    if (query.query_embedding) {
      pipeline.push({
        $vectorSearch: {
          index: indexName,
          path: vectorPath,
          queryVector: query.query_embedding,
          numCandidates: query.k * 10, // MongoDB recommendation
          limit: query.k,
          filter: this.buildFilterExpression(query.filters)
        }
      });
    }

    // Add similarity score
    pipeline.push({
      $addFields: {
        similarity_score: { $meta: 'vectorSearchScore' }
      }
    });

    // Filter by minimum score
    if (query.min_score > 0) {
      pipeline.push({
        $match: {
          similarity_score: { $gte: query.min_score }
        }
      });
    }

    // Project fields based on requirements
    const projection = {
      similarity_score: 1,
      item_type: 1,
      item_id: 1,
      workspace_id: 1,
      project_id: 1
    };
    if (query.include_metadata) {
      projection.metadata = 1;
    }
    if (query.include_text_content) {
      projection.text_content = 1;
    }
    pipeline.push({ $project: projection });

    return pipeline;
  }

  /**
   * Build filter expression for hybrid search.
   *
   * Each populated filter becomes one key of the returned object; empty
   * arrays and missing values contribute nothing.
   *
   * @param {object} [filters] - Object shaped like `SemanticQuerySchema.filters`.
   * @returns {object} Filter document; `{}` when `filters` is falsy.
   */
  static buildFilterExpression(filters) {
    if (!filters) return {};

    const filterExpr = {};

    if (filters.workspace_ids?.length) {
      filterExpr.workspace_id = { $in: filters.workspace_ids };
    }
    if (filters.project_ids?.length) {
      filterExpr.project_id = { $in: filters.project_ids };
    }
    if (filters.item_types?.length) {
      filterExpr.item_type = { $in: filters.item_types };
    }
    if (filters.tech_stacks?.length) {
      filterExpr['metadata.tech_stack'] = { $in: filters.tech_stacks };
    }
    if (filters.complexity) {
      filterExpr['metadata.complexity'] = filters.complexity;
    }
    // Truthiness check: a `min_effectiveness` of 0 adds no clause.
    if (filters.min_effectiveness) {
      filterExpr['metadata.effectiveness_score'] = { $gte: filters.min_effectiveness };
    }
    if (filters.created_after) {
      filterExpr['metadata.created_at'] = { $gte: filters.created_after };
    }
    if (filters.tags?.length) {
      filterExpr['metadata.tags'] = { $in: filters.tags };
    }

    return filterExpr;
  }

  /**
   * Create vector search index definition.
   *
   * @param {object} config - Object shaped like `VectorIndexConfigSchema`
   *   output; reads `index_name`, `vector_field`, `dimensions`, `similarity`
   *   and `index_options.filters`.
   * @returns {{name: string, definition: {fields: object[]}}} The vector
   *   field first, followed by one `filter` field per configured path.
   */
  static createVectorIndexDefinition(config) {
    return {
      name: config.index_name,
      definition: {
        fields: [
          {
            type: 'vector',
            path: config.vector_field,
            numDimensions: config.dimensions,
            similarity: config.similarity
          },
          // Add filterable fields
          ...(config.index_options.filters?.map(field => ({
            type: 'filter',
            path: field
          })) || [])
        ]
      }
    };
  }

  /**
   * Calculate embedding similarity (cosine of the angle between two vectors).
   *
   * @param {number[]} embedding1 - First vector.
   * @param {number[]} embedding2 - Second vector, same length as the first.
   * @returns {number} Dot product divided by the product of the magnitudes;
   *   0 when either vector has zero magnitude (including empty vectors).
   * @throws {Error} If the two vectors differ in length.
   */
  static calculateCosineSimilarity(embedding1, embedding2) {
    if (embedding1.length !== embedding2.length) {
      throw new Error('Embeddings must have the same dimensions');
    }

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;
    for (let i = 0; i < embedding1.length; i++) {
      dotProduct += embedding1[i] * embedding2[i];
      norm1 += embedding1[i] * embedding1[i];
      norm2 += embedding2[i] * embedding2[i];
    }

    const denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
    // A zero-magnitude vector has no direction; return 0 instead of the
    // NaN that 0 / 0 would produce.
    if (denominator === 0) {
      return 0;
    }
    return dotProduct / denominator;
  }
}