mcp-context-engineering
Version:
The intelligent context optimization system for AI coding assistants. Built with Cole's PRP methodology, Context Portal knowledge graphs, and production-ready MongoDB architecture.
332 lines (331 loc) • 11.8 kB
JavaScript
import { ObjectId } from 'mongodb';
import { z } from 'zod';
/**
* Vector Embeddings Schema - MongoDB Atlas Vector Search Optimized
*
* Following best practices for:
* - MongoDB Atlas Vector Search with hybrid search capabilities
* - Voyage AI embeddings for English methodology descriptions
* - Semantic search across context patterns and knowledge graphs
* - Cross-workspace knowledge sharing and discovery
*/
// Individual embedding document.
//
// The nested pieces are declared first as module-private constants and then
// assembled into the exported schema at the bottom.

// Kinds of source item an embedding can be generated from.
const embeddingSourceItemTypes = [
  'prp_methodology',
  'context_pattern',
  'decision',
  'progress_entry',
  'system_pattern',
  'query',
  'product_context',
  'active_context'
];

// What the embedded text represents.
const embeddingTextTypes = [
  'methodology_description',
  'context_description',
  'query_text',
  'combined_text'
];

// Metadata for hybrid search.
const embeddingSearchMetadata = z.object({
  // Core attributes
  title: z.string().optional(),
  description: z.string().optional(),
  tags: z.array(z.string()).default([]),
  category: z.string().optional(),

  // Technical metadata
  tech_stack: z.array(z.string()).default([]),
  complexity: z.enum(['low', 'medium', 'high']).optional(),
  pattern_type: z.string().optional(),

  // Quality and effectiveness (score is 0-10, rate is 0-1)
  effectiveness_score: z.number().min(0).max(10).optional(),
  usage_count: z.number().default(0),
  success_rate: z.number().min(0).max(1).optional(),

  // Temporal metadata
  created_at: z.date(),
  last_accessed: z.date().optional(),
  last_updated: z.date(),

  // Workspace isolation
  workspace_type: z.enum(['private', 'team', 'public']).default('private'),
  access_level: z.enum(['read', 'write', 'admin']).default('read')
});

// One cross-reference link to another item; `strength` is constrained to 0-1.
const embeddingRelatedItem = z.object({
  item_type: z.string(),
  item_id: z.string(),
  relationship: z.string(),
  strength: z.number().min(0).max(1)
});

// Embedding generation metadata.
const embeddingGenerationInfo = z.object({
  model: z.string().default('voyage-3-large'),
  model_version: z.string().optional(),
  generation_timestamp: z.date(),
  token_count: z.number().optional(),
  processing_time_ms: z.number().optional(),
  confidence_score: z.number().min(0).max(1).optional()
});

// Optional ObjectId, used for `_id` and the version-chain links.
const embeddingOptionalObjectId = z.instanceof(ObjectId).optional();

export const EmbeddingDocumentSchema = z.object({
  _id: embeddingOptionalObjectId,

  // Source identification
  workspace_id: z.string(),
  project_id: z.string(),
  item_type: z.enum(embeddingSourceItemTypes),
  item_id: z.string(),

  // Vector data: exactly 1024 numbers (Voyage AI voyage-3-large)
  embedding: z.array(z.number()).length(1024),

  // Text content that was embedded
  text_content: z.string(),
  text_type: z.enum(embeddingTextTypes),

  metadata: embeddingSearchMetadata,

  // Cross-reference links (defaults to none)
  related_items: z.array(embeddingRelatedItem).default([]),

  embedding_metadata: embeddingGenerationInfo,

  // Version control
  version: z.number().default(1),
  superseded_by: embeddingOptionalObjectId,
  supersedes: embeddingOptionalObjectId
});
// Semantic search query schema.

// Filters for hybrid search; every filter is optional.
const semanticQueryFilters = z.object({
  workspace_ids: z.array(z.string()).optional(),
  project_ids: z.array(z.string()).optional(),
  item_types: z.array(z.string()).optional(),
  tech_stacks: z.array(z.string()).optional(),
  complexity: z.enum(['low', 'medium', 'high']).optional(),
  min_effectiveness: z.number().min(0).max(10).optional(),
  created_after: z.date().optional(),
  tags: z.array(z.string()).optional()
});

export const SemanticQuerySchema = z.object({
  // Query text, plus an optional 1024-number embedding of it
  query_text: z.string(),
  query_embedding: z.array(z.number()).length(1024).optional(),

  // Search parameters
  // Number of results: 1-100, default 10
  k: z.number().min(1).max(100).default(10),
  // Minimum similarity score: 0-1, default 0.7
  min_score: z.number().min(0).max(1).default(0.7),

  // The filter object as a whole may be omitted
  filters: semanticQueryFilters.optional(),

  // Search behavior
  include_metadata: z.boolean().default(true),
  include_text_content: z.boolean().default(false),
  cross_workspace: z.boolean().default(false)
});
// Search result schema: one matched document with its score and rank.

// Similarity scores are constrained to the 0-1 range.
const searchResultScore = z.number().min(0).max(1);

// NOTE(review): `document` must satisfy the full EmbeddingDocumentSchema, while
// VectorSearchHelper.createVectorSearchPipeline projects only a subset of
// fields - confirm which shape callers validate against this schema.
export const SearchResultSchema = z.object({
  document: EmbeddingDocumentSchema,
  similarity_score: searchResultScore,
  rank: z.number(),
  explanation: z.string().optional()
});
// Batch embedding operation schema.

// States shared by the batch as a whole and by each item inside it.
const batchEmbeddingStates = ['pending', 'processing', 'completed', 'failed'];

// One item to embed; its status defaults to 'pending'.
const batchEmbeddingItem = z.object({
  item_id: z.string(),
  item_type: z.string(),
  text_content: z.string(),
  metadata: z.record(z.any()),
  status: z.enum(batchEmbeddingStates).default('pending'),
  error_message: z.string().optional()
});

// Configuration; every setting has a default.
const batchEmbeddingConfig = z.object({
  model: z.string().default('voyage-3-large'),
  batch_size: z.number().default(100),
  retry_attempts: z.number().default(3),
  timeout_ms: z.number().default(30000)
});

export const BatchEmbeddingSchema = z.object({
  _id: z.instanceof(ObjectId).optional(),
  batch_id: z.string(),
  workspace_id: z.string(),
  project_id: z.string(),

  // Batch metadata (no default status: callers must supply one)
  status: z.enum(batchEmbeddingStates),
  total_items: z.number(),
  processed_items: z.number().default(0),
  failed_items: z.number().default(0),

  // Processing details
  started_at: z.date().optional(),
  completed_at: z.date().optional(),
  processing_time_ms: z.number().optional(),

  // Items to embed
  items: z.array(batchEmbeddingItem),

  embedding_config: batchEmbeddingConfig,

  created_at: z.date(),
  updated_at: z.date()
});
// Vector search index configuration.

// Index options; `type` must be the literal 'vector'.
const vectorIndexOptions = z.object({
  type: z.literal('vector'),
  // For IVF indexes
  numLists: z.number().optional(),
  // Filterable fields
  filters: z.array(z.string()).optional()
});

export const VectorIndexConfigSchema = z.object({
  index_name: z.string(),
  collection_name: z.string(),

  // Vector field configuration (defaults: 'embedding', 1024, 'cosine')
  vector_field: z.string().default('embedding'),
  dimensions: z.number().default(1024),
  similarity: z.enum(['cosine', 'euclidean', 'dotProduct']).default('cosine'),

  index_options: vectorIndexOptions,

  // Metadata
  created_at: z.date(),
  status: z.enum(['creating', 'ready', 'failed']),
  index_size_mb: z.number().optional(),
  document_count: z.number().optional()
});
// Embedding analytics and performance tracking.
//
// Each section of the report is declared separately and combined in the
// exported schema below. All numeric fields are plain numbers with no range
// constraints.

// Start and end of the analysed period.
const analyticsPeriod = z.object({
  start_date: z.date(),
  end_date: z.date()
});

// Usage statistics
const analyticsUsageStats = z.object({
  total_embeddings: z.number(),
  new_embeddings: z.number(),
  updated_embeddings: z.number(),
  search_queries: z.number(),
  avg_search_latency_ms: z.number(),
  cache_hit_rate: z.number()
});

// Quality metrics
const analyticsQualityMetrics = z.object({
  avg_similarity_score: z.number(),
  search_satisfaction_rate: z.number(),
  false_positive_rate: z.number(),
  coverage_completeness: z.number()
});

// Performance insights; each entry is a list of strings.
const analyticsPerformanceInsights = z.object({
  top_search_patterns: z.array(z.string()),
  most_effective_embeddings: z.array(z.string()),
  optimization_opportunities: z.array(z.string()),
  recommended_actions: z.array(z.string())
});

// Cost tracking
const analyticsCostTracking = z.object({
  embedding_api_calls: z.number(),
  estimated_cost_usd: z.number(),
  token_usage: z.number(),
  cost_per_search: z.number()
});

export const EmbeddingAnalyticsSchema = z.object({
  _id: z.instanceof(ObjectId).optional(),
  workspace_id: z.string(),
  analysis_period: analyticsPeriod,
  usage_stats: analyticsUsageStats,
  quality_metrics: analyticsQualityMetrics,
  performance_insights: analyticsPerformanceInsights,
  cost_tracking: analyticsCostTracking,
  analysis_date: z.date(),
  next_analysis_due: z.date()
});
/**
 * Vector Search Service Helper Class
 *
 * Stateless static utilities for working with MongoDB Atlas Vector Search:
 * building the aggregation pipeline and filter for a semantic query, producing
 * an index definition, and computing cosine similarity between two embeddings.
 */
export class VectorSearchHelper {
  /**
   * Generate the MongoDB Atlas Vector Search aggregation pipeline for a query.
   *
   * Stages are emitted in this order:
   *   1. `$vectorSearch` - only when `query.query_embedding` is set
   *   2. `$addFields`    - exposes the search score as `similarity_score`
   *   3. `$match`        - only when `query.min_score` is greater than 0
   *   4. `$project`      - identifiers and score, plus optional metadata/text
   *
   * @param {object} query - Fields read: `query_embedding`, `k`, `filters`,
   *   `min_score`, `include_metadata`, `include_text_content`.
   * @param {object} [options] - Optional overrides; omitted values keep the
   *   previous hard-coded behaviour.
   * @param {string} [options.indexName='vector_index'] - Search index to query.
   * @param {string} [options.path='embedding'] - Document field holding the vector.
   * @returns {object[]} Aggregation pipeline stages.
   */
  static createVectorSearchPipeline(query, options = {}) {
    const indexName = options?.indexName ?? 'vector_index';
    const path = options?.path ?? 'embedding';

    // Fall back to the class itself when the method is invoked without a
    // receiver (destructured or passed as a callback), where `this` is
    // undefined. A receiver that provides its own builder is still honoured.
    const helper = typeof this?.buildFilterExpression === 'function' ? this : VectorSearchHelper;

    const pipeline = [];

    // NOTE(review): without `query_embedding` no $vectorSearch stage is emitted,
    // yet the score stages below still are - confirm callers always supply an
    // embedding before running the pipeline.
    if (query.query_embedding) {
      pipeline.push({
        $vectorSearch: {
          index: indexName,
          path,
          queryVector: query.query_embedding,
          // Oversample: consider 10x as many candidates as results returned.
          numCandidates: query.k * 10,
          limit: query.k,
          filter: helper.buildFilterExpression(query.filters)
        }
      });
    }

    // Surface the search score as a regular field so later stages can use it.
    pipeline.push({
      $addFields: {
        similarity_score: { $meta: 'vectorSearchScore' }
      }
    });

    // Drop results below the requested minimum score.
    if (query.min_score > 0) {
      pipeline.push({
        $match: {
          similarity_score: { $gte: query.min_score }
        }
      });
    }

    // Always return the identifiers and score; heavier fields are opt-in.
    const projection = {
      similarity_score: 1,
      item_type: 1,
      item_id: 1,
      workspace_id: 1,
      project_id: 1
    };
    if (query.include_metadata) {
      projection.metadata = 1;
    }
    if (query.include_text_content) {
      projection.text_content = 1;
    }
    pipeline.push({ $project: projection });

    return pipeline;
  }

  /**
   * Build the filter expression for hybrid search.
   *
   * Only filters that are actually set contribute a condition; empty arrays
   * are ignored. A missing `filters` object yields an empty expression.
   *
   * @param {object} [filters] - Fields read: `workspace_ids`, `project_ids`,
   *   `item_types`, `tech_stacks`, `complexity`, `min_effectiveness`,
   *   `created_after`, `tags`.
   * @returns {object} Field-path to condition mapping.
   */
  static buildFilterExpression(filters) {
    if (!filters) {
      return {};
    }

    const filterExpr = {};

    if (filters.workspace_ids?.length) {
      filterExpr.workspace_id = { $in: filters.workspace_ids };
    }
    if (filters.project_ids?.length) {
      filterExpr.project_id = { $in: filters.project_ids };
    }
    if (filters.item_types?.length) {
      filterExpr.item_type = { $in: filters.item_types };
    }
    if (filters.tech_stacks?.length) {
      filterExpr['metadata.tech_stack'] = { $in: filters.tech_stacks };
    }
    if (filters.complexity) {
      filterExpr['metadata.complexity'] = filters.complexity;
    }
    // A minimum of 0 (or an unset minimum) adds no condition.
    if (filters.min_effectiveness) {
      filterExpr['metadata.effectiveness_score'] = { $gte: filters.min_effectiveness };
    }
    if (filters.created_after) {
      filterExpr['metadata.created_at'] = { $gte: filters.created_after };
    }
    if (filters.tags?.length) {
      filterExpr['metadata.tags'] = { $in: filters.tags };
    }

    return filterExpr;
  }

  /**
   * Create a vector search index definition.
   *
   * The result has one `vector` field entry followed by one `filter` entry per
   * path listed in `config.index_options.filters`.
   *
   * @param {object} config - Fields read: `index_name`, `vector_field`,
   *   `dimensions`, `similarity`, `index_options.filters`.
   * @returns {{name: string, definition: {fields: object[]}}} Index definition.
   */
  static createVectorIndexDefinition(config) {
    const filterPaths = config.index_options.filters ?? [];

    return {
      name: config.index_name,
      definition: {
        fields: [
          {
            type: 'vector',
            path: config.vector_field,
            numDimensions: config.dimensions,
            similarity: config.similarity
          },
          // Add filterable fields
          ...filterPaths.map((field) => ({
            type: 'filter',
            path: field
          }))
        ]
      }
    };
  }

  /**
   * Calculate the cosine similarity of two embeddings.
   *
   * @param {number[]} embedding1 - First vector.
   * @param {number[]} embedding2 - Second vector, same length as the first.
   * @returns {number} Cosine similarity, or 0 when either vector has zero
   *   magnitude (including empty vectors).
   * @throws {Error} When the vectors differ in length.
   */
  static calculateCosineSimilarity(embedding1, embedding2) {
    if (embedding1.length !== embedding2.length) {
      throw new Error('Embeddings must have the same dimensions');
    }

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;
    for (let i = 0; i < embedding1.length; i++) {
      dotProduct += embedding1[i] * embedding2[i];
      norm1 += embedding1[i] * embedding1[i];
      norm2 += embedding2[i] * embedding2[i];
    }

    const magnitude = Math.sqrt(norm1) * Math.sqrt(norm2);
    // A zero-magnitude vector has no direction; return 0 instead of the NaN
    // that dividing 0 by 0 would produce.
    if (magnitude === 0) {
      return 0;
    }
    return dotProduct / magnitude;
  }
}