UNPKG

@escher-dbai/rag-module

Version:

Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.

481 lines (418 loc) 16.2 kB
const path = require('path');
const fs = require('fs-extra');

/**
 * Words that the resource-name patterns may capture but that are never
 * resource names themselves.
 */
const NON_RESOURCE_WORDS = new Set([
  'the', 'my', 'our', 'this', 'that',
  'instance', 'database', 'bucket', 'server',
  'stop', 'start', 'restart'
]);

/**
 * Generic operation -> service-specific operation name.
 * 'terminate' mirrors 'delete' so that "destroy"/"kill" style requests
 * resolve to the same operations as "delete"/"remove".
 */
const SERVICE_OPERATION_MAP = {
  start: { aws_ec2: 'start_instance', aws_rds: 'start_db_instance' },
  stop: { aws_ec2: 'stop_instance', aws_rds: 'stop_db_instance' },
  restart: { aws_ec2: 'reboot_instance', aws_rds: 'restart_db_instance' },
  delete: { aws_ec2: 'terminate_instance', aws_rds: 'delete_db_instance', aws_s3: 'delete_bucket' },
  terminate: { aws_ec2: 'terminate_instance', aws_rds: 'delete_db_instance', aws_s3: 'delete_bucket' },
  modify: { aws_ec2: 'modify_instance', aws_rds: 'modify_db_instance', aws_s3: 'modify_bucket_policy' }
};

// Resource-name prefixes that identify a service regardless of other wording.
const RDS_NAME_HINT = /\b(pg-|mysql-|db-|rds-)\b/i;
const S3_NAME_HINT = /\b(bucket-|s3-|storage-)\b/i;
const EC2_NAME_HINT = /\b(i-[0-9a-f]+|web-|app-|server-)\b/i;

/** Own-property check that is safe for objects parsed from JSON. */
function hasOwn(target, key) {
  return target !== null && typeof target === 'object' &&
    Object.prototype.hasOwnProperty.call(target, key);
}

/** Escape a literal string so it can be embedded in a RegExp source. */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * True when `phrase` occurs in `text` starting at a word boundary.
 * Prevents "boot" from matching inside "reboot" and "start instance"
 * from matching inside "restart instance", while still accepting
 * inflected forms such as "booting".
 */
function startsWord(text, phrase) {
  return new RegExp(`\\b${escapeRegExp(phrase)}`, 'i').test(text);
}

/**
 * True when `keyword` occurs in `text` at the start or at the end of a word.
 * Accepts "databases" and "postgresql" but rejects mid-word hits such as
 * the "db" in "sandbox".
 */
function touchesWordEdge(text, keyword) {
  const source = escapeRegExp(keyword);
  return new RegExp(`\\b${source}|${source}\\b`, 'i').test(text);
}

/**
 * Check a value against a validation-rule type name.
 * 'array', 'integer' and 'null' get dedicated checks because `typeof`
 * can never produce those strings; everything else uses `typeof`.
 */
function matchesType(value, typeName) {
  if (typeName === 'array') return Array.isArray(value);
  if (typeName === 'integer') return Number.isInteger(value);
  if (typeName === 'null') return value === null;
  return typeof value === typeName;
}

/**
 * Operation Context Service
 * Provides service-operation mapping and configuration lookup from ServiceRegistry.
 * Uses RAG-first discovery for resource detection when a RAG module is supplied,
 * and falls back to keyword matching otherwise.
 */
class OperationContextService {
  /**
   * @param {string} basePath - Stored on the instance; not read by this class.
   * @param {Object|null} [ragModule=null] - Optional RAG module. When present,
   *   parseOperation calls its `findResourceByName` and `classifyOperation`.
   */
  constructor(basePath, ragModule = null) {
    this.basePath = basePath;
    this.ragModule = ragModule;
    this.serviceRegistry = null;
    this.initialized = false;

    // Operation keyword mappings to standardize user input.
    // The first entry of each list is used as the canonical operation name.
    this.operationMappings = {
      // EC2 operations
      'start': ['start_instance', 'boot', 'power_on', 'turn_on'],
      'stop': ['stop_instance', 'shutdown', 'power_off', 'turn_off'],
      'restart': ['reboot_instance', 'reboot', 'bounce'],
      'terminate': ['terminate_instance', 'destroy', 'kill'],
      'modify': ['modify_instance', 'change', 'update', 'resize'],

      // RDS operations
      'start_db': ['start_db_instance', 'start_database', 'boot_db'],
      'stop_db': ['stop_db_instance', 'stop_database', 'shutdown_db'],
      'restart_db': ['restart_db_instance', 'reboot_db', 'bounce_db'],
      'delete_db': ['delete_db_instance', 'destroy_db', 'remove_db'],
      'modify_db': ['modify_db_instance', 'change_db', 'resize_db'],
      'snapshot': ['create_db_snapshot', 'backup_db', 'create_backup'],
      'restore': ['restore_db_from_snapshot', 'restore_backup'],

      // S3 operations
      'create_bucket': ['create_bucket', 'make_bucket', 'new_bucket'],
      'delete_bucket': ['delete_bucket', 'remove_bucket', 'destroy_bucket'],
      'modify_bucket': ['modify_bucket_policy', 'change_bucket', 'update_bucket']
    };

    // Service detection patterns - ORDER MATTERS! More specific patterns first.
    // ID patterns are case-insensitive because parseOperation lowercases the
    // request before the resource name is extracted.
    this.servicePatterns = {
      'aws_rds': {
        keywords: ['rds', 'database', 'db', 'postgres', 'mysql', 'oracle', 'sql'],
        id_patterns: [/^db-[A-Z0-9]+$/i]
      },
      'aws_s3': {
        keywords: ['s3', 'bucket', 'storage', 'object'],
        id_patterns: [/^bucket-[A-Z0-9]+$/i]
      },
      'aws_ec2': {
        keywords: ['ec2', 'instance', 'virtual machine', 'vm', 'server'],
        id_patterns: [/^i-[0-9a-f]{8,17}$/i]
      }
    };
  }

  /**
   * Initialize the service by loading ServiceRegistry.json.
   * A no-op when already initialized.
   * @returns {Promise<void>}
   * @throws {Error} When the registry file is missing, unreadable, or not a
   *   JSON object. The underlying error is attached as `cause`.
   */
  async initialize() {
    if (this.initialized) return;

    try {
      // ServiceRegistry.json is in the main project src directory, not the demo folder.
      // Two levels up from this file's directory (src/services -> project root).
      const projectRoot = path.dirname(path.dirname(__dirname));
      const registryPath = path.join(projectRoot, 'src', 'config', 'ServiceRegistry.json');

      if (!(await fs.pathExists(registryPath))) {
        throw new Error(`ServiceRegistry not found at: ${registryPath}`);
      }

      const registry = await fs.readJSON(registryPath);
      if (registry === null || typeof registry !== 'object' || Array.isArray(registry)) {
        throw new Error(`ServiceRegistry at ${registryPath} must be a JSON object`);
      }

      this.serviceRegistry = registry;
      this.initialized = true;

      console.log('✅ OperationContextService initialized with ServiceRegistry');
    } catch (error) {
      throw new Error(
        `Failed to initialize OperationContextService: ${error.message}`,
        { cause: error }
      );
    }
  }

  /**
   * Parse user request to identify service, operation, and resource.
   * Tries RAG discovery first (when a RAG module is configured and a resource
   * name was extracted), then fills in whatever is still unknown with the
   * keyword-based detectors.
   *
   * NOTE(review): the resource name is extracted from the lowercased request,
   * so the name passed to the RAG module has lost its original casing -
   * confirm that lookup is case-insensitive.
   *
   * @param {string} userRequest - Natural language request
   * @returns {Promise<Object>} Object with original_request, service, operation,
   *   resource_name, operation_config, parsed_successfully, parse_method
   *   ('rag_discovery' | 'keyword_fallback'), found_resource and confidence_score.
   * @throws {Error} When the service is not initialized.
   * @throws {TypeError} When userRequest is not a string.
   */
  async parseOperation(userRequest) {
    this._ensureInitialized();

    if (typeof userRequest !== 'string') {
      throw new TypeError('userRequest must be a string');
    }

    const request = userRequest.toLowerCase().trim();

    // Step 1: Extract resource name from request
    const resourceName = this._extractResourceName(request);

    let service = 'unknown';
    let operation = 'unknown';
    let foundResource = null;
    let parseMethod = 'unknown';

    // Step 2: Try RAG-first resource discovery
    if (this.ragModule && resourceName !== 'unknown') {
      try {
        foundResource = await this.ragModule.findResourceByName(resourceName);

        if (foundResource && foundResource.service) {
          service = `aws_${foundResource.service}`;
          parseMethod = 'rag_discovery';
          console.log(`✅ Found: ${foundResource.service.toUpperCase()} resource`);

          // Step 3: Use semantic operation classification
          const classified = await this.ragModule.classifyOperation(userRequest, foundResource.service);
          // A missing classification is treated as unknown so the fallback runs.
          operation = classified || 'unknown';
        }
      } catch (error) {
        // Best effort: RAG failures must not abort parsing, but should be visible.
        console.warn(`⚠️ RAG discovery failed: ${error && error.message ? error.message : error}`);
      }
    }

    // Step 4: Fallback to keyword-based detection for whatever is still unknown.
    // A service already resolved by RAG is kept rather than overwritten.
    if (service === 'unknown' || operation === 'unknown') {
      if (service === 'unknown') {
        service = this._detectService(request, resourceName);
      }
      if (operation === 'unknown') {
        operation = this._detectOperation(request, service);
      }
      parseMethod = 'keyword_fallback';
    }

    // Step 5: Get operation configuration
    const operationConfig = this.getOperationConfig(service, operation);

    const hasScore = Boolean(foundResource) && typeof foundResource.score === 'number';

    return {
      original_request: userRequest,
      service: service,
      operation: operation,
      resource_name: resourceName,
      operation_config: operationConfig,
      parsed_successfully: service !== 'unknown' && operation !== 'unknown',
      parse_method: parseMethod,
      found_resource: foundResource,
      confidence_score: hasScore ? foundResource.score : 0.5
    };
  }

  /**
   * Get operation configuration from ServiceRegistry
   * @param {string} service - Service name (e.g., 'aws_ec2')
   * @param {string} operation - Operation name (e.g., 'stop_instance')
   * @returns {Object|null} Normalized configuration, or null when the service
   *   or operation is not in the registry.
   */
  getOperationConfig(service, operation) {
    this._ensureInitialized();

    const entry = this._getServiceEntry(service);
    const operations = entry ? entry.operations : null;
    if (!hasOwn(operations, operation)) {
      return null;
    }

    const operationConfig = operations[operation];
    if (!operationConfig) {
      return null;
    }

    return {
      description: operationConfig.description,
      required_params: operationConfig.required_params || [],
      optional_params: operationConfig.optional_params || [],
      validation_rules: operationConfig.validation_rules || {},
      confidential_data: operationConfig.confidential_data || [],
      non_confidential_data: operationConfig.non_confidential_data || []
    };
  }

  /**
   * Get all available services
   * @returns {Array<string>}
   */
  getAvailableServices() {
    this._ensureInitialized();
    return Object.keys(this.serviceRegistry);
  }

  /**
   * Get all operations for a service
   * @param {string} service - Service name
   * @returns {Array<string>} Operation names; empty when the service is unknown
   *   or declares no operations.
   */
  getServiceOperations(service) {
    this._ensureInitialized();

    const entry = this._getServiceEntry(service);
    if (!entry || entry.operations === null || typeof entry.operations !== 'object') {
      return [];
    }

    return Object.keys(entry.operations);
  }

  /**
   * Get service description
   * @param {string} service - Service name
   * @returns {string} The registry description, or 'Unknown service' when the
   *   service is not registered.
   */
  getServiceDescription(service) {
    this._ensureInitialized();

    const entry = this._getServiceEntry(service);
    if (!entry) {
      return 'Unknown service';
    }

    return entry.description;
  }

  /**
   * Check if parameter is required for operation
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @returns {boolean} false when the operation is not in the registry.
   */
  isParameterRequired(service, operation, paramName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false;

    return config.required_params.includes(paramName);
  }

  /**
   * Check if parameter is optional for operation
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @returns {boolean} false when the operation is not in the registry.
   */
  isParameterOptional(service, operation, paramName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false;

    return config.optional_params.includes(paramName);
  }

  /**
   * Check if data field is confidential.
   * Note: when the operation IS registered, a field listed in neither
   * confidential_data nor non_confidential_data yields false here and false
   * from isDataNonConfidential; use isDataNonConfidential to decide whether
   * a field is safe to send.
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} fieldName - Data field name
   * @returns {boolean} true when the operation is unknown (safe default).
   */
  isDataConfidential(service, operation, fieldName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return true; // Default to confidential for safety

    return config.confidential_data.includes(fieldName);
  }

  /**
   * Check if data field is non-confidential (safe to send)
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} fieldName - Data field name
   * @returns {boolean} false when the operation is unknown (safe default).
   */
  isDataNonConfidential(service, operation, fieldName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false; // Default to confidential for safety

    return config.non_confidential_data.includes(fieldName);
  }

  /**
   * Validate parameter value against rules.
   * Checks run in order: type, pattern (strings only), enum. A parameter with
   * no rule, or an unregistered operation, is reported as valid.
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @param {any} value - Parameter value
   * @returns {Object} `{ valid: true }` or `{ valid: false, error: string }`
   */
  validateParameter(service, operation, paramName, value) {
    const config = this.getOperationConfig(service, operation);
    const rules = config ? config.validation_rules : null;
    if (!hasOwn(rules, paramName) || !rules[paramName]) {
      return { valid: true };
    }

    const rule = rules[paramName];

    // Type validation
    if (rule.type && !matchesType(value, rule.type)) {
      return {
        valid: false,
        error: `Parameter ${paramName} must be of type ${rule.type}`
      };
    }

    // Pattern validation
    if (rule.pattern && typeof value === 'string') {
      let pattern;
      try {
        pattern = new RegExp(rule.pattern);
      } catch (error) {
        // A malformed registry pattern fails closed instead of throwing.
        return {
          valid: false,
          error: `Parameter ${paramName} has an invalid validation pattern: ${rule.pattern}`
        };
      }

      if (!pattern.test(value)) {
        return {
          valid: false,
          error: `Parameter ${paramName} does not match required pattern: ${rule.pattern}`
        };
      }
    }

    // Enum validation
    if (Array.isArray(rule.enum) && !rule.enum.includes(value)) {
      return {
        valid: false,
        error: `Parameter ${paramName} must be one of: ${rule.enum.join(', ')}`
      };
    }

    return { valid: true };
  }

  // Private methods

  /**
   * @throws {Error} When initialize() has not completed successfully.
   */
  _ensureInitialized() {
    if (!this.initialized) {
      throw new Error('OperationContextService must be initialized before use');
    }
  }

  /**
   * Look up a service's registry entry by own key only, so inherited
   * properties such as 'constructor' are never mistaken for services.
   * @param {string} service - Service name
   * @returns {Object|null}
   */
  _getServiceEntry(service) {
    if (!hasOwn(this.serviceRegistry, service)) {
      return null;
    }

    const entry = this.serviceRegistry[service];
    return entry !== null && typeof entry === 'object' ? entry : null;
  }

  /**
   * Extract resource name from user request.
   * Patterns are tried in order, most specific first; the first capture that
   * is longer than two characters and is not a common word wins.
   * @param {string} request - Request text
   * @returns {string} The resource name, or 'unknown'.
   */
  _extractResourceName(request) {
    const patterns = [
      // "stop my pg-instance-main1" - look after the whole word "my"
      /(?:\bmy\s+)([a-zA-Z0-9\-_]+(?:\-[a-zA-Z0-9]+)*)/,
      // "stop instance pg-instance-main1"
      /instance\s+([a-zA-Z0-9\-_]+)/,
      // "stop database pg-instance-main1"
      /database\s+([a-zA-Z0-9\-_]+)/,
      // "stop bucket my-data-bucket"
      /bucket\s+([a-zA-Z0-9\-_]+)/,
      // "bring up escher-ec2"
      /(?:up|down)\s+([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)*)/,
      // Generic pattern - action word followed by a hyphenated name
      /(?:stop|start|restart|delete|modify|create|bring|power|boot|launch)\s+(?:my\s+)?([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)+)/,
      // Action word followed by a simple or hyphenated name
      /(?:stop|start|restart|delete|modify|create|bring|power|boot|launch|up|down)\s+([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)*)/,
      // Last resort - any hyphenated word that looks like a resource name
      /\b([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+){1,})\b/
    ];

    for (const pattern of patterns) {
      const match = request.match(pattern);
      const candidate = match ? match[1] : null;
      if (candidate && candidate.length > 2 && !NON_RESOURCE_WORDS.has(candidate.toLowerCase())) {
        return candidate;
      }
    }

    return 'unknown';
  }

  /**
   * Detect service from request and resource name.
   * Order: resource-name prefixes, then per-service keywords and ID patterns
   * (in servicePatterns order), then EC2-style name hints.
   * @param {string} request - Request text
   * @param {string} resourceName - Extracted resource name
   * @returns {string} 'aws_rds' | 'aws_s3' | 'aws_ec2' | 'unknown'
   */
  _detectService(request, resourceName) {
    // Check resource name prefixes FIRST for highest specificity
    if (RDS_NAME_HINT.test(resourceName)) {
      return 'aws_rds';
    }
    if (S3_NAME_HINT.test(resourceName)) {
      return 'aws_s3';
    }

    // Then check explicit service keywords and ID patterns
    for (const [service, config] of Object.entries(this.servicePatterns)) {
      // Keywords must start or end a word, so "db" does not match "sandbox"
      if (config.keywords.some((keyword) => touchesWordEdge(request, keyword))) {
        return service;
      }

      if (config.id_patterns.some((pattern) => pattern.test(resourceName))) {
        return service;
      }
    }

    // EC2-style resource names - checked last to avoid false matches
    if (EC2_NAME_HINT.test(resourceName)) {
      return 'aws_ec2';
    }

    return 'unknown';
  }

  /**
   * Detect operation from request.
   * Known variations are matched first (each must begin at a word boundary),
   * then plain verbs, with more specific verbs checked before the ones they
   * contain ("restart" before "start").
   * @param {string} request - Request text
   * @param {string} service - Detected service name
   * @returns {string} Operation name, or 'unknown'.
   */
  _detectOperation(request, service) {
    for (const [standardOp, variations] of Object.entries(this.operationMappings)) {
      for (const variation of variations) {
        const spaced = variation.replace(/_/g, ' ');
        if (startsWord(request, spaced) || startsWord(request, variation)) {
          // Return the appropriate operation for the detected service
          return this._mapOperationToService(standardOp, service);
        }
      }
    }

    // Default operation detection - CHECK MORE SPECIFIC TERMS FIRST
    if (request.includes('restart') || request.includes('reboot')) {
      return service === 'aws_rds' ? 'restart_db_instance' : 'reboot_instance';
    }
    if (request.includes('start')) {
      return service === 'aws_rds' ? 'start_db_instance' : 'start_instance';
    }
    if (request.includes('stop')) {
      return service === 'aws_rds' ? 'stop_db_instance' : 'stop_instance';
    }
    if (request.includes('delete') || request.includes('remove') || request.includes('terminate')) {
      if (service === 'aws_rds') return 'delete_db_instance';
      if (service === 'aws_s3') return 'delete_bucket';
      return 'terminate_instance';
    }
    if (request.includes('modify')) {
      return this._mapOperationToService('modify', service);
    }

    return 'unknown';
  }

  /**
   * Map generic operation to service-specific operation.
   * - Generic operations with a per-service table return the entry for the
   *   service, or the operation unchanged when the service has no entry.
   * - Service-specific keys from operationMappings (e.g. 'start_db',
   *   'snapshot') return their canonical first variation.
   * - Anything else is returned unchanged.
   * @param {string} operation - Generic operation key
   * @param {string} service - Service name
   * @returns {string}
   */
  _mapOperationToService(operation, service) {
    if (hasOwn(SERVICE_OPERATION_MAP, operation)) {
      const perService = SERVICE_OPERATION_MAP[operation];
      return hasOwn(perService, service) ? perService[service] : operation;
    }

    if (hasOwn(this.operationMappings, operation)) {
      const variations = this.operationMappings[operation];
      if (Array.isArray(variations) && variations.length > 0) {
        return variations[0];
      }
    }

    return operation;
  }
}

module.exports = OperationContextService;