@escher-dbai/rag-module
Version:
Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.
481 lines (418 loc) • 16.2 kB
JavaScript
const path = require('path');
const fs = require('fs-extra');
/**
 * Operation Context Service
 *
 * Maps a natural-language request to a (service, operation, resource) triple
 * and looks up the matching operation configuration in the ServiceRegistry.
 *
 * Resolution order used by parseOperation():
 *   1. Extract a resource name from the request text.
 *   2. If a RAG module was supplied, ask it to find that resource and to
 *      classify the operation.
 *   3. Fill in whatever is still unknown with keyword matching.
 */
class OperationContextService {
  /**
   * @param {string} basePath - Stored on the instance; no method of this class reads it.
   * @param {Object|null} [ragModule=null] - Optional resource-discovery module. When
   *   provided, parseOperation() calls its async `findResourceByName(name)` and
   *   `classifyOperation(request, service)` methods.
   */
  constructor(basePath, ragModule = null) {
    this.basePath = basePath;
    this.ragModule = ragModule;
    this.serviceRegistry = null;
    this.initialized = false;

    // Compiled whole-term matchers, keyed by the raw keyword / variation string.
    this._termPatternCache = new Map();

    // Generic operation key -> phrases that select it. Underscores in a phrase
    // match either an underscore or whitespace in the request. The first entry
    // of each list doubles as the canonical operation name for keys that have
    // no per-service mapping (see _mapOperationToService).
    this.operationMappings = {
      // EC2 operations
      'start': ['start_instance', 'boot', 'power_on', 'turn_on'],
      'stop': ['stop_instance', 'shutdown', 'power_off', 'turn_off'],
      'restart': ['reboot_instance', 'reboot', 'bounce'],
      'terminate': ['terminate_instance', 'destroy', 'kill'],
      'modify': ['modify_instance', 'change', 'update', 'resize'],
      // RDS operations
      'start_db': ['start_db_instance', 'start_database', 'boot_db'],
      'stop_db': ['stop_db_instance', 'stop_database', 'shutdown_db'],
      'restart_db': ['restart_db_instance', 'reboot_db', 'bounce_db'],
      'delete_db': ['delete_db_instance', 'destroy_db', 'remove_db'],
      'modify_db': ['modify_db_instance', 'change_db', 'resize_db'],
      'snapshot': ['create_db_snapshot', 'backup_db', 'create_backup'],
      'restore': ['restore_db_from_snapshot', 'restore_backup'],
      // S3 operations
      'create_bucket': ['create_bucket', 'make_bucket', 'new_bucket'],
      'delete_bucket': ['delete_bucket', 'remove_bucket', 'destroy_bucket'],
      'modify_bucket': ['modify_bucket_policy', 'change_bucket', 'update_bucket']
    };

    // Service detection patterns - ORDER MATTERS! Services are tried in
    // insertion order, so the more specific ones come first.
    // The ID patterns are case-insensitive because resource names are
    // extracted from the lower-cased request.
    this.servicePatterns = {
      'aws_rds': {
        keywords: ['rds', 'database', 'db', 'postgres', 'mysql', 'oracle', 'sql'],
        id_patterns: [/^db-[A-Z0-9]+$/i]
      },
      'aws_s3': {
        keywords: ['s3', 'bucket', 'storage', 'object'],
        id_patterns: [/^bucket-[A-Z0-9]+$/i]
      },
      'aws_ec2': {
        keywords: ['ec2', 'instance', 'virtual machine', 'vm', 'server'],
        id_patterns: [/^i-[0-9a-f]{8,17}$/]
      }
    };
  }

  /**
   * Initialize the service by loading ServiceRegistry.json.
   * Does nothing if the service is already initialized.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the registry file is missing or cannot be read; the
   *   underlying error is attached as `cause`.
   */
  async initialize() {
    if (this.initialized) return;
    try {
      // The registry is resolved as <two levels above this file's directory>/src/config.
      const projectRoot = path.dirname(path.dirname(__dirname));
      const registryPath = path.join(projectRoot, 'src', 'config', 'ServiceRegistry.json');
      if (!await fs.pathExists(registryPath)) {
        throw new Error(`ServiceRegistry not found at: ${registryPath}`);
      }
      this.serviceRegistry = await fs.readJSON(registryPath);
      this.initialized = true;
      console.log('✅ OperationContextService initialized with ServiceRegistry');
    } catch (error) {
      const wrapped = new Error(`Failed to initialize OperationContextService: ${error.message}`);
      // Keep the original error (and its stack) reachable for debugging.
      wrapped.cause = error;
      throw wrapped;
    }
  }

  /**
   * Parse a user request to identify service, operation, and resource.
   *
   * RAG discovery is attempted first when a RAG module is configured and a
   * resource name could be extracted. Anything RAG leaves unknown is filled in
   * by keyword matching; a service that RAG did identify is kept rather than
   * being re-guessed from keywords.
   *
   * @param {string} userRequest - Natural language request
   * @returns {Promise<Object>} Result with `original_request`, `service`,
   *   `operation`, `resource_name`, `operation_config`, `parsed_successfully`,
   *   `parse_method` ('rag_discovery' or 'keyword_fallback'), `found_resource`
   *   and `confidence_score`.
   * @throws {Error} When the service has not been initialized.
   * @throws {TypeError} When `userRequest` is not a string.
   */
  async parseOperation(userRequest) {
    this._ensureInitialized();
    if (typeof userRequest !== 'string') {
      throw new TypeError('userRequest must be a string');
    }
    const request = userRequest.toLowerCase().trim();

    // Step 1: Extract the resource name from the normalized request.
    const resourceName = this._extractResourceName(request);

    let service = 'unknown';
    let operation = 'unknown';
    let foundResource = null;
    let parseMethod = 'unknown';

    // Step 2: RAG-first resource discovery (best effort).
    if (this.ragModule && resourceName !== 'unknown') {
      try {
        foundResource = await this.ragModule.findResourceByName(resourceName);
        if (foundResource && foundResource.service) {
          service = `aws_${foundResource.service}`;
          parseMethod = 'rag_discovery';
          console.log(`✅ Found: ${foundResource.service.toUpperCase()} resource`);

          // Step 3: Semantic operation classification. A missing result is
          // treated as "unknown" so the keyword fallback below can take over.
          const classified = await this.ragModule.classifyOperation(userRequest, foundResource.service);
          operation = classified || 'unknown';
        }
      } catch (error) {
        // RAG is optional: report the failure and continue with keywords.
        const reason = error && error.message ? error.message : String(error);
        console.warn(`⚠️ RAG discovery failed, falling back to keyword matching: ${reason}`);
      }
    }

    // Step 4: Keyword fallback for whichever parts are still unknown.
    if (service === 'unknown' || operation === 'unknown') {
      if (service === 'unknown') {
        service = this._detectService(request, resourceName);
      }
      if (operation === 'unknown') {
        operation = this._detectOperation(request, service);
      }
      parseMethod = 'keyword_fallback';
    }

    // Step 5: Look up the operation configuration.
    const operationConfig = this.getOperationConfig(service, operation);
    const hasScore = Boolean(foundResource) && typeof foundResource.score === 'number';

    return {
      original_request: userRequest,
      service: service,
      operation: operation,
      resource_name: resourceName,
      operation_config: operationConfig,
      parsed_successfully: service !== 'unknown' && operation !== 'unknown',
      parse_method: parseMethod,
      found_resource: foundResource,
      // 0.5 is the default when RAG supplied no numeric score.
      confidence_score: hasScore ? foundResource.score : 0.5
    };
  }

  /**
   * Get operation configuration from ServiceRegistry.
   *
   * @param {string} service - Service name (e.g., 'aws_ec2')
   * @param {string} operation - Operation name (e.g., 'stop_instance')
   * @returns {Object|null} Normalized config (list/rule fields default to
   *   empty), or null when the service or operation is not registered.
   * @throws {Error} When the service has not been initialized.
   */
  getOperationConfig(service, operation) {
    this._ensureInitialized();
    const operations = this._operationsFor(service);
    if (!operations || !Object.prototype.hasOwnProperty.call(operations, operation)) {
      return null;
    }
    const operationConfig = operations[operation];
    if (!operationConfig) {
      return null;
    }
    return {
      description: operationConfig.description,
      required_params: operationConfig.required_params || [],
      optional_params: operationConfig.optional_params || [],
      validation_rules: operationConfig.validation_rules || {},
      confidential_data: operationConfig.confidential_data || [],
      non_confidential_data: operationConfig.non_confidential_data || []
    };
  }

  /**
   * Get all available services.
   * @returns {Array<string>} Top-level keys of the ServiceRegistry.
   * @throws {Error} When the service has not been initialized.
   */
  getAvailableServices() {
    this._ensureInitialized();
    return Object.keys(this.serviceRegistry);
  }

  /**
   * Get all operations for a service.
   * @param {string} service - Service name
   * @returns {Array<string>} Operation names; empty when the service is not
   *   registered or declares no operations.
   * @throws {Error} When the service has not been initialized.
   */
  getServiceOperations(service) {
    this._ensureInitialized();
    const operations = this._operationsFor(service);
    return operations ? Object.keys(operations) : [];
  }

  /**
   * Get service description.
   * @param {string} service - Service name
   * @returns {string} The registry description, or 'Unknown service' when the
   *   service is not registered.
   * @throws {Error} When the service has not been initialized.
   */
  getServiceDescription(service) {
    this._ensureInitialized();
    const registry = this.serviceRegistry;
    if (!Object.prototype.hasOwnProperty.call(registry, service) || !registry[service]) {
      return 'Unknown service';
    }
    return registry[service].description;
  }

  /**
   * Check if parameter is required for operation.
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @returns {boolean} False when the operation is not registered.
   */
  isParameterRequired(service, operation, paramName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false;
    return config.required_params.includes(paramName);
  }

  /**
   * Check if parameter is optional for operation.
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @returns {boolean} False when the operation is not registered.
   */
  isParameterOptional(service, operation, paramName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false;
    return config.optional_params.includes(paramName);
  }

  /**
   * Check if data field is confidential.
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} fieldName - Data field name
   * @returns {boolean} True when the operation is not registered.
   */
  isDataConfidential(service, operation, fieldName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return true; // Default to confidential for safety
    return config.confidential_data.includes(fieldName);
  }

  /**
   * Check if data field is non-confidential (safe to send).
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} fieldName - Data field name
   * @returns {boolean} False when the operation is not registered.
   */
  isDataNonConfidential(service, operation, fieldName) {
    const config = this.getOperationConfig(service, operation);
    if (!config) return false; // Default to confidential for safety
    return config.non_confidential_data.includes(fieldName);
  }

  /**
   * Validate parameter value against the operation's validation rules.
   *
   * Checks run in the order type, pattern, enum, and the first failure is
   * reported. Parameters without a rule are always valid.
   *
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @param {string} paramName - Parameter name
   * @param {any} value - Parameter value
   * @returns {Object} `{ valid: true }` or `{ valid: false, error: string }`
   */
  validateParameter(service, operation, paramName, value) {
    const config = this.getOperationConfig(service, operation);
    if (!config || !config.validation_rules[paramName]) {
      return { valid: true };
    }
    const rule = config.validation_rules[paramName];

    // Type validation
    if (rule.type && !this._matchesRuleType(value, rule.type)) {
      return {
        valid: false,
        error: `Parameter ${paramName} must be of type ${rule.type}`
      };
    }

    // Pattern validation (only meaningful for strings)
    if (rule.pattern && typeof value === 'string') {
      const pattern = new RegExp(rule.pattern);
      if (!pattern.test(value)) {
        return {
          valid: false,
          error: `Parameter ${paramName} does not match required pattern: ${rule.pattern}`
        };
      }
    }

    // Enum validation
    if (Array.isArray(rule.enum) && !rule.enum.includes(value)) {
      return {
        valid: false,
        error: `Parameter ${paramName} must be one of: ${rule.enum.join(', ')}`
      };
    }

    return { valid: true };
  }

  // Private methods

  /**
   * Guard used by every registry-backed method.
   * @throws {Error} When initialize() has not completed.
   */
  _ensureInitialized() {
    if (!this.initialized) {
      throw new Error('OperationContextService must be initialized before use');
    }
  }

  /**
   * Return the `operations` object of a registered service, or null when the
   * service is not an own key of the registry or has no operations object.
   */
  _operationsFor(service) {
    const registry = this.serviceRegistry;
    if (!registry || !Object.prototype.hasOwnProperty.call(registry, service)) {
      return null;
    }
    const entry = registry[service];
    if (!entry || typeof entry.operations !== 'object' || entry.operations === null) {
      return null;
    }
    return entry.operations;
  }

  /**
   * Check a value against a rule's declared type. 'integer', 'array' and
   * 'null' are handled explicitly because `typeof` never yields them; every
   * other type name is compared with `typeof value`.
   */
  _matchesRuleType(value, type) {
    switch (type) {
      case 'integer':
        return Number.isInteger(value);
      case 'array':
        return Array.isArray(value);
      case 'null':
        return value === null;
      default:
        return typeof value === type;
    }
  }

  /**
   * Build (and cache) a case-insensitive matcher for a keyword or phrase.
   * The term must appear as a whole token: it may not be preceded or followed
   * by a letter or digit, apart from an optional plural "s". Words inside the
   * term may be separated by whitespace or underscores.
   */
  _termPattern(term) {
    let pattern = this._termPatternCache.get(term);
    if (!pattern) {
      const words = String(term)
        .split(/[\s_]+/)
        .filter(Boolean)
        .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
      pattern = words.length === 0
        ? /(?!)/ // an empty term never matches
        : new RegExp(`(?:^|[^a-z0-9])${words.join('[\\s_]+')}s?(?=$|[^a-z0-9])`, 'i');
      this._termPatternCache.set(term, pattern);
    }
    return pattern;
  }

  /**
   * True when `term` occurs in `text` as a whole token (see _termPattern).
   */
  _hasTerm(text, term) {
    return this._termPattern(term).test(text);
  }

  /**
   * Extract resource name from user request.
   *
   * Patterns are tried from most to least specific. For each pattern only its
   * first match is considered; it is accepted when it is longer than two
   * characters and is not a common filler word.
   *
   * @param {string} request - Normalized request text
   * @returns {string} The resource name, or 'unknown' when none is found.
   */
  _extractResourceName(request) {
    const patterns = [
      // "stop my pg-instance-main1" - look after "my"
      /(?:my\s+)([a-zA-Z0-9\-_]+(?:\-[a-zA-Z0-9]+)*)/,
      // "stop instance pg-instance-main1"
      /instance\s+([a-zA-Z0-9\-_]+)/,
      // "stop database pg-instance-main1"
      /database\s+([a-zA-Z0-9\-_]+)/,
      // "stop bucket my-data-bucket"
      /bucket\s+([a-zA-Z0-9\-_]+)/,
      // "bring up escher-ec2"
      /(?:up|down)\s+([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)*)/,
      // Action word followed by a hyphenated name (optionally after "my")
      /(?:stop|start|restart|delete|modify|create|bring|power|boot|launch)\s+(?:my\s+)?([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)+)/,
      // Action word followed by any name, hyphenated or not
      /(?:stop|start|restart|delete|modify|create|bring|power|boot|launch|up|down)\s+([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+)*)/,
      // Last resort - any hyphenated word that looks like a resource name
      /\b([a-zA-Z0-9]+(?:\-[a-zA-Z0-9]+){1,})\b/
    ];
    // Words that the patterns can capture but that are never resource names.
    const commonWords = new Set(['the', 'my', 'our', 'this', 'that', 'instance', 'database', 'bucket', 'server', 'stop', 'start', 'restart']);

    for (const pattern of patterns) {
      const match = request.match(pattern);
      if (match && match[1] && match[1].length > 2 && !commonWords.has(match[1].toLowerCase())) {
        return match[1];
      }
    }
    return 'unknown';
  }

  /**
   * Detect service from request and resource name.
   *
   * Order of evidence:
   *   1. RDS / S3 resource-name prefixes.
   *   2. Per service, in servicePatterns order: a keyword present in the
   *      request as a whole token, then an ID pattern matching the name.
   *   3. EC2 resource-name prefixes.
   *
   * Keywords are matched as whole tokens so that, for example, "feedback"
   * does not count as the RDS keyword "db".
   *
   * @param {string} request - Normalized request text
   * @param {string} resourceName - Extracted resource name
   * @returns {string} Service key, or 'unknown'.
   */
  _detectService(request, resourceName) {
    // Resource name prefixes first, for highest specificity.
    if (/\b(pg-|mysql-|db-|rds-)\b/i.test(resourceName)) {
      return 'aws_rds';
    }
    if (/\b(bucket-|s3-|storage-)\b/i.test(resourceName)) {
      return 'aws_s3';
    }

    // Then explicit service keywords and ID patterns.
    for (const [service, config] of Object.entries(this.servicePatterns)) {
      if (config.keywords.some((keyword) => this._hasTerm(request, keyword))) {
        return service;
      }
      if (config.id_patterns.some((pattern) => pattern.test(resourceName))) {
        return service;
      }
    }

    // EC2-style names are checked last to avoid false matches.
    if (/\b(i-[0-9a-f]+|web-|app-|server-)\b/i.test(resourceName)) {
      return 'aws_ec2';
    }
    return 'unknown';
  }

  /**
   * Detect operation from request.
   *
   * The phrase table is consulted first, with each phrase matched as a whole
   * token so that "reboot" is not mistaken for the start phrase "boot". If no
   * phrase matches, plain verb substrings are checked, most specific first.
   *
   * @param {string} request - Normalized request text
   * @param {string} service - Detected service key
   * @returns {string} Operation name, or 'unknown'.
   */
  _detectOperation(request, service) {
    for (const [standardOp, variations] of Object.entries(this.operationMappings)) {
      if (variations.some((variation) => this._hasTerm(request, variation))) {
        return this._mapOperationToService(standardOp, service);
      }
    }

    // Default operation detection - CHECK MORE SPECIFIC TERMS FIRST
    // ("restart" contains "start").
    if (request.includes('restart') || request.includes('reboot')) {
      return service === 'aws_rds' ? 'restart_db_instance' : 'reboot_instance';
    }
    if (request.includes('start')) {
      return service === 'aws_rds' ? 'start_db_instance' : 'start_instance';
    }
    if (request.includes('stop')) {
      return service === 'aws_rds' ? 'stop_db_instance' : 'stop_instance';
    }
    if (request.includes('delete') || request.includes('remove') || request.includes('terminate')) {
      if (service === 'aws_rds') return 'delete_db_instance';
      if (service === 'aws_s3') return 'delete_bucket';
      return 'terminate_instance';
    }
    return 'unknown';
  }

  /**
   * Map generic operation to service-specific operation.
   *
   * Generic keys (start, stop, restart, delete, terminate, modify) are
   * resolved per service. Any other key defined in operationMappings resolves
   * to the first entry of its phrase list. Anything else, including a generic
   * key paired with a service it has no mapping for, is returned unchanged.
   *
   * @param {string} operation - Generic operation key
   * @param {string} service - Service key
   * @returns {string} Service-specific operation name.
   */
  _mapOperationToService(operation, service) {
    const removal = {
      'aws_ec2': 'terminate_instance',
      'aws_rds': 'delete_db_instance',
      'aws_s3': 'delete_bucket'
    };
    const mappings = {
      'start': {
        'aws_ec2': 'start_instance',
        'aws_rds': 'start_db_instance'
      },
      'stop': {
        'aws_ec2': 'stop_instance',
        'aws_rds': 'stop_db_instance'
      },
      'restart': {
        'aws_ec2': 'reboot_instance',
        'aws_rds': 'restart_db_instance'
      },
      'delete': removal,
      // "terminate" phrases (destroy, kill) mean removal for every service.
      'terminate': removal,
      'modify': {
        'aws_ec2': 'modify_instance',
        'aws_rds': 'modify_db_instance',
        'aws_s3': 'modify_bucket_policy'
      }
    };

    if (Object.prototype.hasOwnProperty.call(mappings, operation)) {
      return mappings[operation][service] || operation;
    }

    const variations = Object.prototype.hasOwnProperty.call(this.operationMappings, operation)
      ? this.operationMappings[operation]
      : null;
    if (Array.isArray(variations) && variations.length > 0) {
      return variations[0];
    }
    return operation;
  }
}
module.exports = OperationContextService;