UNPKG

@escher-dbai/rag-module

Version:

Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.

352 lines (307 loc) 12.4 kB
const { EventEmitter } = require('events');

/**
 * Metadata field names that are forwarded regardless of their value
 * (technical attributes, not names or secrets).
 */
const ALWAYS_SAFE_FIELDS = new Set([
  'region', 'cloud', 'availability_zone', 'state', 'status', 'instance_type',
  'db_instance_class', 'engine', 'engine_version', 'allocated_storage',
  'storage_type', 'port', 'publicly_accessible', 'platform', 'architecture',
  'launch_time', 'creation_date', 'versioning_status', 'storage_class'
]);

/**
 * Metadata field names that are never forwarded (names, credentials,
 * free-form configuration).
 */
const NEVER_SAFE_FIELDS = new Set([
  'name', 'tags', 'security_groups', 'key_name', 'user_data', 'endpoint',
  'master_username', 'master_user_password', 'db_name', 'bucket_name',
  'policy', 'lifecycle_configuration', 'cors_configuration'
]);

/**
 * Loose "<letters>-<hex>" shape used when inspecting the tail of an ARN.
 * (The original code OR-ed this with a single-letter-prefix variant, which is
 * a strict subset of this pattern under the `i` flag.)
 */
const ARN_TAIL_ID_PATTERN = /^[a-z]+-[0-9a-f]+$/i;

/**
 * Stricter "<letters>-<8+ hex digits>" shape used for arbitrary metadata
 * values, so that short human names such as "server-01" or "prod-db" are not
 * mistaken for generated identifiers.
 */
const GENERATED_ID_VALUE_PATTERN = /^[a-z]+-[0-9a-f]{8,}$/i;

/**
 * Data Filtering Service
 * Filters cloud resource data to remove confidential information before sending to service agents
 * Uses OperationContextService and ServiceRegistry for intelligent filtering
 *
 * Events emitted:
 *  - 'data-filtered'   after a resource was filtered successfully
 *  - 'filtering-error' when filtering a resource failed
 */
class DataFilteringService extends EventEmitter {
  /**
   * @param {Object} operationContextService - Object exposing
   *   `getOperationConfig(service, operation)`; the returned config is read for
   *   `confidential_data`, `non_confidential_data`, `required_params`,
   *   `optional_params` and `validation_rules`.
   */
  constructor(operationContextService) {
    super();
    this.operationContext = operationContextService;
  }

  /**
   * Filter resource data for a specific operation
   * @param {Object} resource - Full resource data from RAG storage
   * @param {string} service - Service name (e.g., 'aws_ec2')
   * @param {string} operation - Operation name (e.g., 'stop_instance')
   * @returns {Object} Filtered resource data safe for service agents
   * @throws {Error} "Data filtering failed: ..." for any failure; the original
   *   error is attached as `cause` on runtimes that support it.
   */
  filterResourceForOperation(resource, service, operation) {
    try {
      // Validate up front so a bad resource yields a clear, wrapped error.
      if (resource === null || typeof resource !== 'object') {
        throw new TypeError('resource must be an object');
      }

      // Get operation configuration
      const operationConfig = this.operationContext.getOperationConfig(service, operation);
      if (!operationConfig) {
        throw new Error(`Unknown operation: ${service}.${operation}`);
      }

      const metadata = resource.metadata || {};

      // Create filtered data object
      const filtered = {
        // Always include the resource ID (ARN or AWS-generated ID)
        resource_id: this._extractSafeResourceId(resource, service),

        // Operation context for the service agent
        operation_context: {
          service: service,
          operation: operation,
          region: metadata.region || 'unknown',
          cloud: metadata.cloud || 'aws'
        },

        // Filtered operation data
        operation_data: {},

        // Metadata about filtering
        filtering_metadata: {
          original_data_fields: Object.keys(metadata).length,
          filtered_data_fields: 0,
          confidential_fields_removed: [],
          timestamp: new Date().toISOString()
        }
      };

      // Filter metadata based on operation requirements
      this._filterMetadata(resource, operationConfig, filtered);

      // Add validation info
      this._addValidationInfo(filtered, operationConfig);

      // Emit filtering event for audit
      this.emit('data-filtered', {
        service,
        operation,
        resource_id: filtered.resource_id,
        fields_removed: filtered.filtering_metadata.confidential_fields_removed.length,
        fields_kept: filtered.filtering_metadata.filtered_data_fields
      });

      return filtered;
    } catch (error) {
      // `resource` may be null/undefined here; reading `.id` unguarded would
      // throw from inside the handler and hide the real failure.
      this.emit('filtering-error', {
        service,
        operation,
        resource_id: resource?.id,
        error: error.message
      });
      throw new Error(`Data filtering failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Filter resource data based on parsed operation
   * @param {Object} resource - Full resource data
   * @param {Object} parsedOperation - Result from OperationContextService.parseOperation
   * @returns {Object} Filtered resource data
   * @throws {Error} When `parsedOperation` is missing or not parsed successfully
   */
  filterResourceForParsedOperation(resource, parsedOperation) {
    if (!parsedOperation?.parsed_successfully) {
      throw new Error('Cannot filter data for unparsed operation');
    }

    return this.filterResourceForOperation(
      resource,
      parsedOperation.service,
      parsedOperation.operation
    );
  }

  /**
   * Batch filter multiple resources for the same operation
   * @param {Array} resources - Array of resource objects
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @returns {Array} One entry per input: the filtered data, or an object
   *   `{ resource_id, error, filtering_failed: true }` when that item failed
   */
  batchFilterResources(resources, service, operation) {
    return resources.map((resource) => {
      try {
        return this.filterResourceForOperation(resource, service, operation);
      } catch (error) {
        // A null/undefined entry must not abort the rest of the batch.
        return {
          resource_id: resource?.id || 'unknown',
          error: error.message,
          filtering_failed: true
        };
      }
    });
  }

  /**
   * Get filtering summary for audit purposes
   * @param {Object} resource - Original resource
   * @param {Object} filteredData - Filtered result
   * @returns {Object} Summary of what was filtered (sizes are JSON string lengths)
   */
  getFilteringSummary(resource, filteredData) {
    // JSON.stringify returns undefined for undefined/functions; treat as empty.
    const originalSize = (JSON.stringify(resource) ?? '').length;
    const filteredSize = (JSON.stringify(filteredData) ?? '').length;
    const reduction = originalSize - filteredSize;

    return {
      original_data_size: originalSize,
      filtered_data_size: filteredSize,
      size_reduction_bytes: reduction,
      size_reduction_percentage: originalSize > 0
        ? Math.round((reduction / originalSize) * 100)
        : 0,
      confidential_fields_removed: filteredData?.filtering_metadata?.confidential_fields_removed || [],
      non_confidential_fields_kept: filteredData?.filtering_metadata?.filtered_data_fields || 0
    };
  }

  // Private methods

  /**
   * Extract safe resource ID based on service type
   * @param {Object} resource - Resource with optional `metadata` and `id`
   * @param {string} service - Service name selecting the ID strategy
   * @returns {*} An ID taken from metadata, the ARN tail, a generated mock ID,
   *   or `resource.id` unchanged for non-ARN ids of unknown services
   */
  _extractSafeResourceId(resource, service) {
    const metadata = resource.metadata || {};

    switch (service) {
      case 'aws_ec2':
        // Use instance_id instead of instance name or ARN
        return metadata.instance_id || this._generateMockId('i-', 17);

      case 'aws_rds':
        // Use resource_id instead of db_instance_identifier or ARN
        return metadata.resource_id
          || metadata.db_resource_id
          || this._generateMockId('db-', 12);

      case 'aws_s3':
        // Use bucket_id instead of bucket name or ARN
        return metadata.bucket_id
          || metadata.s3_bucket_id
          || this._generateMockId('bucket-', 12);

      default: {
        // For unknown services, try to extract AWS-generated ID from ARN
        const { id } = resource;
        if (typeof id === 'string' && id.includes('arn:aws:')) {
          // Last colon-separated segment of the ARN
          const resourcePart = id.split(':').pop();

          // Keep it only if it looks like a generated ID; otherwise it is
          // presumably a name, so substitute a mock ID
          return ARN_TAIL_ID_PATTERN.test(resourcePart)
            ? resourcePart
            : this._generateMockId('res-', 12);
        }
        return id;
      }
    }
  }

  /**
   * Generate a mock AWS-style resource ID for demo purposes
   * @param {string} prefix - Text placed before the random part
   * @param {number} length - Number of random characters (A-Z, 0-9) to append
   * @returns {string} `prefix` followed by `length` random characters
   */
  _generateMockId(prefix, length) {
    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    let result = prefix;
    for (let i = 0; i < length; i++) {
      // Math.random is sufficient here: the ID is a placeholder, not a secret
      result += chars.charAt(Math.floor(Math.random() * chars.length));
    }
    return result;
  }

  /**
   * Filter metadata based on operation configuration.
   * Mutates `filtered.operation_data` and `filtered.filtering_metadata`.
   * @param {Object} resource - Resource whose `metadata` is filtered
   * @param {Object} operationConfig - Operation config; missing lists are treated as empty
   * @param {Object} filtered - Result object being built
   */
  _filterMetadata(resource, operationConfig, filtered) {
    const metadata = resource.metadata || {};
    const confidentialFields = new Set(operationConfig.confidential_data || []);
    const nonConfidentialFields = new Set(operationConfig.non_confidential_data || []);
    const operationParams = new Set([
      ...(operationConfig.required_params || []),
      ...(operationConfig.optional_params || [])
    ]);
    const { operation_data: operationData, filtering_metadata: audit } = filtered;

    // Process each metadata field
    for (const [fieldName, fieldValue] of Object.entries(metadata)) {
      // Skip null or undefined values
      if (fieldValue == null) continue;

      // Always exclude confidential fields
      if (confidentialFields.has(fieldName)) {
        audit.confidential_fields_removed.push(fieldName);
        continue;
      }

      // Keep fields declared non-confidential AND used by the operation,
      // or any additional field that passes the generic safety heuristic
      const declaredForOperation =
        nonConfidentialFields.has(fieldName) && operationParams.has(fieldName);

      if (declaredForOperation || this._isSafeField(fieldName, fieldValue)) {
        operationData[fieldName] = fieldValue;
        audit.filtered_data_fields++;
      } else {
        audit.confidential_fields_removed.push(fieldName);
      }
    }

    // Always include region if available (required for most operations).
    // NOTE(review): this also re-adds region when the config lists it as
    // confidential, in which case it appears in both lists — confirm intent.
    if (metadata.region && !operationData.region) {
      operationData.region = metadata.region;
      audit.filtered_data_fields++;
    }

    // Always include cloud provider if available (same caveat as region)
    if (metadata.cloud && !operationData.cloud) {
      operationData.cloud = metadata.cloud;
      audit.filtered_data_fields++;
    }
  }

  /**
   * Check if a field is generally safe to include
   * @param {string} fieldName - Metadata key
   * @param {*} fieldValue - Metadata value
   * @returns {boolean} true for allow-listed keys, generated-ID-shaped strings,
   *   numbers and booleans; false for deny-listed keys and everything else
   */
  _isSafeField(fieldName, fieldValue) {
    // Always safe fields (technical data, not names or sensitive info)
    if (ALWAYS_SAFE_FIELDS.has(fieldName)) {
      return true;
    }

    // Never safe fields (contain names or sensitive data)
    if (NEVER_SAFE_FIELDS.has(fieldName)) {
      return false;
    }

    if (typeof fieldValue === 'string') {
      // Only strings shaped like an AWS-generated ID are forwarded. This check
      // must be the deciding one: a "looks like a name" test placed first
      // would also match every such ID and make this branch unreachable.
      // Every other string is withheld.
      return GENERATED_ID_VALUE_PATTERN.test(fieldValue);
    }

    // Default to safe for numeric values and booleans
    return typeof fieldValue === 'number' || typeof fieldValue === 'boolean';
  }

  /**
   * Add validation information for service agents.
   * Sets `filtered.validation_info`, listing required params absent from
   * `filtered.operation_data` ('resource_id' is exempt because it is carried
   * in `filtered.resource_id`).
   * @param {Object} filtered - Result object being built
   * @param {Object} operationConfig - Operation config
   */
  _addValidationInfo(filtered, operationConfig) {
    const requiredParams = operationConfig.required_params || [];
    const operationData = filtered.operation_data;

    filtered.validation_info = {
      required_params: requiredParams,
      optional_params: operationConfig.optional_params,
      validation_rules: operationConfig.validation_rules,
      // Check for missing required parameters
      missing_required_params: requiredParams.filter(
        (param) =>
          param !== 'resource_id' &&
          !Object.prototype.hasOwnProperty.call(operationData, param)
      )
    };
  }

  /**
   * Generate a safe summary of resource for logging
   * @param {Object} resource - Full resource data
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @returns {Object} Summary built from the filtered data, or an object with
   *   an `error` message when filtering failed (never throws)
   */
  generateSafeSummary(resource, service, operation) {
    try {
      const filtered = this.filterResourceForOperation(resource, service, operation);

      return {
        service: service,
        operation: operation,
        resource_id: filtered.resource_id,
        region: filtered.operation_data.region,
        cloud: filtered.operation_context.cloud,
        has_required_data: filtered.validation_info.missing_required_params.length === 0,
        data_fields_available: filtered.filtering_metadata.filtered_data_fields,
        summary: `${service}.${operation} for resource in ${filtered.operation_data.region || 'unknown region'}`
      };
    } catch (error) {
      return {
        service: service,
        operation: operation,
        error: error.message,
        summary: `Failed to generate summary for ${service}.${operation}`
      };
    }
  }
}

module.exports = DataFilteringService;