@escher-dbai/rag-module
Version:
Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.
352 lines (307 loc) • 12.4 kB
JavaScript
const { EventEmitter } = require('events');
/**
* Data Filtering Service
* Filters cloud resource data to remove confidential information before sending to service agents
* Uses OperationContextService and ServiceRegistry for intelligent filtering
*/
class DataFilteringService extends EventEmitter {
  // Technical attributes that are kept whatever their value looks like.
  static _ALWAYS_SAFE_FIELDS = new Set([
    'region', 'cloud', 'availability_zone', 'state', 'status',
    'instance_type', 'db_instance_class', 'engine', 'engine_version',
    'allocated_storage', 'storage_type', 'port', 'publicly_accessible',
    'platform', 'architecture', 'launch_time', 'creation_date',
    'versioning_status', 'storage_class'
  ]);

  // Attributes that carry names or sensitive configuration; always dropped.
  static _NEVER_SAFE_FIELDS = new Set([
    'name', 'tags', 'security_groups', 'key_name', 'user_data',
    'endpoint', 'master_username', 'master_user_password', 'db_name',
    'bucket_name', 'policy', 'lifecycle_configuration', 'cors_configuration'
  ]);

  // "<prefix>-<hex>" with an 8- or 17-digit hex suffix. The suffix length is
  // pinned so that hex-looking human names ("web-01", "app-db") do not pass
  // as generated identifiers. No `g` flag, so `.test()` is stateless.
  static _AWS_GENERATED_ID = /^[a-z]+-(?:[0-9a-f]{8}|[0-9a-f]{17})$/i;

  /**
   * @param {Object} operationContextService - Object exposing
   *   `getOperationConfig(service, operation)`; the returned config is read
   *   for `confidential_data`, `non_confidential_data`, `required_params`,
   *   `optional_params` and `validation_rules`.
   */
  constructor(operationContextService) {
    super();
    this.operationContext = operationContextService;
  }

  /**
   * Filter resource data for a specific operation.
   *
   * Emits `data-filtered` on success and `filtering-error` on failure.
   *
   * @param {Object} resource - Full resource data (`id`, `metadata`)
   * @param {string} service - Service name (e.g., 'aws_ec2')
   * @param {string} operation - Operation name (e.g., 'stop_instance')
   * @returns {Object} Filtered data: `resource_id`, `operation_context`,
   *   `operation_data`, `filtering_metadata`, `validation_info`
   * @throws {Error} `Data filtering failed: <reason>` for any failure; the
   *   underlying error is attached as `cause` where the runtime supports it.
   */
  filterResourceForOperation(resource, service, operation) {
    try {
      const operationConfig = this.operationContext.getOperationConfig(service, operation);
      if (!operationConfig) {
        throw new Error(`Unknown operation: ${service}.${operation}`);
      }
      if (resource === null || typeof resource !== 'object') {
        throw new TypeError('Resource must be an object');
      }

      const metadata = resource.metadata || {};
      const filtered = {
        // Identifier handed to the service agent (see _extractSafeResourceId)
        resource_id: this._extractSafeResourceId(resource, service),
        // Operation context for the service agent
        operation_context: {
          service,
          operation,
          region: metadata.region || 'unknown',
          cloud: metadata.cloud || 'aws'
        },
        // Populated by _filterMetadata
        operation_data: {},
        // Bookkeeping about what the filter did
        filtering_metadata: {
          original_data_fields: Object.keys(metadata).length,
          filtered_data_fields: 0,
          confidential_fields_removed: [],
          timestamp: new Date().toISOString()
        }
      };

      this._filterMetadata(resource, operationConfig, filtered);
      this._addValidationInfo(filtered, operationConfig);

      // Audit event
      this.emit('data-filtered', {
        service,
        operation,
        resource_id: filtered.resource_id,
        fields_removed: filtered.filtering_metadata.confidential_fields_removed.length,
        fields_kept: filtered.filtering_metadata.filtered_data_fields
      });
      return filtered;
    } catch (error) {
      // `resource` may be null/undefined here; reading `.id` unguarded would
      // throw from inside the handler and hide the real failure.
      this.emit('filtering-error', {
        service,
        operation,
        resource_id: resource?.id,
        error: error.message
      });
      throw new Error(`Data filtering failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Filter resource data based on an already-parsed operation.
   *
   * @param {Object} resource - Full resource data
   * @param {Object} parsedOperation - Object with `parsed_successfully`,
   *   `service` and `operation`
   * @returns {Object} Filtered resource data
   * @throws {Error} If `parsedOperation` is missing or not marked as parsed
   */
  filterResourceForParsedOperation(resource, parsedOperation) {
    if (!parsedOperation?.parsed_successfully) {
      throw new Error('Cannot filter data for unparsed operation');
    }
    return this.filterResourceForOperation(
      resource,
      parsedOperation.service,
      parsedOperation.operation
    );
  }

  /**
   * Batch filter multiple resources for the same operation.
   *
   * A resource that fails to filter yields
   * `{ resource_id, error, filtering_failed: true }` in its slot instead of
   * aborting the batch.
   *
   * @param {Array} resources - Array of resource objects
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @returns {Array} One entry per input resource, in order
   */
  batchFilterResources(resources, service, operation) {
    return resources.map((resource) => {
      try {
        return this.filterResourceForOperation(resource, service, operation);
      } catch (error) {
        return {
          // Guarded: a null entry must not take the whole batch down
          resource_id: resource?.id || 'unknown',
          error: error.message,
          filtering_failed: true
        };
      }
    });
  }

  /**
   * Get filtering summary for audit purposes.
   *
   * Sizes are the lengths of the JSON serializations of both arguments.
   *
   * @param {Object} resource - Original resource
   * @param {Object} filteredData - Filtered result
   * @returns {Object} Summary of what was filtered
   */
  getFilteringSummary(resource, filteredData) {
    const originalSize = JSON.stringify(resource).length;
    const filteredSize = JSON.stringify(filteredData).length;
    const reduction = originalSize - filteredSize;
    return {
      original_data_size: originalSize,
      filtered_data_size: filteredSize,
      size_reduction_bytes: reduction,
      size_reduction_percentage: Math.round((reduction / originalSize) * 100),
      confidential_fields_removed: filteredData.filtering_metadata?.confidential_fields_removed || [],
      non_confidential_fields_kept: filteredData.filtering_metadata?.filtered_data_fields || 0
    };
  }

  // Private methods

  /**
   * Extract a resource ID that is safe to pass on, based on service type.
   *
   * For the known services the ID comes from metadata, falling back to a
   * generated placeholder. For other services an ARN-style `resource.id` is
   * reduced to its trailing segment, which is returned only when it has the
   * generated-ID shape; a non-ARN `resource.id` is returned unchanged.
   */
  _extractSafeResourceId(resource, service) {
    const metadata = resource.metadata || {};
    switch (service) {
      case 'aws_ec2':
        return metadata.instance_id || this._generateMockId('i-', 17);
      case 'aws_rds':
        return metadata.resource_id
          || metadata.db_resource_id
          || this._generateMockId('db-', 12);
      case 'aws_s3':
        return metadata.bucket_id
          || metadata.s3_bucket_id
          || this._generateMockId('bucket-', 12);
      default: {
        const id = resource.id;
        // Type guard: a numeric id has no `.includes` and used to throw here
        if (typeof id === 'string' && id.includes('arn:aws:')) {
          // The resource part of an ARN may itself be "type/id"
          // (e.g. "instance/i-0abc..."), so strip that prefix as well.
          const tail = id.split(':').pop().split('/').pop();
          return this._looksLikeAwsGeneratedId(tail)
            ? tail
            : this._generateMockId('res-', 12);
        }
        return id;
      }
    }
  }

  /**
   * True when `value` is a string shaped like a generated identifier
   * (lowercase-style prefix, hyphen, 8 or 17 hex digits).
   */
  _looksLikeAwsGeneratedId(value) {
    return typeof value === 'string'
      && DataFilteringService._AWS_GENERATED_ID.test(value);
  }

  /**
   * Generate a mock AWS-style resource ID for demo purposes.
   * Uses Math.random, so the result is neither stable across calls nor
   * suitable as a secret.
   */
  _generateMockId(prefix, length) {
    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    let result = prefix;
    for (let i = 0; i < length; i++) {
      result += chars.charAt(Math.floor(Math.random() * chars.length));
    }
    return result;
  }

  /**
   * Copy permitted metadata fields into `filtered.operation_data` and record
   * the names of dropped fields in `filtered.filtering_metadata`.
   * Config lists that are absent are treated as empty.
   */
  _filterMetadata(resource, operationConfig, filtered) {
    const metadata = resource.metadata || {};
    const confidential = new Set(operationConfig.confidential_data ?? []);
    const nonConfidential = new Set(operationConfig.non_confidential_data ?? []);
    const operationParams = new Set([
      ...(operationConfig.required_params ?? []),
      ...(operationConfig.optional_params ?? [])
    ]);
    const { operation_data: kept, filtering_metadata: stats } = filtered;

    for (const [fieldName, fieldValue] of Object.entries(metadata)) {
      // Skip null or undefined values
      if (fieldValue == null) continue;

      // Confidential fields are excluded before any other rule is consulted
      if (confidential.has(fieldName)) {
        stats.confidential_fields_removed.push(fieldName);
        continue;
      }

      // Keep declared non-confidential operation parameters, plus any other
      // field the generic heuristic considers safe
      const isDeclaredParam = nonConfidential.has(fieldName) && operationParams.has(fieldName);
      if (isDeclaredParam || this._isSafeField(fieldName, fieldValue)) {
        kept[fieldName] = fieldValue;
        stats.filtered_data_fields++;
      } else {
        stats.confidential_fields_removed.push(fieldName);
      }
    }

    // Region and cloud are always passed along when present
    for (const key of ['region', 'cloud']) {
      if (metadata[key] && !kept[key]) {
        kept[key] = metadata[key];
        stats.filtered_data_fields++;
      }
    }
  }

  /**
   * Check if a field is generally safe to include.
   *
   * Order: allow-list by name, deny-list by name, then by value. A string
   * value is kept only when it has the generated-ID shape; that check must
   * come first because such IDs also look like hyphenated names. Numbers
   * and booleans are kept; every other value is dropped.
   */
  _isSafeField(fieldName, fieldValue) {
    if (DataFilteringService._ALWAYS_SAFE_FIELDS.has(fieldName)) {
      return true;
    }
    if (DataFilteringService._NEVER_SAFE_FIELDS.has(fieldName)) {
      return false;
    }
    if (typeof fieldValue === 'string') {
      return this._looksLikeAwsGeneratedId(fieldValue);
    }
    return typeof fieldValue === 'number' || typeof fieldValue === 'boolean';
  }

  /**
   * Attach `validation_info` to `filtered`, listing required parameters that
   * are absent from `operation_data` (`resource_id` is never reported, as it
   * lives at the top level of the filtered object).
   */
  _addValidationInfo(filtered, operationConfig) {
    const missing = [];
    for (const requiredParam of operationConfig.required_params ?? []) {
      if (requiredParam === 'resource_id') continue;
      // operation_data keys come from resource metadata, so a key named
      // "hasOwnProperty" could shadow the method; call the prototype's copy.
      if (!Object.prototype.hasOwnProperty.call(filtered.operation_data, requiredParam)) {
        missing.push(requiredParam);
      }
    }
    filtered.validation_info = {
      required_params: operationConfig.required_params,
      optional_params: operationConfig.optional_params,
      validation_rules: operationConfig.validation_rules,
      missing_required_params: missing
    };
  }

  /**
   * Generate a safe summary of a resource for logging.
   * Never throws: a filtering failure is reported in the returned object.
   *
   * @param {Object} resource - Full resource data
   * @param {string} service - Service name
   * @param {string} operation - Operation name
   * @returns {Object} Summary fields, or `{ service, operation, error, summary }`
   */
  generateSafeSummary(resource, service, operation) {
    try {
      const filtered = this.filterResourceForOperation(resource, service, operation);
      const region = filtered.operation_data.region;
      return {
        service,
        operation,
        resource_id: filtered.resource_id,
        region,
        cloud: filtered.operation_context.cloud,
        has_required_data: filtered.validation_info.missing_required_params.length === 0,
        data_fields_available: filtered.filtering_metadata.filtered_data_fields,
        summary: `${service}.${operation} for resource in ${region || 'unknown region'}`
      };
    } catch (error) {
      return {
        service,
        operation,
        error: error.message,
        summary: `Failed to generate summary for ${service}.${operation}`
      };
    }
  }
}
module.exports = DataFilteringService;