UNPKG

vibe-guard

Version:

██ Vibe-Guard Security Scanner - 28 essential security rules to catch vulnerabilities before they catch you! Zero dependencies, instant setup, works everywhere, optimized performance. Detects SQL injection, XSS, exposed secrets, CSRF, CORS issues, contain

devjosef.github.io/vibe-guard/

Devjosef/vibe-guard

456 lines • 21.5 kB

JavaScript

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AiDataLeakagePreventionRule = void 0; const types_1 = require("../types"); class AiDataLeakagePreventionRule extends types_1.BaseRule { constructor() { super(...arguments); this.name = 'ai-data-leakage-prevention'; this.description = 'Detects potential data leakage in AI systems and training data exposure with context-aware analysis'; this.severity = 'high'; this.leakagePatterns = [ // Training data exposure: More specific patterns! // Critical severity { pattern: /(?:training[_-]?data|dataset|corpus)\s*[:=]\s*['"`]?[^'"`]*(?:expose|leak|public|unrestricted)['"`]?/gi, type: 'Training Data Exposure', confidence: 0.9, severity: 'critical', validation: (text) => this.validateTrainingDataExposure(text) }, // Critical severity { pattern: /(?:sensitive|confidential|proprietary)\s*[:=]\s*['"`]?[^'"`]*(?:training|dataset|corpus)['"`]?/gi, type: 'Sensitive Training Data', confidence: 0.85, severity: 'critical', validation: (text) => this.validateSensitiveTrainingData(text) }, // High severity // Model output containing sensitive data: more specific { pattern: /(?:model|ai|llm)\s*[:=]\s*['"`]?[^'"`]*(?:output|response|generation)\s*[:=]\s*['"`]?[^'"`]*(?:sensitive|confidential|proprietary)['"`]?/gi, type: 'Sensitive Data in AI Output', confidence: 0.8, severity: 'high', validation: (text) => this.validateSensitiveOutput(text) }, // High severity // Unfiltered AI responses: more specific { pattern: /(?:ai|model|llm)\s*[:=]\s*['"`]?[^'"`]*(?:unfiltered|unrestricted|raw)\s*[:=]\s*['"`]?[^'"`]*(?:output|response)['"`]?/gi, type: 'Unfiltered AI Output', confidence: 0.75, severity: 'high', validation: (text) => this.validateUnfilteredOutput(text) }, // High severity // Data classification bypass: more specific { pattern: /(?:bypass|ignore|skip)\s*[:=]\s*['"`]?[^'"`]*(?:classification|label|sensitivity)['"`]?/gi, type: 'Data Classification Bypass', confidence: 0.85, severity: 'high', validation: (text) => this.validateClassificationBypass(text) }, // Critical severity // AI model containing sensitive data: more specific { pattern: /(?:model|weights|parameters)\s*[:=]\s*['"`]?[^'"`]*(?:contain|include|embed)\s*[:=]\s*['"`]?[^'"`]*(?:sensitive|confidential)['"`]?/gi, type: 'Sensitive Data in Model', confidence: 0.9, severity: 'critical', validation: (text) => this.validateSensitiveModel(text) }, // High severity // Unencrypted AI artifacts: more specific { pattern: /(?:model|weights|artifacts)\s*[:=]\s*['"`]?[^'"`]*(?:unencrypted|plaintext|raw)['"`]?/gi, type: 'Unencrypted AI Artifacts', confidence: 0.8, severity: 'high', validation: (text) => this.validateUnencryptedArtifacts(text) }, // High severity // Logging sensitive data: more specific { pattern: /(?:log|console|print|echo)\s*[:=]\s*['"`]?[^'"`]*(?:sensitive|confidential|proprietary|personal)['"`]?/gi, type: 'Sensitive Data Logging', confidence: 0.85, severity: 'high', validation: (text) => this.validateSensitiveLogging(text) }, // Medium severity // Data export without filtering: more specific { pattern: /(?:export|save|write)\s*[:=]\s*['"`]?[^'"`]*(?:all|complete|full)\s*[:=]\s*['"`]?[^'"`]*(?:data|dataset)['"`]?/gi, type: 'Unfiltered Data Export', confidence: 0.7, severity: 'medium', validation: (text) => this.validateUnfilteredExport(text) }, // Medium severity // API response without sanitization: more specific { pattern: /(?:api|response|return)\s*[:=]\s*['"`]?[^'"`]*(?:raw|unfiltered|complete)\s*[:=]\s*['"`]?[^'"`]*(?:data|result)['"`]?/gi, type: 'Unsanitized API Response', confidence: 0.75, severity: 'medium', validation: (text) => this.validateUnsanitizedResponse(text) } ]; this.falsePositivePatterns = [ // Development and testing patterns: /example/i, /demo/i, /test/i, /mock/i, /sample/i, /placeholder/i, /comment/i, /todo/i, /fixme/i, /\/\/.*/i, /#.*/i, /\/\*.*\*\//i, //i, /development/i, /dev/i, /staging/i, /localhost/i, // Documentation and tutorials: /documentation/i, /docs?/i, /tutorial/i, /guide/i, /readme/i, /example[_-]?config/i, /sample[_-]?config/i, /\.md$/i, /\.rst$/i, /\.txt$/i, // Safe environments: /sandbox/i, /isolated/i, /contained/i, /restricted/i, /safe[_-]?mode/i, /demo[_-]?mode/i, /test[_-]?environment/i, // Data protection patterns (likely safe): /encrypt/i, /protect/i, /secure/i, /filter/i, /sanitize/i, /anonymize/i, /mask/i, /redact/i, /dlp/i, /data[_-]?loss[_-]?prevention/i ]; } check(fileContent) { const issues = []; const language = this.detectLanguage(fileContent.path); const framework = this.detectFramework(fileContent.content, language); const hasDataProtection = this.hasDataProtection(fileContent.content); const isProtectedEnvironment = this.isProtectedEnvironment(fileContent.content); for (const { pattern, type, confidence, severity, validation } of this.leakagePatterns) { const matches = this.findMatches(fileContent.content, pattern); for (const { match, line, column, lineContent } of matches) { const matchedText = match[0]; const context = this.analyzeContext(fileContent, line, column, language, framework, hasDataProtection, isProtectedEnvironment); // Skips if in safe context if (this.isSafeContext(context)) { continue; } // Validates the data leakage issue if (!validation(matchedText)) { continue; } // Calculates final confidence based on context const finalConfidence = this.calculateConfidence(confidence, context); if (finalConfidence >= 0.5) { issues.push(this.createIssue(fileContent.path, line, column, lineContent, `${severity.toUpperCase()}: ${type} detected (confidence: ${Math.round(finalConfidence * 100)}%): ${this.getLineContext(lineContent, column)}`, this.generateSuggestion(type, context), severity)); } } } return issues; } analyzeContext(fileContent, line, column, language, framework, hasDataProtection, isProtectedEnvironment) { const lines = fileContent.lines; const currentLine = lines[line - 1] || ''; const surroundingLines = lines.slice(Math.max(0, line - 3), line + 2); return { isInComment: this.isInComment(currentLine, language), isInString: this.isInString(currentLine, column), isInTestFile: this.isInTestFile(fileContent.path), isInDocumentation: this.isInDocumentation(fileContent.path), isInDevelopment: this.isInDevelopment(surroundingLines), surroundingCode: surroundingLines.join('\n'), language, framework, hasDataProtection: hasDataProtection || false, isProtectedEnvironment: isProtectedEnvironment || false }; } isSafeContext(context) { if (context.isInComment) return true; if (context.isInTestFile) return true; if (context.isInDocumentation) return true; if (context.isInDevelopment) return true; if (this.falsePositivePatterns.some(pattern => pattern.test(context.surroundingCode))) { return true; } if (context.hasDataProtection) return true; if (context.isProtectedEnvironment) return true; return false; } detectLanguage(filePath) { const ext = filePath.split('.').pop()?.toLowerCase(); const languageMap = { 'js': 'javascript', 'jsx': 'javascript', 'ts': 'typescript', 'tsx': 'typescript', 'py': 'python', 'php': 'php', 'rb': 'ruby', 'go': 'go', 'java': 'java', 'cs': 'csharp' }; return languageMap[ext || ''] || 'unknown'; } detectFramework(content, language) { if (language === 'javascript' || language === 'typescript') { if (content.includes('openai') || content.includes('OpenAI')) return 'openai'; if (content.includes('anthropic') || content.includes('Anthropic')) return 'anthropic'; if (content.includes('langchain') || content.includes('LangChain')) return 'langchain'; if (content.includes('transformers') || content.includes('Transformers')) return 'transformers'; } if (language === 'python') { if (content.includes('openai') || content.includes('OpenAI')) return 'openai'; if (content.includes('anthropic') || content.includes('Anthropic')) return 'anthropic'; if (content.includes('langchain') || content.includes('LangChain')) return 'langchain'; if (content.includes('transformers') || content.includes('Transformers')) return 'transformers'; if (content.includes('pandas') || content.includes('Pandas')) return 'pandas'; if (content.includes('numpy') || content.includes('NumPy')) return 'numpy'; } return undefined; } isInComment(line, language) { const trimmed = line.trim(); if (language === 'javascript' || language === 'typescript') { return trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*'); } if (language === 'python') { return trimmed.startsWith('#'); } if (language === 'php') { return trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('#'); } return false; } isInString(line, column) { const before = line.substring(0, column); const quotes = (before.match(/['"`]/g) || []).length; return quotes % 2 === 1; } isInTestFile(filePath) { return filePath.includes('test') || filePath.includes('spec') || filePath.includes('mock'); } isInDocumentation(filePath) { const docPatterns = [ /docs?\//i, /documentation/i, /examples?/i, /samples?/i, /tutorials?/i, /guides?/i, /readme/i, /\.md$/i, /\.rst$/i, /\.txt$/i ]; return docPatterns.some(pattern => pattern.test(filePath)); } isInDevelopment(lines) { return lines.some(line => line.includes('development') || line.includes('dev') || line.includes('staging') || line.includes('localhost') || line.includes('127.0.0.1') || line.includes('NODE_ENV') || line.includes('DEBUG')); } hasDataProtection(content) { const protectionPatterns = [ /encrypt/i, /protect/i, /secure/i, /filter/i, /sanitize/i, /anonymize/i, /mask/i, /redact/i, /dlp/i, /data[_-]?loss[_-]?prevention/i, /privacy/i, /gdpr/i, /compliance/i ]; return protectionPatterns.some(pattern => pattern.test(content)); } isProtectedEnvironment(content) { const protectedPatterns = [ /sandbox/i, /isolated/i, /contained/i, /restricted/i, /monitored/i, /audited/i, /logged/i, /safe/i, /protected/i ]; return protectedPatterns.some(pattern => pattern.test(content)); } calculateConfidence(baseConfidence, context) { let confidence = baseConfidence; // Adjusts confidence based on context if (context.hasDataProtection) confidence *= 0.6; // Reduces if data protection present if (context.isProtectedEnvironment) confidence *= 0.7; // Reduces if in protected environment if (context.framework) confidence *= 1.1; // Increases for known frameworks return Math.min(confidence, 1.0); } // Validation methods for different data leakage issues! validateTrainingDataExposure(text) { const trainingKeywords = ['training', 'dataset', 'corpus']; const exposureKeywords = ['expose', 'leak', 'public', 'unrestricted']; return trainingKeywords.some(training => text.toLowerCase().includes(training)) && exposureKeywords.some(exposure => text.toLowerCase().includes(exposure)); } validateSensitiveTrainingData(text) { const sensitiveKeywords = ['sensitive', 'confidential', 'proprietary']; const trainingKeywords = ['training', 'dataset', 'corpus']; return sensitiveKeywords.some(sensitive => text.toLowerCase().includes(sensitive)) && trainingKeywords.some(training => text.toLowerCase().includes(training)); } validateSensitiveOutput(text) { const aiKeywords = ['model', 'ai', 'llm']; const outputKeywords = ['output', 'response', 'generation']; const sensitiveKeywords = ['sensitive', 'confidential', 'proprietary']; return aiKeywords.some(ai => text.toLowerCase().includes(ai)) && outputKeywords.some(output => text.toLowerCase().includes(output)) && sensitiveKeywords.some(sensitive => text.toLowerCase().includes(sensitive)); } validateUnfilteredOutput(text) { const aiKeywords = ['ai', 'model', 'llm']; const unfilteredKeywords = ['unfiltered', 'unrestricted', 'raw']; const outputKeywords = ['output', 'response']; return aiKeywords.some(ai => text.toLowerCase().includes(ai)) && unfilteredKeywords.some(unfiltered => text.toLowerCase().includes(unfiltered)) && outputKeywords.some(output => text.toLowerCase().includes(output)); } validateClassificationBypass(text) { const bypassKeywords = ['bypass', 'ignore', 'skip']; const classificationKeywords = ['classification', 'label', 'sensitivity']; return bypassKeywords.some(bypass => text.toLowerCase().includes(bypass)) && classificationKeywords.some(classification => text.toLowerCase().includes(classification)); } validateSensitiveModel(text) { const modelKeywords = ['model', 'weights', 'parameters']; const containKeywords = ['contain', 'include', 'embed']; const sensitiveKeywords = ['sensitive', 'confidential']; return modelKeywords.some(model => text.toLowerCase().includes(model)) && containKeywords.some(contain => text.toLowerCase().includes(contain)) && sensitiveKeywords.some(sensitive => text.toLowerCase().includes(sensitive)); } validateUnencryptedArtifacts(text) { const artifactKeywords = ['model', 'weights', 'artifacts']; const unencryptedKeywords = ['unencrypted', 'plaintext', 'raw']; return artifactKeywords.some(artifact => text.toLowerCase().includes(artifact)) && unencryptedKeywords.some(unencrypted => text.toLowerCase().includes(unencrypted)); } validateSensitiveLogging(text) { const loggingKeywords = ['log', 'console', 'print', 'echo']; const sensitiveKeywords = ['sensitive', 'confidential', 'proprietary', 'personal']; return loggingKeywords.some(logging => text.toLowerCase().includes(logging)) && sensitiveKeywords.some(sensitive => text.toLowerCase().includes(sensitive)); } validateUnfilteredExport(text) { const exportKeywords = ['export', 'save', 'write']; const unfilteredKeywords = ['all', 'complete', 'full']; const dataKeywords = ['data', 'dataset']; return exportKeywords.some(exportKeyword => text.toLowerCase().includes(exportKeyword)) && unfilteredKeywords.some(unfiltered => text.toLowerCase().includes(unfiltered)) && dataKeywords.some(data => text.toLowerCase().includes(data)); } validateUnsanitizedResponse(text) { const responseKeywords = ['api', 'response', 'return']; const unsanitizedKeywords = ['raw', 'unfiltered', 'complete']; const dataKeywords = ['data', 'result']; return responseKeywords.some(response => text.toLowerCase().includes(response)) && unsanitizedKeywords.some(unsanitized => text.toLowerCase().includes(unsanitized)) && dataKeywords.some(data => text.toLowerCase().includes(data)); } getLineContext(lineContent, column) { const start = Math.max(0, column - 20); const end = Math.min(lineContent.length, column + 20); return lineContent.substring(start, end).trim(); } generateSuggestion(type, context) { const suggestions = { 'Training Data Exposure': 'Implement data access controls and encryption for training data. Use secure data storage and access logging.', 'Sensitive Training Data': 'Apply data classification and encryption to sensitive training data. Use secure data pipelines and access controls.', 'Sensitive Data in AI Output': 'Implement output filtering and content moderation. Use data loss prevention (DLP) tools to detect and block sensitive data.', 'Unfiltered AI Output': 'Implement content filtering and output sanitization. Use AI safety measures and content moderation.', 'Data Classification Bypass': 'Enforce mandatory data classification. Implement automated classification and prevent bypass mechanisms.', 'Sensitive Data in Model': 'Implement model security measures. Use model encryption and secure model deployment practices.', 'Unencrypted AI Artifacts': 'Encrypt AI models and artifacts. Use secure model storage and transmission protocols.', 'Sensitive Data Logging': 'Implement secure logging practices. Use log encryption and sensitive data masking.', 'Unfiltered Data Export': 'Implement data export controls and filtering. Use data anonymization and access controls.', 'Unsanitized API Response': 'Implement API response sanitization. Use content filtering and data validation.' }; let suggestion = suggestions[type] || 'Implement data loss prevention (DLP) policies. Use sensitivity labels, output filtering, and encryption for AI artifacts. Monitor AI outputs for sensitive data exposure.'; if (context.framework) { suggestion += ` For ${context.framework}, consider using framework-specific data protection features.`; if (context.framework === 'openai') { suggestion += ' Use OpenAI content filtering and data handling best practices.'; } else if (context.framework === 'anthropic') { suggestion += ' Use Anthropic safety features and data protection measures.'; } else if (context.framework === 'langchain') { suggestion += ' Use LangChain data protection and output filtering capabilities.'; } else if (context.framework === 'pandas') { suggestion += ' Use Pandas data anonymization and filtering functions.'; } } return suggestion; } } exports.AiDataLeakagePreventionRule = AiDataLeakagePreventionRule; //# sourceMappingURL=ai-data-leakage-prevention.js.map