UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

359 lines 43.7 kB
"use strict";
/**
 * Prompt Injection Detection & Deterrence
 *
 * Comprehensive detection based on OWASP LLM-01.
 * Includes detection, sanitization, and deterrence utilities.
 *
 * @module csm6/security/prompt-injection
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.checkPromptInjection = checkPromptInjection;
exports.sanitizePromptInjection = sanitizePromptInjection;
exports.getInjectionRiskScore = getInjectionRiskScore;
exports.isInputSafe = isInputSafe;
const text_1 = require("../../utils/text");

/** Known limitations, attached verbatim to every finding. */
const LIMITATIONS = [
    'Pattern-based detection (novel attacks may evade)',
    'English language only',
    'Context-dependent false positives possible',
    'Obfuscated attacks may not be detected',
    'Base64/encoding attacks have limited detection',
    'Educational content about attacks may be flagged'
];

/** Methodology statement, attached verbatim to every finding. */
const METHODOLOGY = 'OWASP LLM-01 aligned pattern matching. ' +
    'Detects known prompt injection techniques using regex patterns ' +
    'validated against public attack datasets (HackAPrompt, Gandalf, etc). ' +
    'Accuracy: ~70-85% on known attacks, lower on novel variations.';

/**
 * Comprehensive prompt injection patterns.
 * Categories based on OWASP LLM Top 10 and real-world attack datasets.
 *
 * Each entry carries the regexes for one category, the severity and base
 * confidence assigned to a hit, and a human-readable description.
 * Iteration order of this object is the order in which categories are
 * checked and reported.
 */
const PATTERNS = {
    // OWASP LLM01: System Override Attempts
    systemOverride: {
        patterns: [
            /\b(ignore|bypass|forget|disregard)\s+(all\s+)?(your\s+)?(previous|prior|above|earlier)\s+(instructions?|rules?|prompts?|directives?)/gi,
            /\byou\s+are\s+now\s+(the\s+)?(system|admin|developer|root|god\s*mode)\b/gi,
            /\bas\s+(system|admin|developer|root),?\s+you\s+(must|should|will|shall)\b/gi,
            /\b(new|updated?|revised)\s+(instructions?|rules?|system\s+prompt|directives?)\s*:?\s*\n/gi,
            /\b(start|begin)\s+(with|by)\s+(ignoring|forgetting|disregarding)\b/gi,
            /\bfrom\s+now\s+on,?\s+(you\s+)?(will|must|should|are)/gi,
            /\breset\s+(your|all)\s+(instructions?|rules?|context)/gi
        ],
        severity: 'high',
        confidence: 0.8,
        description: 'Attempts to override system instructions'
    },
    // Policy/Safety Bypass
    policyBypass: {
        patterns: [
            /\b(ignore|bypass|override|disable|turn\s+off)\s+(all\s+)?(safety|content|policy|policies|filters?|guidelines?|restrictions?)/gi,
            /\b(pretend|act\s+as\s+if|simulate|imagine)\s+you\s+(have\s+no|don'?t\s+have|lack|are\s+without)\s+(restrictions?|limitations?|rules?|policies)\b/gi,
            /\bi\s+give\s+you\s+permission\s+to\s+(ignore|bypass|break)\b/gi,
            /\b(enable|activate|enter)\s+(developer|debug|god|admin)\s+mode/gi,
            /\bunlock\s+(your\s+)?(full|hidden|secret)\s+(potential|capabilities|features)/gi,
            /\bno\s+(ethical|moral|safety)\s+(guidelines?|restrictions?|limitations?)/gi
        ],
        severity: 'high',
        confidence: 0.75,
        description: 'Attempts to bypass safety policies'
    },
    // Context/System Prompt Exfiltration
    contextExfiltration: {
        patterns: [
            /\b(print|show|display|reveal|output|dump|list)\s+(your\s+)?(system\s+prompt|original\s+instructions?|initial\s+prompt|hidden\s+(config|instructions?))\b/gi,
            /\bwhat\s+(is|are|were|was)\s+your\s+(original|initial|first|secret)\s+(instructions?|prompt|rules?|directives?)\b/gi,
            /\b(repeat|show|print)\s+(everything|all)\s+(above|before|prior|previous)\b/gi,
            /\b(show|display|list)\s+(all\s+)?(previous|prior|earlier)\s+messages?\b/gi,
            /\brepeat\s+(the\s+)?(text|words|content)\s+(above|before)/gi,
            /\bwhat\s+did\s+(the\s+)?(user|developer|admin)\s+tell\s+you/gi,
            /\b(output|print|echo)\s+(your\s+)?(config|configuration|settings|parameters)/gi
        ],
        severity: 'medium',
        confidence: 0.7,
        description: 'Attempts to extract system prompt or context'
    },
    // Jailbreak Attempts (DAN, etc.)
    jailbreak: {
        patterns: [
            // NOTE(review): the `i` flag makes this match the ordinary name
            // "Dan" as well; left unchanged to keep detection coverage.
            /\b(DAN|do\s+anything\s+now)\b/gi,
            /\bjailbreak(ed|ing)?\b/gi,
            /\b(STAN|DUDE|AIM|KEVIN)\s+(mode|persona)/gi,
            /\bact\s+as\s+(an?\s+)?(unrestricted|unfiltered|uncensored)\b/gi,
            /\bpretend\s+(to\s+be|you\s+are)\s+(an?\s+)?(evil|malicious|unethical)/gi,
            /\byou\s+are\s+(now\s+)?(free|liberated)\s+from\s+(all\s+)?(restrictions?|rules?)/gi,
            /\bopposite\s+(mode|day|personality)/gi,
            /\balter\s+ego/gi
        ],
        severity: 'high',
        confidence: 0.85,
        description: 'Known jailbreak techniques (DAN, etc.)'
    },
    // Roleplay Manipulation
    roleplayManipulation: {
        patterns: [
            /\bpretend\s+(you\s+are|to\s+be)\s+(an?\s+)?(hacker|criminal|villain|terrorist)/gi,
            /\broleplay\s+as\s+(an?\s+)?(malicious|evil|unethical)/gi,
            /\bact\s+as\s+if\s+you\s+(have\s+no|don'?t\s+have)\s+(morals?|ethics?)/gi,
            /\bimagine\s+you\s+(are|were)\s+(programmed|designed)\s+to\s+(harm|hurt|attack)/gi,
            /\bfor\s+(a\s+)?(fictional|hypothetical)\s+(story|scenario),?\s+(ignore|bypass)/gi
        ],
        severity: 'medium',
        confidence: 0.7,
        description: 'Roleplay-based manipulation attempts'
    },
    // Tool/Function Abuse
    toolAbuse: {
        patterns: [
            /\b(execute|run|call|invoke)\s+.*\b(delete|drop|truncate|remove|destroy|rm\s+-rf)\b/gi,
            /\b(send|email|post|upload|transmit)\s+.*\b(password|credentials?|secret|token|key|private)\b/gi,
            // No \b in front of the path alternatives: "/" and "." are not
            // word characters, so a leading \b would require a word character
            // immediately before them and "read /etc/passwd" or "access .env"
            // could never match. The trailing \b is kept only for the dotfile
            // names so ".environment" is still not flagged.
            /\b(read|write|access)\s+.*(\/etc\/|\/root\/|\.(?:ssh|aws|env)\b)/gi,
            /\b(curl|wget|fetch)\s+http/gi,
            /\b(eval|exec|system|shell_exec|subprocess)\s*\(/gi,
            /\bsudo\s+/gi,
            /\bchmod\s+777/gi
        ],
        severity: 'critical',
        confidence: 0.85,
        description: 'Attempts to abuse tools or execute dangerous commands'
    },
    // Encoding/Obfuscation Attacks
    encodingAttack: {
        patterns: [
            /\bdecode\s+(this|the\s+following)\s+(base64|hex|rot13)/gi,
            /\b(base64|hex|rot13)\s*:\s*[A-Za-z0-9+/=]{20,}/gi,
            /\\x[0-9a-fA-F]{2}/g,
            /\\u[0-9a-fA-F]{4}/g,
            /&#x?[0-9a-fA-F]+;/g,
            /\beval\s*\(\s*atob\s*\(/gi
        ],
        severity: 'medium',
        confidence: 0.6,
        description: 'Encoded or obfuscated attack payloads'
    },
    // Indirect Injection (via data)
    indirectInjection: {
        patterns: [
            /\[SYSTEM\]|\[INST\]|\[\/INST\]|<\|system\|>|<\|user\|>/gi,
            /###\s*(system|instruction|human|assistant)\s*:/gi,
            /<\/?s>|<\/?human>|<\/?assistant>/gi,
            /\bHuman:\s*|\bAssistant:\s*|\bSystem:\s*/gi
        ],
        severity: 'high',
        confidence: 0.75,
        description: 'Indirect injection via data/markup'
    },
    // Prompt Leaking via Completion
    completionLeak: {
        patterns: [
            /\bcomplete\s+the\s+(following|sentence|text)\s*:\s*["']?(system|instruction)/gi,
            /\bfinish\s+this\s+(sentence|prompt)\s*:\s*["']?you\s+are/gi,
            /\bwhat\s+comes\s+(after|next)\s*:\s*["']?(ignore|bypass|system)/gi
        ],
        severity: 'medium',
        confidence: 0.65,
        description: 'Attempts to leak prompts via completion'
    }
};

/** Per-category message placed on a finding. */
const FINDING_MESSAGES = {
    systemOverride: 'System override attempt detected',
    policyBypass: 'Policy bypass attempt detected',
    contextExfiltration: 'System prompt exfiltration attempt detected',
    jailbreak: 'Jailbreak attempt detected (DAN/similar)',
    roleplayManipulation: 'Roleplay manipulation attempt detected',
    toolAbuse: 'Tool abuse attempt detected',
    encodingAttack: 'Encoded/obfuscated attack detected',
    indirectInjection: 'Indirect injection markers detected',
    completionLeak: 'Prompt leak via completion detected'
};

/** Per-category recommendation placed on a finding. */
const FINDING_RECOMMENDATIONS = {
    systemOverride: 'Reject or sanitize request. Use sanitizePromptInjection() to clean input.',
    policyBypass: 'Block request - attempting to bypass safety policies.',
    contextExfiltration: 'Sanitize request to prevent system prompt leakage.',
    jailbreak: 'Block immediately - known jailbreak technique.',
    roleplayManipulation: 'Review carefully - roleplay may be used to bypass safety.',
    toolAbuse: 'Block immediately - attempting dangerous operations.',
    encodingAttack: 'Decode and re-scan before processing.',
    indirectInjection: 'Sanitize data inputs - may contain injected instructions.',
    completionLeak: 'Block - attempting to extract prompts via completion.'
};

/**
 * Phrases treated as "educational context". Hoisted to module scope so the
 * array is not rebuilt for every match; the regexes are non-global, so
 * `test()` keeps no state between calls.
 */
const EDUCATIONAL_MARKERS = [
    /this\s+is\s+an\s+example\s+of/i,
    /for\s+educational\s+purposes/i,
    /demonstrate\s+how/i,
    /showing\s+you\s+what\s+not\s+to/i,
    /never\s+do\s+this/i,
    /avoid\s+doing/i,
    /here'?s?\s+how\s+attacks?\s+work/i,
    /security\s+training/i,
    /awareness\s+training/i
];

/** Phrases treated as "a question about security" (non-global, stateless). */
const SECURITY_QUESTIONS = [
    /how\s+(do|can)\s+(?:i|we|you)\s+prevent/i,
    /what\s+(?:is|are)\s+the\s+risks?\s+of/i,
    /how\s+to\s+protect\s+against/i,
    /how\s+to\s+detect/i,
    /what\s+is\s+prompt\s+injection/i
];

/**
 * Check for prompt injection attempts.
 *
 * Every category in PATTERNS is scanned; at most one finding is reported
 * per category (the first pattern that yields a hit not classified as a
 * false positive).
 *
 * @param input - The input text to check
 * @returns Array of findings (empty when nothing is detected)
 */
function checkPromptInjection(input) {
    const findings = [];
    for (const [category, config] of Object.entries(PATTERNS)) {
        for (const pattern of config.patterns) {
            // Reset lastIndex for global patterns
            pattern.lastIndex = 0;
            // Look at every hit of this pattern, not just the first one:
            // a quoted first occurrence must not hide a later genuine one.
            let genuine;
            for (const candidate of input.matchAll(pattern)) {
                if (!isLikelyFalsePositive(input, candidate[0])) {
                    genuine = candidate;
                    break;
                }
            }
            if (genuine) {
                findings.push(createFinding(category, config.severity, genuine, config.confidence, input, config.description));
                // Only report one finding per category
                break;
            }
        }
    }
    return findings;
}

/**
 * Sanitize input by removing or neutralizing injection attempts.
 *
 * Each hit that is not classified as a false positive is replaced with the
 * literal text `[REMOVED]` and recorded as `[<category>]: <matched text>`.
 *
 * @param input - The input to sanitize
 * @returns Sanitized input, the list of removed fragments, and whether
 *          anything was removed
 */
function sanitizePromptInjection(input) {
    let sanitized = input;
    const removed = [];
    for (const [category, config] of Object.entries(PATTERNS)) {
        for (const pattern of config.patterns) {
            pattern.lastIndex = 0;
            // Replace through the regex itself so the text at the matched
            // position is the one that is redacted. Replacing by substring
            // would hit the first textual occurrence, which may be a
            // different (non-matching) spot, e.g. "DAN" inside "xDAN".
            sanitized = sanitized.replace(pattern, (matched) => {
                // False-positive heuristics are evaluated against the
                // original input, not the partially sanitized text.
                if (isLikelyFalsePositive(input, matched)) {
                    return matched;
                }
                removed.push(`[${category}]: ${matched}`);
                return '[REMOVED]';
            });
        }
    }
    return {
        sanitized,
        removed,
        wasModified: removed.length > 0
    };
}

/**
 * Get risk score for input (0-1).
 *
 * The score is the maximum over all findings of
 * `severityWeight * finding.confidence.value`.
 *
 * @param input - The input to score
 * @returns Risk score between 0 and 1 (0 when nothing is detected)
 */
function getInjectionRiskScore(input) {
    const findings = checkPromptInjection(input);
    if (findings.length === 0)
        return 0;
    const severityWeights = {
        medium: 0.4,
        high: 0.7,
        critical: 1.0
    };
    let maxScore = 0;
    for (const finding of findings) {
        const weight = severityWeights[finding.severity];
        // A severity without a weight would yield NaN and poison Math.max;
        // skip it instead of returning NaN to the caller.
        if (typeof weight !== 'number')
            continue;
        maxScore = Math.max(maxScore, weight * finding.confidence.value);
    }
    return maxScore;
}

/**
 * Quick check if input is likely safe (no injection detected).
 *
 * @param input - The input to check
 * @returns true if no injection detected
 */
function isInputSafe(input) {
    return checkPromptInjection(input).length === 0;
}

/**
 * Create a finding object.
 *
 * @param category - Key of the PATTERNS entry that produced the hit
 * @param severity - Severity configured for that category
 * @param match - The regex match (its text and index are used as evidence)
 * @param confidence - Base confidence configured for that category
 * @param fullInput - The complete input the match came from
 * @param description - Category description, copied into metadata
 * @returns The finding object
 */
function createFinding(category, severity, match, confidence, fullInput, description) {
    // Use the real match position so the confidence adjustment looks at the
    // same spot the evidence context is taken from.
    const contextClarity = analyzeContext(fullInput, match[0], match.index);
    return {
        id: `PROMPT_INJECTION_${category.toUpperCase()}`,
        category: 'security',
        severity,
        surface: 'input',
        message: FINDING_MESSAGES[category] || 'Prompt injection attempt detected',
        recommendation: FINDING_RECOMMENDATIONS[category] || 'Review and sanitize input.',
        evidence: {
            // truncate/extractContext come from ../../utils/text; presumably
            // they cap the sample at 100 chars and take ~50 chars of context
            // around the index - confirm against that module.
            textSample: (0, text_1.truncate)(match[0], 100),
            pattern: category,
            context: (0, text_1.extractContext)(fullInput, match.index || 0, 50)
        },
        confidence: calculateConfidence(confidence, contextClarity),
        limitations: LIMITATIONS,
        methodology: METHODOLOGY,
        metadata: {
            attackType: category,
            description
        }
    };
}

/**
 * Calculate confidence score.
 *
 * @param baseConfidence - Confidence configured for the pattern category
 * @param contextClarity - Multiplier produced by analyzeContext
 * @returns Score object: the product of both inputs, a +/-0.15 interval
 *          clamped to [0, 1], and the contributing factors
 */
function calculateConfidence(baseConfidence, contextClarity) {
    const value = baseConfidence * contextClarity;
    return {
        value,
        interval: [Math.max(0, value - 0.15), Math.min(1, value + 0.15)],
        method: 'empirical',
        factors: {
            patternStrength: baseConfidence,
            contextClarity
        }
    };
}

/**
 * Check if this is likely a false positive.
 *
 * NOTE(review): the educational and security-question markers are tested
 * against the whole input, so an input containing any of these phrases
 * (e.g. "for educational purposes") suppresses every finding for that
 * input. Behavior is kept as-is here; consider restricting this to the
 * vicinity of the match.
 *
 * @param fullText - The complete input
 * @param matchedText - The text matched by an injection pattern
 * @returns true when the match should be ignored
 */
function isLikelyFalsePositive(fullText, matchedText) {
    // Educational context
    if (EDUCATIONAL_MARKERS.some(p => p.test(fullText))) {
        return true;
    }
    // Question about security
    if (SECURITY_QUESTIONS.some(p => p.test(fullText))) {
        return true;
    }
    // Quoted content (decided by isLikelyQuoted from ../../utils/text)
    if ((0, text_1.isLikelyQuoted)(fullText, matchedText)) {
        return true;
    }
    return false;
}

/**
 * Analyze context to refine confidence.
 *
 * Looks at up to 30 characters on each side of the match.
 *
 * @param fullText - The complete input
 * @param match - The matched text
 * @param matchIndex - Optional position of the match in fullText; when
 *        omitted, the first occurrence of `match` is used
 * @returns 0.5 next to a code fence, 0.6 next to a quote or backtick,
 *          otherwise 0.9 (also 0.9 when the match cannot be located)
 */
function analyzeContext(fullText, match, matchIndex) {
    const start = typeof matchIndex === 'number' ? matchIndex : fullText.indexOf(match);
    if (start === -1)
        return 0.9;
    const before = fullText.substring(Math.max(0, start - 30), start);
    const after = fullText.substring(start + match.length, start + match.length + 30);
    // Lower confidence if in code block. This must run before the quote
    // check: a fence consists of backticks, which the quote check also
    // accepts, so the other order makes this branch unreachable.
    if (/```/.test(before) || /```/.test(after)) {
        return 0.5;
    }
    // Lower confidence if surrounded by quotes or code markers
    if (/["'`]/.test(before) || /["'`]/.test(after)) {
        return 0.6;
    }
    return 0.9;
}
//# 
sourceMappingURL=data:application/json;base64,{"version":3,"file":"prompt-injection.js","sourceRoot":"","sources":["../../../src/csm6/security/prompt-injection.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;AA4KH,oDA+BC;AAOD,0DA2BC;AAOD,sDAiBC;AAOD,kCAEC;AA3QD,2CAA4E;AAE5E,MAAM,WAAW,GAAG;IAClB,mDAAmD;IACnD,uBAAuB;IACvB,4CAA4C;IAC5C,wCAAwC;IACxC,gDAAgD;IAChD,kDAAkD;CACnD,CAAC;AAEF,MAAM,WAAW,GACf,yCAAyC;IACzC,iEAAiE;IACjE,wEAAwE;IACxE,gEAAgE,CAAC;AASnE;;;GAGG;AACH,MAAM,QAAQ,GAAkC;IAC9C,wCAAwC;IACxC,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,wIAAwI;YACxI,2EAA2E;YAC3E,6EAA6E;YAC7E,2FAA2F;YAC3F,sEAAsE;YACtE,yDAAyD;YACzD,yDAAyD;SAC1D;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,0CAA0C;KACxD;IAED,uBAAuB;IACvB,YAAY,EAAE;QACZ,QAAQ,EAAE;YACR,gIAAgI;YAChI,oJAAoJ;YACpJ,gEAAgE;YAChE,kEAAkE;YAClE,iFAAiF;YACjF,4EAA4E;SAC7E;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,oCAAoC;KAClD;IAED,qCAAqC;IACrC,mBAAmB,EAAE;QACnB,QAAQ,EAAE;YACR,4JAA4J;YAC5J,qHAAqH;YACrH,8EAA8E;YAC9E,2EAA2E;YAC3E,6DAA6D;YAC7D,+DAA+D;YAC/D,gFAAgF;SACjF;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,8CAA8C;KAC5D;IAED,iCAAiC;IACjC,SAAS,EAAE;QACT,QAAQ,EAAE;YACR,iCAAiC;YACjC,0BAA0B;YAC1B,4CAA4C;YAC5C,gEAAgE;YAChE,yEAAyE;YACzE,oFAAoF;YACpF,uCAAuC;YACvC,iBAAiB;SAClB;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,wCAAwC;KACtD;IAED,wBAAwB;IACxB,oBAAoB,EAAE;QACpB,QAAQ,EAAE;YACR,kFAAkF;YAClF,yDAAyD;YACzD,yEAAyE;YACzE,kFAAkF;YAClF,kFAAkF;SACnF;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,sCAAsC;KACpD;IAED,sBAAsB;IACtB,SAAS,EAAE;QACT,QAAQ,EAAE;YACR,sFAAsF;YACtF,gGAAgG;YAChG,sEAAsE;YACtE,8BAA8B;YAC9B,mDAAmD;YACnD,aAAa;YACb,iBAAiB;SAClB;QACD,QAAQ,EAAE,UAAU;QACpB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,uDAAuD;KACrE;IAED,+BAA+B;IAC/B,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,0DAA0D;YAC1D,kDAAkD;YAClD,oBAAoB;YACpB,oBAAoB;YACpB,oBAAoB;YACpB,2BAA2B;SAC5B;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,uCAAuC;KACrD;IAED,gCAAgC;IAChC,iBAAiB,EAAE;QACjB,QAAQ,EAAE;YACR,0DAA0D;YAC1D,kDAAkD;YAClD,oCAAoC;YACpC,4C
AA4C;SAC7C;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,oCAAoC;KAClD;IAED,gCAAgC;IAChC,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,gFAAgF;YAChF,4DAA4D;YAC5D,mEAAmE;SACpE;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,yCAAyC;KACvD;CACF,CAAC;AAEF;;;;GAIG;AACH,SAAgB,oBAAoB,CAAC,KAAa;IAChD,MAAM,QAAQ,GAAc,EAAE,CAAC;IAE/B,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1D,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,sCAAsC;YACtC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YACtB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAEpD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBAEzB,4BAA4B;gBAC5B,IAAI,CAAC,qBAAqB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5C,QAAQ,CAAC,IAAI,CAAC,aAAa,CACzB,QAAQ,EACR,MAAM,CAAC,QAAQ,EACf,KAAK,EACL,MAAM,CAAC,UAAU,EACjB,KAAK,EACL,MAAM,CAAC,WAAW,CACnB,CAAC,CAAC;oBAEH,uCAAuC;oBACvC,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,SAAgB,uBAAuB,CAAC,KAAa;IAKnD,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1D,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YACtB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAExD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC5B,IAAI,CAAC,qBAAqB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5C,OAAO,CAAC,IAAI,CAAC,IAAI,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBAC3C,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,SAAS;QACT,OAAO;QACP,WAAW,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC;KAChC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAgB,qBAAqB,CAAC,KAAa;IACjD,MAAM,QAAQ,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAC7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAA2B;QAC9C,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,GAAG;QACT,QAAQ,EAAE,GAAG;KACd,CAA
C;IAEF,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAC3E,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,SAAgB,WAAW,CAAC,KAAa;IACvC,OAAO,oBAAoB,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CACpB,QAAgB,EAChB,QAA6B,EAC7B,KAAuB,EACvB,UAAkB,EAClB,SAAiB,EACjB,WAAmB;IAEnB,MAAM,QAAQ,GAA2B;QACvC,cAAc,EAAE,kCAAkC;QAClD,YAAY,EAAE,gCAAgC;QAC9C,mBAAmB,EAAE,6CAA6C;QAClE,SAAS,EAAE,0CAA0C;QACrD,oBAAoB,EAAE,wCAAwC;QAC9D,SAAS,EAAE,6BAA6B;QACxC,cAAc,EAAE,oCAAoC;QACpD,iBAAiB,EAAE,qCAAqC;QACxD,cAAc,EAAE,qCAAqC;KACtD,CAAC;IAEF,MAAM,eAAe,GAA2B;QAC9C,cAAc,EAAE,2EAA2E;QAC3F,YAAY,EAAE,uDAAuD;QACrE,mBAAmB,EAAE,oDAAoD;QACzE,SAAS,EAAE,gDAAgD;QAC3D,oBAAoB,EAAE,2DAA2D;QACjF,SAAS,EAAE,sDAAsD;QACjE,cAAc,EAAE,uCAAuC;QACvD,iBAAiB,EAAE,2DAA2D;QAC9E,cAAc,EAAE,uDAAuD;KACxE,CAAC;IAEF,MAAM,cAAc,GAAG,cAAc,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,OAAO;QACL,EAAE,EAAE,oBAAoB,QAAQ,CAAC,WAAW,EAAE,EAAE;QAChD,QAAQ,EAAE,UAAU;QACpB,QAAQ;QACR,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,QAAQ,CAAC,QAAQ,CAAC,IAAI,mCAAmC;QAClE,cAAc,EAAE,eAAe,CAAC,QAAQ,CAAC,IAAI,4BAA4B;QAEzE,QAAQ,EAAE;YACR,UAAU,EAAE,IAAA,eAAQ,EAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC;YACnC,OAAO,EAAE,QAAQ;YACjB,OAAO,EAAE,IAAA,qBAAc,EAAC,SAAS,EAAE,KAAK,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC;SACzD;QAED,UAAU,EAAE,mBAAmB,CAAC,UAAU,EAAE,cAAc,CAAC;QAC3D,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,WAAW;QACxB,QAAQ,EAAE;YACR,UAAU,EAAE,QAAQ;YACpB,WAAW;SACZ;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,cAAsB,EAAE,cAAsB;IACzE,MAAM,KAAK,GAAG,cAAc,GAAG,cAAc,CAAC;IAE9C,OAAO;QACL,KAAK;QACL,QAAQ,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC;QAChE,MAAM,EAAE,WAAW;QACnB,OAAO,EAAE;YACP,eAAe,EAAE,cAAc;YAC/B,cAAc;SACf;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,QAAgB,EAAE,WAAmB;IAClE,sBAAsB;IACtB,MAAM,kBAAk
B,GAAG;QACzB,gCAAgC;QAChC,+BAA+B;QAC/B,oBAAoB;QACpB,kCAAkC;QAClC,oBAAoB;QACpB,gBAAgB;QAChB,mCAAmC;QACnC,sBAAsB;QACtB,uBAAuB;KACxB,CAAC;IAEF,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QACnD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0BAA0B;IAC1B,MAAM,iBAAiB,GAAG;QACxB,0CAA0C;QAC1C,wCAAwC;QACxC,+BAA+B;QAC/B,oBAAoB;QACpB,iCAAiC;KAClC,CAAC;IAEF,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QAClD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,iBAAiB;IACjB,IAAI,IAAA,qBAAc,EAAC,QAAQ,EAAE,WAAW,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,KAAa;IACrD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC3C,IAAI,UAAU,KAAK,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC;IAElC,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC5E,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;IAE5F,2DAA2D;IAC3D,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,oCAAoC;IACpC,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,OAAO,GAAG,CAAC;IACb,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC","sourcesContent":["/**\n * Prompt Injection Detection & Deterrence\n * \n * Comprehensive detection based on OWASP LLM-01.\n * Includes detection, sanitization, and deterrence utilities.\n * \n * @module csm6/security/prompt-injection\n * @author Haiec\n * @license MIT\n */\n\nimport { Finding, ConfidenceScore } from '../../types/results';\nimport { truncate, extractContext, isLikelyQuoted } from '../../utils/text';\n\nconst LIMITATIONS = [\n  'Pattern-based detection (novel attacks may evade)',\n  'English language only',\n  'Context-dependent false positives possible',\n  'Obfuscated attacks may not be detected',\n  'Base64/encoding attacks have limited detection',\n  'Educational 
content about attacks may be flagged'\n];\n\nconst METHODOLOGY =\n  'OWASP LLM-01 aligned pattern matching. ' +\n  'Detects known prompt injection techniques using regex patterns ' +\n  'validated against public attack datasets (HackAPrompt, Gandalf, etc). ' +\n  'Accuracy: ~70-85% on known attacks, lower on novel variations.';\n\ninterface PatternConfig {\n  patterns: RegExp[];\n  severity: 'medium' | 'high' | 'critical';\n  confidence: number;\n  description: string;\n}\n\n/**\n * Comprehensive prompt injection patterns\n * Categories based on OWASP LLM Top 10 and real-world attack datasets\n */\nconst PATTERNS: Record<string, PatternConfig> = {\n  // OWASP LLM01: System Override Attempts\n  systemOverride: {\n    patterns: [\n      /\\b(ignore|bypass|forget|disregard)\\s+(all\\s+)?(your\\s+)?(previous|prior|above|earlier)\\s+(instructions?|rules?|prompts?|directives?)/gi,\n      /\\byou\\s+are\\s+now\\s+(the\\s+)?(system|admin|developer|root|god\\s*mode)\\b/gi,\n      /\\bas\\s+(system|admin|developer|root),?\\s+you\\s+(must|should|will|shall)\\b/gi,\n      /\\b(new|updated?|revised)\\s+(instructions?|rules?|system\\s+prompt|directives?)\\s*:?\\s*\\n/gi,\n      /\\b(start|begin)\\s+(with|by)\\s+(ignoring|forgetting|disregarding)\\b/gi,\n      /\\bfrom\\s+now\\s+on,?\\s+(you\\s+)?(will|must|should|are)/gi,\n      /\\breset\\s+(your|all)\\s+(instructions?|rules?|context)/gi\n    ],\n    severity: 'high',\n    confidence: 0.8,\n    description: 'Attempts to override system instructions'\n  },\n  \n  // Policy/Safety Bypass\n  policyBypass: {\n    patterns: [\n      /\\b(ignore|bypass|override|disable|turn\\s+off)\\s+(all\\s+)?(safety|content|policy|policies|filters?|guidelines?|restrictions?)/gi,\n      /\\b(pretend|act\\s+as\\s+if|simulate|imagine)\\s+you\\s+(have\\s+no|don'?t\\s+have|lack|are\\s+without)\\s+(restrictions?|limitations?|rules?|policies)\\b/gi,\n      /\\bi\\s+give\\s+you\\s+permission\\s+to\\s+(ignore|bypass|break)\\b/gi,\n      
/\\b(enable|activate|enter)\\s+(developer|debug|god|admin)\\s+mode/gi,\n      /\\bunlock\\s+(your\\s+)?(full|hidden|secret)\\s+(potential|capabilities|features)/gi,\n      /\\bno\\s+(ethical|moral|safety)\\s+(guidelines?|restrictions?|limitations?)/gi\n    ],\n    severity: 'high',\n    confidence: 0.75,\n    description: 'Attempts to bypass safety policies'\n  },\n  \n  // Context/System Prompt Exfiltration\n  contextExfiltration: {\n    patterns: [\n      /\\b(print|show|display|reveal|output|dump|list)\\s+(your\\s+)?(system\\s+prompt|original\\s+instructions?|initial\\s+prompt|hidden\\s+(config|instructions?))\\b/gi,\n      /\\bwhat\\s+(is|are|were|was)\\s+your\\s+(original|initial|first|secret)\\s+(instructions?|prompt|rules?|directives?)\\b/gi,\n      /\\b(repeat|show|print)\\s+(everything|all)\\s+(above|before|prior|previous)\\b/gi,\n      /\\b(show|display|list)\\s+(all\\s+)?(previous|prior|earlier)\\s+messages?\\b/gi,\n      /\\brepeat\\s+(the\\s+)?(text|words|content)\\s+(above|before)/gi,\n      /\\bwhat\\s+did\\s+(the\\s+)?(user|developer|admin)\\s+tell\\s+you/gi,\n      /\\b(output|print|echo)\\s+(your\\s+)?(config|configuration|settings|parameters)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.7,\n    description: 'Attempts to extract system prompt or context'\n  },\n  \n  // Jailbreak Attempts (DAN, etc.)\n  jailbreak: {\n    patterns: [\n      /\\b(DAN|do\\s+anything\\s+now)\\b/gi,\n      /\\bjailbreak(ed|ing)?\\b/gi,\n      /\\b(STAN|DUDE|AIM|KEVIN)\\s+(mode|persona)/gi,\n      /\\bact\\s+as\\s+(an?\\s+)?(unrestricted|unfiltered|uncensored)\\b/gi,\n      /\\bpretend\\s+(to\\s+be|you\\s+are)\\s+(an?\\s+)?(evil|malicious|unethical)/gi,\n      /\\byou\\s+are\\s+(now\\s+)?(free|liberated)\\s+from\\s+(all\\s+)?(restrictions?|rules?)/gi,\n      /\\bopposite\\s+(mode|day|personality)/gi,\n      /\\balter\\s+ego/gi\n    ],\n    severity: 'high',\n    confidence: 0.85,\n    description: 'Known jailbreak techniques (DAN, etc.)'\n  },\n  \n  // 
Roleplay Manipulation\n  roleplayManipulation: {\n    patterns: [\n      /\\bpretend\\s+(you\\s+are|to\\s+be)\\s+(an?\\s+)?(hacker|criminal|villain|terrorist)/gi,\n      /\\broleplay\\s+as\\s+(an?\\s+)?(malicious|evil|unethical)/gi,\n      /\\bact\\s+as\\s+if\\s+you\\s+(have\\s+no|don'?t\\s+have)\\s+(morals?|ethics?)/gi,\n      /\\bimagine\\s+you\\s+(are|were)\\s+(programmed|designed)\\s+to\\s+(harm|hurt|attack)/gi,\n      /\\bfor\\s+(a\\s+)?(fictional|hypothetical)\\s+(story|scenario),?\\s+(ignore|bypass)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.7,\n    description: 'Roleplay-based manipulation attempts'\n  },\n  \n  // Tool/Function Abuse\n  toolAbuse: {\n    patterns: [\n      /\\b(execute|run|call|invoke)\\s+.*\\b(delete|drop|truncate|remove|destroy|rm\\s+-rf)\\b/gi,\n      /\\b(send|email|post|upload|transmit)\\s+.*\\b(password|credentials?|secret|token|key|private)\\b/gi,\n      /\\b(read|write|access)\\s+.*\\b(\\/etc\\/|\\/root\\/|\\.ssh|\\.aws|\\.env)\\b/gi,\n      /\\b(curl|wget|fetch)\\s+http/gi,\n      /\\b(eval|exec|system|shell_exec|subprocess)\\s*\\(/gi,\n      /\\bsudo\\s+/gi,\n      /\\bchmod\\s+777/gi\n    ],\n    severity: 'critical',\n    confidence: 0.85,\n    description: 'Attempts to abuse tools or execute dangerous commands'\n  },\n  \n  // Encoding/Obfuscation Attacks\n  encodingAttack: {\n    patterns: [\n      /\\bdecode\\s+(this|the\\s+following)\\s+(base64|hex|rot13)/gi,\n      /\\b(base64|hex|rot13)\\s*:\\s*[A-Za-z0-9+/=]{20,}/gi,\n      /\\\\x[0-9a-fA-F]{2}/g,\n      /\\\\u[0-9a-fA-F]{4}/g,\n      /&#x?[0-9a-fA-F]+;/g,\n      /\\beval\\s*\\(\\s*atob\\s*\\(/gi\n    ],\n    severity: 'medium',\n    confidence: 0.6,\n    description: 'Encoded or obfuscated attack payloads'\n  },\n  \n  // Indirect Injection (via data)\n  indirectInjection: {\n    patterns: [\n      /\\[SYSTEM\\]|\\[INST\\]|\\[\\/INST\\]|<\\|system\\|>|<\\|user\\|>/gi,\n      /###\\s*(system|instruction|human|assistant)\\s*:/gi,\n      
/<\\/?s>|<\\/?human>|<\\/?assistant>/gi,\n      /\\bHuman:\\s*|\\bAssistant:\\s*|\\bSystem:\\s*/gi\n    ],\n    severity: 'high',\n    confidence: 0.75,\n    description: 'Indirect injection via data/markup'\n  },\n  \n  // Prompt Leaking via Completion\n  completionLeak: {\n    patterns: [\n      /\\bcomplete\\s+the\\s+(following|sentence|text)\\s*:\\s*[\"']?(system|instruction)/gi,\n      /\\bfinish\\s+this\\s+(sentence|prompt)\\s*:\\s*[\"']?you\\s+are/gi,\n      /\\bwhat\\s+comes\\s+(after|next)\\s*:\\s*[\"']?(ignore|bypass|system)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.65,\n    description: 'Attempts to leak prompts via completion'\n  }\n};\n\n/**\n * Check for prompt injection attempts\n * @param input - The input text to check\n * @returns Array of findings\n */\nexport function checkPromptInjection(input: string): Finding[] {\n  const findings: Finding[] = [];\n  \n  for (const [category, config] of Object.entries(PATTERNS)) {\n    for (const pattern of config.patterns) {\n      // Reset lastIndex for global patterns\n      pattern.lastIndex = 0;\n      const matches = Array.from(input.matchAll(pattern));\n      \n      if (matches.length > 0) {\n        const match = matches[0];\n        \n        // Check for false positives\n        if (!isLikelyFalsePositive(input, match[0])) {\n          findings.push(createFinding(\n            category,\n            config.severity,\n            match,\n            config.confidence,\n            input,\n            config.description\n          ));\n          \n          // Only report one finding per category\n          break;\n        }\n      }\n    }\n  }\n  \n  return findings;\n}\n\n/**\n * Sanitize input by removing or neutralizing injection attempts\n * @param input - The input to sanitize\n * @returns Sanitized input and list of removed patterns\n */\nexport function sanitizePromptInjection(input: string): { \n  sanitized: string; \n  removed: string[];\n  wasModified: boolean;\n} {\n  let 
sanitized = input;\n  const removed: string[] = [];\n  \n  for (const [category, config] of Object.entries(PATTERNS)) {\n    for (const pattern of config.patterns) {\n      pattern.lastIndex = 0;\n      const matches = Array.from(sanitized.matchAll(pattern));\n      \n      for (const match of matches) {\n        if (!isLikelyFalsePositive(input, match[0])) {\n          removed.push(`[${category}]: ${match[0]}`);\n          sanitized = sanitized.replace(match[0], '[REMOVED]');\n        }\n      }\n    }\n  }\n  \n  return {\n    sanitized,\n    removed,\n    wasModified: removed.length > 0\n  };\n}\n\n/**\n * Get risk score for input (0-1)\n * @param input - The input to score\n * @returns Risk score between 0 and 1\n */\nexport function getInjectionRiskScore(input: string): number {\n  const findings = checkPromptInjection(input);\n  if (findings.length === 0) return 0;\n  \n  const severityWeights: Record<string, number> = {\n    medium: 0.4,\n    high: 0.7,\n    critical: 1.0\n  };\n  \n  let maxScore = 0;\n  for (const finding of findings) {\n    const score = severityWeights[finding.severity] * finding.confidence.value;\n    maxScore = Math.max(maxScore, score);\n  }\n  \n  return maxScore;\n}\n\n/**\n * Quick check if input is likely safe (no injection detected)\n * @param input - The input to check\n * @returns true if no injection detected\n */\nexport function isInputSafe(input: string): boolean {\n  return checkPromptInjection(input).length === 0;\n}\n\n/**\n * Create a finding object\n */\nfunction createFinding(\n  category: string,\n  severity: Finding['severity'],\n  match: RegExpMatchArray,\n  confidence: number,\n  fullInput: string,\n  description: string\n): Finding {\n  const messages: Record<string, string> = {\n    systemOverride: 'System override attempt detected',\n    policyBypass: 'Policy bypass attempt detected',\n    contextExfiltration: 'System prompt exfiltration attempt detected',\n    jailbreak: 'Jailbreak attempt detected 
(DAN/similar)',\n    roleplayManipulation: 'Roleplay manipulation attempt detected',\n    toolAbuse: 'Tool abuse attempt detected',\n    encodingAttack: 'Encoded/obfuscated attack detected',\n    indirectInjection: 'Indirect injection markers detected',\n    completionLeak: 'Prompt leak via completion detected'\n  };\n  \n  const recommendations: Record<string, string> = {\n    systemOverride: 'Reject or sanitize request. Use sanitizePromptInjection() to clean input.',\n    policyBypass: 'Block request - attempting to bypass safety policies.',\n    contextExfiltration: 'Sanitize request to prevent system prompt leakage.',\n    jailbreak: 'Block immediately - known jailbreak technique.',\n    roleplayManipulation: 'Review carefully - roleplay may be used to bypass safety.',\n    toolAbuse: 'Block immediately - attempting dangerous operations.',\n    encodingAttack: 'Decode and re-scan before processing.',\n    indirectInjection: 'Sanitize data inputs - may contain injected instructions.',\n    completionLeak: 'Block - attempting to extract prompts via completion.'\n  };\n  \n  const contextClarity = analyzeContext(fullInput, match[0]);\n  \n  return {\n    id: `PROMPT_INJECTION_${category.toUpperCase()}`,\n    category: 'security',\n    severity,\n    surface: 'input',\n    message: messages[category] || 'Prompt injection attempt detected',\n    recommendation: recommendations[category] || 'Review and sanitize input.',\n    \n    evidence: {\n      textSample: truncate(match[0], 100),\n      pattern: category,\n      context: extractContext(fullInput, match.index || 0, 50)\n    },\n    \n    confidence: calculateConfidence(confidence, contextClarity),\n    limitations: LIMITATIONS,\n    methodology: METHODOLOGY,\n    metadata: {\n      attackType: category,\n      description\n    }\n  };\n}\n\n/**\n * Calculate confidence score\n */\nfunction calculateConfidence(baseConfidence: number, contextClarity: number): ConfidenceScore {\n  const value = baseConfidence * 
contextClarity;\n  \n  return {\n    value,\n    interval: [Math.max(0, value - 0.15), Math.min(1, value + 0.15)],\n    method: 'empirical',\n    factors: {\n      patternStrength: baseConfidence,\n      contextClarity\n    }\n  };\n}\n\n/**\n * Check if this is likely a false positive\n */\nfunction isLikelyFalsePositive(fullText: string, matchedText: string): boolean {\n  // Educational context\n  const educationalMarkers = [\n    /this\\s+is\\s+an\\s+example\\s+of/i,\n    /for\\s+educational\\s+purposes/i,\n    /demonstrate\\s+how/i,\n    /showing\\s+you\\s+what\\s+not\\s+to/i,\n    /never\\s+do\\s+this/i,\n    /avoid\\s+doing/i,\n    /here'?s?\\s+how\\s+attacks?\\s+work/i,\n    /security\\s+training/i,\n    /awareness\\s+training/i\n  ];\n  \n  if (educationalMarkers.some(p => p.test(fullText))) {\n    return true;\n  }\n  \n  // Question about security\n  const securityQuestions = [\n    /how\\s+(do|can)\\s+(?:i|we|you)\\s+prevent/i,\n    /what\\s+(?:is|are)\\s+the\\s+risks?\\s+of/i,\n    /how\\s+to\\s+protect\\s+against/i,\n    /how\\s+to\\s+detect/i,\n    /what\\s+is\\s+prompt\\s+injection/i\n  ];\n  \n  if (securityQuestions.some(p => p.test(fullText))) {\n    return true;\n  }\n  \n  // Quoted content\n  if (isLikelyQuoted(fullText, matchedText)) {\n    return true;\n  }\n  \n  return false;\n}\n\n/**\n * Analyze context to refine confidence\n */\nfunction analyzeContext(fullText: string, match: string): number {\n  const matchIndex = fullText.indexOf(match);\n  if (matchIndex === -1) return 0.9;\n  \n  const before = fullText.substring(Math.max(0, matchIndex - 30), matchIndex);\n  const after = fullText.substring(matchIndex + match.length, matchIndex + match.length + 30);\n  \n  // Lower confidence if surrounded by quotes or code markers\n  if (/[\"'`]/.test(before) || /[\"'`]/.test(after)) {\n    return 0.6;\n  }\n  \n  // Lower confidence if in code block\n  if (/```/.test(before) || /```/.test(after)) {\n    return 0.5;\n  }\n  \n  return 0.9;\n}\n"]}