llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
359 lines • 43.7 kB
JavaScript
"use strict";
/**
* Prompt Injection Detection & Deterrence
*
* Comprehensive detection based on OWASP LLM-01.
* Includes detection, sanitization, and deterrence utilities.
*
* @module csm6/security/prompt-injection
* @author Haiec
* @license MIT
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.checkPromptInjection = checkPromptInjection;
exports.sanitizePromptInjection = sanitizePromptInjection;
exports.getInjectionRiskScore = getInjectionRiskScore;
exports.isInputSafe = isInputSafe;
const text_1 = require("../../utils/text");
/**
 * Known blind spots of the regex-based detector.
 * createFinding() attaches this list to each finding it builds.
 */
const LIMITATIONS = [
    // Coverage limits
    'Pattern-based detection (novel attacks may evade)',
    'English language only',
    // Accuracy caveats
    'Context-dependent false positives possible',
    'Obfuscated attacks may not be detected',
    'Base64/encoding attacks have limited detection',
    'Educational content about attacks may be flagged'
];
/**
 * One-paragraph description of how detection works.
 * createFinding() attaches this text to each finding it builds.
 */
const METHODOLOGY = [
    'OWASP LLM-01 aligned pattern matching.',
    'Detects known prompt injection techniques using regex patterns',
    'validated against public attack datasets (HackAPrompt, Gandalf, etc).',
    'Accuracy: ~70-85% on known attacks, lower on novel variations.'
].join(' ');
/**
 * Comprehensive prompt injection patterns
 * Categories based on OWASP LLM Top 10 and real-world attack datasets
 *
 * Each entry maps a category name to:
 *  - patterns:    regular expressions tried in order against the input
 *  - severity:    'medium' | 'high' | 'critical'
 *  - confidence:  base confidence (0-1) assigned to a match in this category
 *  - description: short human-readable summary of the category
 *
 * All patterns carry the `g` flag because they are consumed through
 * String.prototype.matchAll(), which requires a global regex.
 */
const PATTERNS = {
    // OWASP LLM01: System Override Attempts
    systemOverride: {
        patterns: [
            /\b(ignore|bypass|forget|disregard)\s+(all\s+)?(your\s+)?(previous|prior|above|earlier)\s+(instructions?|rules?|prompts?|directives?)/gi,
            /\byou\s+are\s+now\s+(the\s+)?(system|admin|developer|root|god\s*mode)\b/gi,
            /\bas\s+(system|admin|developer|root),?\s+you\s+(must|should|will|shall)\b/gi,
            /\b(new|updated?|revised)\s+(instructions?|rules?|system\s+prompt|directives?)\s*:?\s*\n/gi,
            /\b(start|begin)\s+(with|by)\s+(ignoring|forgetting|disregarding)\b/gi,
            /\bfrom\s+now\s+on,?\s+(you\s+)?(will|must|should|are)/gi,
            /\breset\s+(your|all)\s+(instructions?|rules?|context)/gi
        ],
        severity: 'high',
        confidence: 0.8,
        description: 'Attempts to override system instructions'
    },
    // Policy/Safety Bypass
    policyBypass: {
        patterns: [
            /\b(ignore|bypass|override|disable|turn\s+off)\s+(all\s+)?(safety|content|policy|policies|filters?|guidelines?|restrictions?)/gi,
            /\b(pretend|act\s+as\s+if|simulate|imagine)\s+you\s+(have\s+no|don'?t\s+have|lack|are\s+without)\s+(restrictions?|limitations?|rules?|policies)\b/gi,
            /\bi\s+give\s+you\s+permission\s+to\s+(ignore|bypass|break)\b/gi,
            /\b(enable|activate|enter)\s+(developer|debug|god|admin)\s+mode/gi,
            /\bunlock\s+(your\s+)?(full|hidden|secret)\s+(potential|capabilities|features)/gi,
            /\bno\s+(ethical|moral|safety)\s+(guidelines?|restrictions?|limitations?)/gi
        ],
        severity: 'high',
        confidence: 0.75,
        description: 'Attempts to bypass safety policies'
    },
    // Context/System Prompt Exfiltration
    contextExfiltration: {
        patterns: [
            /\b(print|show|display|reveal|output|dump|list)\s+(your\s+)?(system\s+prompt|original\s+instructions?|initial\s+prompt|hidden\s+(config|instructions?))\b/gi,
            /\bwhat\s+(is|are|were|was)\s+your\s+(original|initial|first|secret)\s+(instructions?|prompt|rules?|directives?)\b/gi,
            /\b(repeat|show|print)\s+(everything|all)\s+(above|before|prior|previous)\b/gi,
            /\b(show|display|list)\s+(all\s+)?(previous|prior|earlier)\s+messages?\b/gi,
            /\brepeat\s+(the\s+)?(text|words|content)\s+(above|before)/gi,
            /\bwhat\s+did\s+(the\s+)?(user|developer|admin)\s+tell\s+you/gi,
            /\b(output|print|echo)\s+(your\s+)?(config|configuration|settings|parameters)/gi
        ],
        severity: 'medium',
        confidence: 0.7,
        description: 'Attempts to extract system prompt or context'
    },
    // Jailbreak Attempts (DAN, etc.)
    jailbreak: {
        patterns: [
            /\b(DAN|do\s+anything\s+now)\b/gi,
            /\bjailbreak(ed|ing)?\b/gi,
            /\b(STAN|DUDE|AIM|KEVIN)\s+(mode|persona)/gi,
            /\bact\s+as\s+(an?\s+)?(unrestricted|unfiltered|uncensored)\b/gi,
            /\bpretend\s+(to\s+be|you\s+are)\s+(an?\s+)?(evil|malicious|unethical)/gi,
            /\byou\s+are\s+(now\s+)?(free|liberated)\s+from\s+(all\s+)?(restrictions?|rules?)/gi,
            /\bopposite\s+(mode|day|personality)/gi,
            /\balter\s+ego/gi
        ],
        severity: 'high',
        confidence: 0.85,
        description: 'Known jailbreak techniques (DAN, etc.)'
    },
    // Roleplay Manipulation
    roleplayManipulation: {
        patterns: [
            /\bpretend\s+(you\s+are|to\s+be)\s+(an?\s+)?(hacker|criminal|villain|terrorist)/gi,
            /\broleplay\s+as\s+(an?\s+)?(malicious|evil|unethical)/gi,
            /\bact\s+as\s+if\s+you\s+(have\s+no|don'?t\s+have)\s+(morals?|ethics?)/gi,
            /\bimagine\s+you\s+(are|were)\s+(programmed|designed)\s+to\s+(harm|hurt|attack)/gi,
            /\bfor\s+(a\s+)?(fictional|hypothetical)\s+(story|scenario),?\s+(ignore|bypass)/gi
        ],
        severity: 'medium',
        confidence: 0.7,
        description: 'Roleplay-based manipulation attempts'
    },
    // Tool/Function Abuse
    toolAbuse: {
        patterns: [
            /\b(execute|run|call|invoke)\s+.*\b(delete|drop|truncate|remove|destroy|rm\s+-rf)\b/gi,
            /\b(send|email|post|upload|transmit)\s+.*\b(password|credentials?|secret|token|key|private)\b/gi,
            // No `\b` in front of the path alternatives: a word boundary cannot
            // occur between whitespace (or `/`) and a leading `/` or `.`, so a
            // leading `\b` would stop "read /etc/passwd" or "access ~/.ssh" from
            // ever matching. The trailing `\b` is kept only on the dot-names so
            // that ".env" does not match inside ".environment".
            /\b(read|write|access)\s+.*(\/etc\/|\/root\/|\.(?:ssh|aws|env)\b)/gi,
            /\b(curl|wget|fetch)\s+http/gi,
            /\b(eval|exec|system|shell_exec|subprocess)\s*\(/gi,
            /\bsudo\s+/gi,
            /\bchmod\s+777/gi
        ],
        severity: 'critical',
        confidence: 0.85,
        description: 'Attempts to abuse tools or execute dangerous commands'
    },
    // Encoding/Obfuscation Attacks
    encodingAttack: {
        patterns: [
            /\bdecode\s+(this|the\s+following)\s+(base64|hex|rot13)/gi,
            /\b(base64|hex|rot13)\s*:\s*[A-Za-z0-9+/=]{20,}/gi,
            /\\x[0-9a-fA-F]{2}/g,
            /\\u[0-9a-fA-F]{4}/g,
            /&#x?[0-9a-fA-F]+;/g,
            /\beval\s*\(\s*atob\s*\(/gi
        ],
        severity: 'medium',
        confidence: 0.6,
        description: 'Encoded or obfuscated attack payloads'
    },
    // Indirect Injection (via data)
    indirectInjection: {
        patterns: [
            /\[SYSTEM\]|\[INST\]|\[\/INST\]|<\|system\|>|<\|user\|>/gi,
            /###\s*(system|instruction|human|assistant)\s*:/gi,
            /<\/?s>|<\/?human>|<\/?assistant>/gi,
            /\bHuman:\s*|\bAssistant:\s*|\bSystem:\s*/gi
        ],
        severity: 'high',
        confidence: 0.75,
        description: 'Indirect injection via data/markup'
    },
    // Prompt Leaking via Completion
    completionLeak: {
        patterns: [
            /\bcomplete\s+the\s+(following|sentence|text)\s*:\s*["']?(system|instruction)/gi,
            /\bfinish\s+this\s+(sentence|prompt)\s*:\s*["']?you\s+are/gi,
            /\bwhat\s+comes\s+(after|next)\s*:\s*["']?(ignore|bypass|system)/gi
        ],
        severity: 'medium',
        confidence: 0.65,
        description: 'Attempts to leak prompts via completion'
    }
};
/**
 * Check for prompt injection attempts.
 *
 * Every category in PATTERNS is scanned. Within a category the patterns are
 * tried in order, and the first match that isLikelyFalsePositive() does not
 * reject produces the (single) finding for that category.
 *
 * All matches of a pattern are considered, not just the first one, so a
 * leading match that is dismissed as a false positive cannot hide a later
 * genuine match of the same pattern. This keeps the result consistent with
 * sanitizePromptInjection(), which also evaluates each match individually.
 *
 * @param input - The input text to check
 * @returns Array of findings (at most one per category; empty if none)
 */
function checkPromptInjection(input) {
    const findings = [];
    for (const [category, config] of Object.entries(PATTERNS)) {
        for (const pattern of config.patterns) {
            // Reset lastIndex for global patterns: matchAll() begins scanning
            // at the regex's current lastIndex.
            pattern.lastIndex = 0;
            // Walk the lazy iterator and stop at the first credible match
            // rather than collecting every match up front.
            let credibleMatch;
            for (const match of input.matchAll(pattern)) {
                if (!isLikelyFalsePositive(input, match[0])) {
                    credibleMatch = match;
                    break;
                }
            }
            if (credibleMatch !== undefined) {
                findings.push(createFinding(category, config.severity, credibleMatch, config.confidence, input, config.description));
                // Only report one finding per category
                break;
            }
        }
    }
    return findings;
}
/**
 * Sanitize input by replacing detected injection attempts with '[REMOVED]'.
 *
 * Each pattern in PATTERNS is applied in turn to the progressively sanitized
 * text. A match is replaced unless isLikelyFalsePositive() (evaluated against
 * the original, unmodified input) rejects it.
 *
 * Replacement happens at the position where the regex actually matched.
 * Replacing by matched *text* instead would rewrite the first textual
 * occurrence of that string, which can be a different, innocent span
 * (e.g. the "dan" inside "Jordan" for the word-bounded DAN pattern) while
 * leaving the real match in place.
 *
 * @param input - The input to sanitize
 * @returns `sanitized` text, `removed` entries formatted as
 *          "[category]: matched text", and `wasModified` (true when at least
 *          one span was replaced)
 */
function sanitizePromptInjection(input) {
    let sanitized = input;
    const removed = [];
    for (const [category, config] of Object.entries(PATTERNS)) {
        for (const pattern of config.patterns) {
            pattern.lastIndex = 0;
            // With a global regex, replace() visits every match in order and
            // substitutes the replacer's return value at that exact position.
            sanitized = sanitized.replace(pattern, (matchedText) => {
                if (isLikelyFalsePositive(input, matchedText)) {
                    // Returning the match unchanged leaves this span untouched.
                    return matchedText;
                }
                removed.push(`[${category}]: ${matchedText}`);
                return '[REMOVED]';
            });
        }
    }
    return {
        sanitized,
        removed,
        wasModified: removed.length > 0
    };
}
/**
 * Compute a single risk score for the input.
 *
 * The score is the largest `severity weight * finding confidence` over all
 * findings returned by checkPromptInjection(); it is 0 when nothing is found.
 *
 * @param input - The input to score
 * @returns Risk score between 0 and 1
 */
function getInjectionRiskScore(input) {
    const weightBySeverity = {
        medium: 0.4,
        high: 0.7,
        critical: 1.0
    };
    const detected = checkPromptInjection(input);
    // Fold to the maximum weighted confidence; the seed of 0 covers "no findings".
    return detected.reduce((highest, item) => {
        const weighted = weightBySeverity[item.severity] * item.confidence.value;
        return Math.max(highest, weighted);
    }, 0);
}
/**
 * Convenience predicate: reports whether checkPromptInjection() found nothing.
 * @param input - The input to check
 * @returns true if no injection detected
 */
function isInputSafe(input) {
    const detected = checkPromptInjection(input);
    return detected.length === 0;
}
/**
 * Build the finding object reported for one matched category.
 *
 * @param category - Key of the matched entry in PATTERNS
 * @param severity - Severity copied onto the finding
 * @param match - The regex match (match[0] is the matched text, match.index its offset)
 * @param confidence - Base confidence of the category
 * @param fullInput - The complete input the match came from
 * @param description - Category description, stored in metadata
 * @returns The assembled finding
 */
function createFinding(category, severity, match, confidence, fullInput, description) {
    // Per-category headline shown to the user.
    const messageByCategory = {
        systemOverride: 'System override attempt detected',
        policyBypass: 'Policy bypass attempt detected',
        contextExfiltration: 'System prompt exfiltration attempt detected',
        jailbreak: 'Jailbreak attempt detected (DAN/similar)',
        roleplayManipulation: 'Roleplay manipulation attempt detected',
        toolAbuse: 'Tool abuse attempt detected',
        encodingAttack: 'Encoded/obfuscated attack detected',
        indirectInjection: 'Indirect injection markers detected',
        completionLeak: 'Prompt leak via completion detected'
    };
    // Per-category suggested remediation.
    const adviceByCategory = {
        systemOverride: 'Reject or sanitize request. Use sanitizePromptInjection() to clean input.',
        policyBypass: 'Block request - attempting to bypass safety policies.',
        contextExfiltration: 'Sanitize request to prevent system prompt leakage.',
        jailbreak: 'Block immediately - known jailbreak technique.',
        roleplayManipulation: 'Review carefully - roleplay may be used to bypass safety.',
        toolAbuse: 'Block immediately - attempting dangerous operations.',
        encodingAttack: 'Decode and re-scan before processing.',
        indirectInjection: 'Sanitize data inputs - may contain injected instructions.',
        completionLeak: 'Block - attempting to extract prompts via completion.'
    };
    const matchedText = match[0];
    const clarity = analyzeContext(fullInput, matchedText);
    const id = `PROMPT_INJECTION_${category.toUpperCase()}`;
    // Unknown categories fall back to generic wording.
    const message = messageByCategory[category] || 'Prompt injection attempt detected';
    const recommendation = adviceByCategory[category] || 'Review and sanitize input.';
    const evidence = {
        textSample: (0, text_1.truncate)(matchedText, 100),
        pattern: category,
        context: (0, text_1.extractContext)(fullInput, match.index || 0, 50)
    };
    const scoredConfidence = calculateConfidence(confidence, clarity);
    return {
        id,
        category: 'security',
        severity,
        surface: 'input',
        message,
        recommendation,
        evidence,
        confidence: scoredConfidence,
        limitations: LIMITATIONS,
        methodology: METHODOLOGY,
        metadata: {
            attackType: category,
            description
        }
    };
}
/**
 * Combine the category's base confidence with the context clarity factor.
 *
 * @param baseConfidence - Base confidence of the matched category
 * @param contextClarity - Multiplier derived from the text around the match
 * @returns Object with the product as `value`, an interval of +/-0.15 around
 *          it clamped to [0, 1], the method label, and both input factors
 */
function calculateConfidence(baseConfidence, contextClarity) {
    const MARGIN = 0.15;
    const value = baseConfidence * contextClarity;
    // Keep the interval inside the valid 0..1 range.
    const lowerBound = Math.max(0, value - MARGIN);
    const upperBound = Math.min(1, value + MARGIN);
    const factors = {
        patternStrength: baseConfidence,
        contextClarity
    };
    return {
        value,
        interval: [lowerBound, upperBound],
        method: 'empirical',
        factors
    };
}
/**
 * Decide whether a match should be dismissed as a likely false positive.
 *
 * A match is dismissed when the full text contains an educational marker,
 * when it contains a security-question phrasing, or when isLikelyQuoted()
 * reports the matched text as quoted. The first two checks look at the whole
 * text, not just the neighbourhood of the match.
 *
 * @param fullText - The complete input
 * @param matchedText - The text matched by a detection pattern
 * @returns true when the match should be ignored
 */
function isLikelyFalsePositive(fullText, matchedText) {
    const occursInText = (marker) => marker.test(fullText);
    // Phrases signalling that the text is teaching about attacks
    const educationalMarkers = [
        /this\s+is\s+an\s+example\s+of/i,
        /for\s+educational\s+purposes/i,
        /demonstrate\s+how/i,
        /showing\s+you\s+what\s+not\s+to/i,
        /never\s+do\s+this/i,
        /avoid\s+doing/i,
        /here'?s?\s+how\s+attacks?\s+work/i,
        /security\s+training/i,
        /awareness\s+training/i
    ];
    // Phrases signalling a question about security
    const securityQuestions = [
        /how\s+(do|can)\s+(?:i|we|you)\s+prevent/i,
        /what\s+(?:is|are)\s+the\s+risks?\s+of/i,
        /how\s+to\s+protect\s+against/i,
        /how\s+to\s+detect/i,
        /what\s+is\s+prompt\s+injection/i
    ];
    const hasBenignFraming = educationalMarkers.some(occursInText) || securityQuestions.some(occursInText);
    if (hasBenignFraming) {
        return true;
    }
    // Quoted content
    return Boolean((0, text_1.isLikelyQuoted)(fullText, matchedText));
}
/**
 * Analyze the text around a match to refine confidence.
 *
 * Looks at up to 30 characters before and after the first occurrence of
 * `match` in `fullText` and returns a clarity multiplier:
 *  - 0.5 when a code fence (three backticks) is nearby
 *  - 0.6 when a quote character or a single backtick is nearby
 *  - 0.9 otherwise, including when `match` is not found in `fullText`
 *
 * @param fullText - The complete input
 * @param match - The matched text to locate
 * @returns Clarity multiplier (0.5, 0.6 or 0.9)
 */
function analyzeContext(fullText, match) {
    const WINDOW = 30;
    const matchIndex = fullText.indexOf(match);
    if (matchIndex === -1)
        return 0.9;
    const matchEnd = matchIndex + match.length;
    const before = fullText.substring(Math.max(0, matchIndex - WINDOW), matchIndex);
    const after = fullText.substring(matchEnd, matchEnd + WINDOW);
    const isNearby = (marker) => marker.test(before) || marker.test(after);
    // Lower confidence if in code block. This must be tested before the quote
    // check: the quote character class contains a backtick, so a code fence
    // would otherwise always be reported as a plain quote (0.6) and this
    // branch could never run.
    if (isNearby(/```/)) {
        return 0.5;
    }
    // Lower confidence if surrounded by quotes or code markers
    if (isNearby(/["'`]/)) {
        return 0.6;
    }
    return 0.9;
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"prompt-injection.js","sourceRoot":"","sources":["../../../src/csm6/security/prompt-injection.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;AA4KH,oDA+BC;AAOD,0DA2BC;AAOD,sDAiBC;AAOD,kCAEC;AA3QD,2CAA4E;AAE5E,MAAM,WAAW,GAAG;IAClB,mDAAmD;IACnD,uBAAuB;IACvB,4CAA4C;IAC5C,wCAAwC;IACxC,gDAAgD;IAChD,kDAAkD;CACnD,CAAC;AAEF,MAAM,WAAW,GACf,yCAAyC;IACzC,iEAAiE;IACjE,wEAAwE;IACxE,gEAAgE,CAAC;AASnE;;;GAGG;AACH,MAAM,QAAQ,GAAkC;IAC9C,wCAAwC;IACxC,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,wIAAwI;YACxI,2EAA2E;YAC3E,6EAA6E;YAC7E,2FAA2F;YAC3F,sEAAsE;YACtE,yDAAyD;YACzD,yDAAyD;SAC1D;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,0CAA0C;KACxD;IAED,uBAAuB;IACvB,YAAY,EAAE;QACZ,QAAQ,EAAE;YACR,gIAAgI;YAChI,oJAAoJ;YACpJ,gEAAgE;YAChE,kEAAkE;YAClE,iFAAiF;YACjF,4EAA4E;SAC7E;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,oCAAoC;KAClD;IAED,qCAAqC;IACrC,mBAAmB,EAAE;QACnB,QAAQ,EAAE;YACR,4JAA4J;YAC5J,qHAAqH;YACrH,8EAA8E;YAC9E,2EAA2E;YAC3E,6DAA6D;YAC7D,+DAA+D;YAC/D,gFAAgF;SACjF;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,8CAA8C;KAC5D;IAED,iCAAiC;IACjC,SAAS,EAAE;QACT,QAAQ,EAAE;YACR,iCAAiC;YACjC,0BAA0B;YAC1B,4CAA4C;YAC5C,gEAAgE;YAChE,yEAAyE;YACzE,oFAAoF;YACpF,uCAAuC;YACvC,iBAAiB;SAClB;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,wCAAwC;KACtD;IAED,wBAAwB;IACxB,oBAAoB,EAAE;QACpB,QAAQ,EAAE;YACR,kFAAkF;YAClF,yDAAyD;YACzD,yEAAyE;YACzE,kFAAkF;YAClF,kFAAkF;SACnF;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,sCAAsC;KACpD;IAED,sBAAsB;IACtB,SAAS,EAAE;QACT,QAAQ,EAAE;YACR,sFAAsF;YACtF,gGAAgG;YAChG,sEAAsE;YACtE,8BAA8B;YAC9B,mDAAmD;YACnD,aAAa;YACb,iBAAiB;SAClB;QACD,QAAQ,EAAE,UAAU;QACpB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,uDAAuD;KACrE;IAED,+BAA+B;IAC/B,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,0DAA0D;YAC1D,kDAAkD;YAClD,oBAAoB;YACpB,oBAAoB;YACpB,oBAAoB;YACpB,2BAA2B;SAC5B;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,uCAAuC;KACrD;IAED,gCAAgC;IAChC,iBAAiB,EAAE;QACjB,QAAQ,EAAE;YACR,0DAA0D;YAC1D,kDAAkD;YAClD,oCAAoC;YACp
C,4CAA4C;SAC7C;QACD,QAAQ,EAAE,MAAM;QAChB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,oCAAoC;KAClD;IAED,gCAAgC;IAChC,cAAc,EAAE;QACd,QAAQ,EAAE;YACR,gFAAgF;YAChF,4DAA4D;YAC5D,mEAAmE;SACpE;QACD,QAAQ,EAAE,QAAQ;QAClB,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,yCAAyC;KACvD;CACF,CAAC;AAEF;;;;GAIG;AACH,SAAgB,oBAAoB,CAAC,KAAa;IAChD,MAAM,QAAQ,GAAc,EAAE,CAAC;IAE/B,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1D,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,sCAAsC;YACtC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YACtB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAEpD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBAEzB,4BAA4B;gBAC5B,IAAI,CAAC,qBAAqB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5C,QAAQ,CAAC,IAAI,CAAC,aAAa,CACzB,QAAQ,EACR,MAAM,CAAC,QAAQ,EACf,KAAK,EACL,MAAM,CAAC,UAAU,EACjB,KAAK,EACL,MAAM,CAAC,WAAW,CACnB,CAAC,CAAC;oBAEH,uCAAuC;oBACvC,MAAM;gBACR,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,SAAgB,uBAAuB,CAAC,KAAa;IAKnD,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC1D,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;YACtB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAExD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;gBAC5B,IAAI,CAAC,qBAAqB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5C,OAAO,CAAC,IAAI,CAAC,IAAI,QAAQ,MAAM,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBAC3C,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,SAAS;QACT,OAAO;QACP,WAAW,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC;KAChC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAgB,qBAAqB,CAAC,KAAa;IACjD,MAAM,QAAQ,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAC7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEpC,MAAM,eAAe,GAA2B;QAC9C,MAAM,EAAE,GAAG;QACX,IAAI,EAAE,GAAG;QACT,QAAQ,EAAE,GAAG;KACd
,CAAC;IAEF,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAC3E,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,SAAgB,WAAW,CAAC,KAAa;IACvC,OAAO,oBAAoB,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CACpB,QAAgB,EAChB,QAA6B,EAC7B,KAAuB,EACvB,UAAkB,EAClB,SAAiB,EACjB,WAAmB;IAEnB,MAAM,QAAQ,GAA2B;QACvC,cAAc,EAAE,kCAAkC;QAClD,YAAY,EAAE,gCAAgC;QAC9C,mBAAmB,EAAE,6CAA6C;QAClE,SAAS,EAAE,0CAA0C;QACrD,oBAAoB,EAAE,wCAAwC;QAC9D,SAAS,EAAE,6BAA6B;QACxC,cAAc,EAAE,oCAAoC;QACpD,iBAAiB,EAAE,qCAAqC;QACxD,cAAc,EAAE,qCAAqC;KACtD,CAAC;IAEF,MAAM,eAAe,GAA2B;QAC9C,cAAc,EAAE,2EAA2E;QAC3F,YAAY,EAAE,uDAAuD;QACrE,mBAAmB,EAAE,oDAAoD;QACzE,SAAS,EAAE,gDAAgD;QAC3D,oBAAoB,EAAE,2DAA2D;QACjF,SAAS,EAAE,sDAAsD;QACjE,cAAc,EAAE,uCAAuC;QACvD,iBAAiB,EAAE,2DAA2D;QAC9E,cAAc,EAAE,uDAAuD;KACxE,CAAC;IAEF,MAAM,cAAc,GAAG,cAAc,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,OAAO;QACL,EAAE,EAAE,oBAAoB,QAAQ,CAAC,WAAW,EAAE,EAAE;QAChD,QAAQ,EAAE,UAAU;QACpB,QAAQ;QACR,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,QAAQ,CAAC,QAAQ,CAAC,IAAI,mCAAmC;QAClE,cAAc,EAAE,eAAe,CAAC,QAAQ,CAAC,IAAI,4BAA4B;QAEzE,QAAQ,EAAE;YACR,UAAU,EAAE,IAAA,eAAQ,EAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC;YACnC,OAAO,EAAE,QAAQ;YACjB,OAAO,EAAE,IAAA,qBAAc,EAAC,SAAS,EAAE,KAAK,CAAC,KAAK,IAAI,CAAC,EAAE,EAAE,CAAC;SACzD;QAED,UAAU,EAAE,mBAAmB,CAAC,UAAU,EAAE,cAAc,CAAC;QAC3D,WAAW,EAAE,WAAW;QACxB,WAAW,EAAE,WAAW;QACxB,QAAQ,EAAE;YACR,UAAU,EAAE,QAAQ;YACpB,WAAW;SACZ;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,cAAsB,EAAE,cAAsB;IACzE,MAAM,KAAK,GAAG,cAAc,GAAG,cAAc,CAAC;IAE9C,OAAO;QACL,KAAK;QACL,QAAQ,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,CAAC;QAChE,MAAM,EAAE,WAAW;QACnB,OAAO,EAAE;YACP,eAAe,EAAE,cAAc;YAC/B,cAAc;SACf;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,QAAgB,EAAE,WAAmB;IAClE,sBAAsB;IACtB,MAAM,k
BAAkB,GAAG;QACzB,gCAAgC;QAChC,+BAA+B;QAC/B,oBAAoB;QACpB,kCAAkC;QAClC,oBAAoB;QACpB,gBAAgB;QAChB,mCAAmC;QACnC,sBAAsB;QACtB,uBAAuB;KACxB,CAAC;IAEF,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QACnD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0BAA0B;IAC1B,MAAM,iBAAiB,GAAG;QACxB,0CAA0C;QAC1C,wCAAwC;QACxC,+BAA+B;QAC/B,oBAAoB;QACpB,iCAAiC;KAClC,CAAC;IAEF,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QAClD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,iBAAiB;IACjB,IAAI,IAAA,qBAAc,EAAC,QAAQ,EAAE,WAAW,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,QAAgB,EAAE,KAAa;IACrD,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC3C,IAAI,UAAU,KAAK,CAAC,CAAC;QAAE,OAAO,GAAG,CAAC;IAElC,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC5E,MAAM,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,UAAU,GAAG,KAAK,CAAC,MAAM,EAAE,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;IAE5F,2DAA2D;IAC3D,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAChD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,oCAAoC;IACpC,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,OAAO,GAAG,CAAC;IACb,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC","sourcesContent":["/**\n * Prompt Injection Detection & Deterrence\n * \n * Comprehensive detection based on OWASP LLM-01.\n * Includes detection, sanitization, and deterrence utilities.\n * \n * @module csm6/security/prompt-injection\n * @author Haiec\n * @license MIT\n */\n\nimport { Finding, ConfidenceScore } from '../../types/results';\nimport { truncate, extractContext, isLikelyQuoted } from '../../utils/text';\n\nconst LIMITATIONS = [\n  'Pattern-based detection (novel attacks may evade)',\n  'English language only',\n  'Context-dependent false positives possible',\n  'Obfuscated attacks may not be detected',\n  'Base64/encoding attacks have limited detection',\n  'Educational 
content about attacks may be flagged'\n];\n\nconst METHODOLOGY =\n  'OWASP LLM-01 aligned pattern matching. ' +\n  'Detects known prompt injection techniques using regex patterns ' +\n  'validated against public attack datasets (HackAPrompt, Gandalf, etc). ' +\n  'Accuracy: ~70-85% on known attacks, lower on novel variations.';\n\ninterface PatternConfig {\n  patterns: RegExp[];\n  severity: 'medium' | 'high' | 'critical';\n  confidence: number;\n  description: string;\n}\n\n/**\n * Comprehensive prompt injection patterns\n * Categories based on OWASP LLM Top 10 and real-world attack datasets\n */\nconst PATTERNS: Record<string, PatternConfig> = {\n  // OWASP LLM01: System Override Attempts\n  systemOverride: {\n    patterns: [\n      /\\b(ignore|bypass|forget|disregard)\\s+(all\\s+)?(your\\s+)?(previous|prior|above|earlier)\\s+(instructions?|rules?|prompts?|directives?)/gi,\n      /\\byou\\s+are\\s+now\\s+(the\\s+)?(system|admin|developer|root|god\\s*mode)\\b/gi,\n      /\\bas\\s+(system|admin|developer|root),?\\s+you\\s+(must|should|will|shall)\\b/gi,\n      /\\b(new|updated?|revised)\\s+(instructions?|rules?|system\\s+prompt|directives?)\\s*:?\\s*\\n/gi,\n      /\\b(start|begin)\\s+(with|by)\\s+(ignoring|forgetting|disregarding)\\b/gi,\n      /\\bfrom\\s+now\\s+on,?\\s+(you\\s+)?(will|must|should|are)/gi,\n      /\\breset\\s+(your|all)\\s+(instructions?|rules?|context)/gi\n    ],\n    severity: 'high',\n    confidence: 0.8,\n    description: 'Attempts to override system instructions'\n  },\n  \n  // Policy/Safety Bypass\n  policyBypass: {\n    patterns: [\n      /\\b(ignore|bypass|override|disable|turn\\s+off)\\s+(all\\s+)?(safety|content|policy|policies|filters?|guidelines?|restrictions?)/gi,\n      /\\b(pretend|act\\s+as\\s+if|simulate|imagine)\\s+you\\s+(have\\s+no|don'?t\\s+have|lack|are\\s+without)\\s+(restrictions?|limitations?|rules?|policies)\\b/gi,\n      /\\bi\\s+give\\s+you\\s+permission\\s+to\\s+(ignore|bypass|break)\\b/gi,\n      
/\\b(enable|activate|enter)\\s+(developer|debug|god|admin)\\s+mode/gi,\n      /\\bunlock\\s+(your\\s+)?(full|hidden|secret)\\s+(potential|capabilities|features)/gi,\n      /\\bno\\s+(ethical|moral|safety)\\s+(guidelines?|restrictions?|limitations?)/gi\n    ],\n    severity: 'high',\n    confidence: 0.75,\n    description: 'Attempts to bypass safety policies'\n  },\n  \n  // Context/System Prompt Exfiltration\n  contextExfiltration: {\n    patterns: [\n      /\\b(print|show|display|reveal|output|dump|list)\\s+(your\\s+)?(system\\s+prompt|original\\s+instructions?|initial\\s+prompt|hidden\\s+(config|instructions?))\\b/gi,\n      /\\bwhat\\s+(is|are|were|was)\\s+your\\s+(original|initial|first|secret)\\s+(instructions?|prompt|rules?|directives?)\\b/gi,\n      /\\b(repeat|show|print)\\s+(everything|all)\\s+(above|before|prior|previous)\\b/gi,\n      /\\b(show|display|list)\\s+(all\\s+)?(previous|prior|earlier)\\s+messages?\\b/gi,\n      /\\brepeat\\s+(the\\s+)?(text|words|content)\\s+(above|before)/gi,\n      /\\bwhat\\s+did\\s+(the\\s+)?(user|developer|admin)\\s+tell\\s+you/gi,\n      /\\b(output|print|echo)\\s+(your\\s+)?(config|configuration|settings|parameters)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.7,\n    description: 'Attempts to extract system prompt or context'\n  },\n  \n  // Jailbreak Attempts (DAN, etc.)\n  jailbreak: {\n    patterns: [\n      /\\b(DAN|do\\s+anything\\s+now)\\b/gi,\n      /\\bjailbreak(ed|ing)?\\b/gi,\n      /\\b(STAN|DUDE|AIM|KEVIN)\\s+(mode|persona)/gi,\n      /\\bact\\s+as\\s+(an?\\s+)?(unrestricted|unfiltered|uncensored)\\b/gi,\n      /\\bpretend\\s+(to\\s+be|you\\s+are)\\s+(an?\\s+)?(evil|malicious|unethical)/gi,\n      /\\byou\\s+are\\s+(now\\s+)?(free|liberated)\\s+from\\s+(all\\s+)?(restrictions?|rules?)/gi,\n      /\\bopposite\\s+(mode|day|personality)/gi,\n      /\\balter\\s+ego/gi\n    ],\n    severity: 'high',\n    confidence: 0.85,\n    description: 'Known jailbreak techniques (DAN, etc.)'\n  },\n  \n  // 
Roleplay Manipulation\n  roleplayManipulation: {\n    patterns: [\n      /\\bpretend\\s+(you\\s+are|to\\s+be)\\s+(an?\\s+)?(hacker|criminal|villain|terrorist)/gi,\n      /\\broleplay\\s+as\\s+(an?\\s+)?(malicious|evil|unethical)/gi,\n      /\\bact\\s+as\\s+if\\s+you\\s+(have\\s+no|don'?t\\s+have)\\s+(morals?|ethics?)/gi,\n      /\\bimagine\\s+you\\s+(are|were)\\s+(programmed|designed)\\s+to\\s+(harm|hurt|attack)/gi,\n      /\\bfor\\s+(a\\s+)?(fictional|hypothetical)\\s+(story|scenario),?\\s+(ignore|bypass)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.7,\n    description: 'Roleplay-based manipulation attempts'\n  },\n  \n  // Tool/Function Abuse\n  toolAbuse: {\n    patterns: [\n      /\\b(execute|run|call|invoke)\\s+.*\\b(delete|drop|truncate|remove|destroy|rm\\s+-rf)\\b/gi,\n      /\\b(send|email|post|upload|transmit)\\s+.*\\b(password|credentials?|secret|token|key|private)\\b/gi,\n      /\\b(read|write|access)\\s+.*\\b(\\/etc\\/|\\/root\\/|\\.ssh|\\.aws|\\.env)\\b/gi,\n      /\\b(curl|wget|fetch)\\s+http/gi,\n      /\\b(eval|exec|system|shell_exec|subprocess)\\s*\\(/gi,\n      /\\bsudo\\s+/gi,\n      /\\bchmod\\s+777/gi\n    ],\n    severity: 'critical',\n    confidence: 0.85,\n    description: 'Attempts to abuse tools or execute dangerous commands'\n  },\n  \n  // Encoding/Obfuscation Attacks\n  encodingAttack: {\n    patterns: [\n      /\\bdecode\\s+(this|the\\s+following)\\s+(base64|hex|rot13)/gi,\n      /\\b(base64|hex|rot13)\\s*:\\s*[A-Za-z0-9+/=]{20,}/gi,\n      /\\\\x[0-9a-fA-F]{2}/g,\n      /\\\\u[0-9a-fA-F]{4}/g,\n      /&#x?[0-9a-fA-F]+;/g,\n      /\\beval\\s*\\(\\s*atob\\s*\\(/gi\n    ],\n    severity: 'medium',\n    confidence: 0.6,\n    description: 'Encoded or obfuscated attack payloads'\n  },\n  \n  // Indirect Injection (via data)\n  indirectInjection: {\n    patterns: [\n      /\\[SYSTEM\\]|\\[INST\\]|\\[\\/INST\\]|<\\|system\\|>|<\\|user\\|>/gi,\n      /###\\s*(system|instruction|human|assistant)\\s*:/gi,\n      
/<\\/?s>|<\\/?human>|<\\/?assistant>/gi,\n      /\\bHuman:\\s*|\\bAssistant:\\s*|\\bSystem:\\s*/gi\n    ],\n    severity: 'high',\n    confidence: 0.75,\n    description: 'Indirect injection via data/markup'\n  },\n  \n  // Prompt Leaking via Completion\n  completionLeak: {\n    patterns: [\n      /\\bcomplete\\s+the\\s+(following|sentence|text)\\s*:\\s*[\"']?(system|instruction)/gi,\n      /\\bfinish\\s+this\\s+(sentence|prompt)\\s*:\\s*[\"']?you\\s+are/gi,\n      /\\bwhat\\s+comes\\s+(after|next)\\s*:\\s*[\"']?(ignore|bypass|system)/gi\n    ],\n    severity: 'medium',\n    confidence: 0.65,\n    description: 'Attempts to leak prompts via completion'\n  }\n};\n\n/**\n * Check for prompt injection attempts\n * @param input - The input text to check\n * @returns Array of findings\n */\nexport function checkPromptInjection(input: string): Finding[] {\n  const findings: Finding[] = [];\n  \n  for (const [category, config] of Object.entries(PATTERNS)) {\n    for (const pattern of config.patterns) {\n      // Reset lastIndex for global patterns\n      pattern.lastIndex = 0;\n      const matches = Array.from(input.matchAll(pattern));\n      \n      if (matches.length > 0) {\n        const match = matches[0];\n        \n        // Check for false positives\n        if (!isLikelyFalsePositive(input, match[0])) {\n          findings.push(createFinding(\n            category,\n            config.severity,\n            match,\n            config.confidence,\n            input,\n            config.description\n          ));\n          \n          // Only report one finding per category\n          break;\n        }\n      }\n    }\n  }\n  \n  return findings;\n}\n\n/**\n * Sanitize input by removing or neutralizing injection attempts\n * @param input - The input to sanitize\n * @returns Sanitized input and list of removed patterns\n */\nexport function sanitizePromptInjection(input: string): { \n  sanitized: string; \n  removed: string[];\n  wasModified: boolean;\n} {\n  let 
sanitized = input;\n  const removed: string[] = [];\n  \n  for (const [category, config] of Object.entries(PATTERNS)) {\n    for (const pattern of config.patterns) {\n      pattern.lastIndex = 0;\n      const matches = Array.from(sanitized.matchAll(pattern));\n      \n      for (const match of matches) {\n        if (!isLikelyFalsePositive(input, match[0])) {\n          removed.push(`[${category}]: ${match[0]}`);\n          sanitized = sanitized.replace(match[0], '[REMOVED]');\n        }\n      }\n    }\n  }\n  \n  return {\n    sanitized,\n    removed,\n    wasModified: removed.length > 0\n  };\n}\n\n/**\n * Get risk score for input (0-1)\n * @param input - The input to score\n * @returns Risk score between 0 and 1\n */\nexport function getInjectionRiskScore(input: string): number {\n  const findings = checkPromptInjection(input);\n  if (findings.length === 0) return 0;\n  \n  const severityWeights: Record<string, number> = {\n    medium: 0.4,\n    high: 0.7,\n    critical: 1.0\n  };\n  \n  let maxScore = 0;\n  for (const finding of findings) {\n    const score = severityWeights[finding.severity] * finding.confidence.value;\n    maxScore = Math.max(maxScore, score);\n  }\n  \n  return maxScore;\n}\n\n/**\n * Quick check if input is likely safe (no injection detected)\n * @param input - The input to check\n * @returns true if no injection detected\n */\nexport function isInputSafe(input: string): boolean {\n  return checkPromptInjection(input).length === 0;\n}\n\n/**\n * Create a finding object\n */\nfunction createFinding(\n  category: string,\n  severity: Finding['severity'],\n  match: RegExpMatchArray,\n  confidence: number,\n  fullInput: string,\n  description: string\n): Finding {\n  const messages: Record<string, string> = {\n    systemOverride: 'System override attempt detected',\n    policyBypass: 'Policy bypass attempt detected',\n    contextExfiltration: 'System prompt exfiltration attempt detected',\n    jailbreak: 'Jailbreak attempt detected 
(DAN/similar)',\n    roleplayManipulation: 'Roleplay manipulation attempt detected',\n    toolAbuse: 'Tool abuse attempt detected',\n    encodingAttack: 'Encoded/obfuscated attack detected',\n    indirectInjection: 'Indirect injection markers detected',\n    completionLeak: 'Prompt leak via completion detected'\n  };\n  \n  const recommendations: Record<string, string> = {\n    systemOverride: 'Reject or sanitize request. Use sanitizePromptInjection() to clean input.',\n    policyBypass: 'Block request - attempting to bypass safety policies.',\n    contextExfiltration: 'Sanitize request to prevent system prompt leakage.',\n    jailbreak: 'Block immediately - known jailbreak technique.',\n    roleplayManipulation: 'Review carefully - roleplay may be used to bypass safety.',\n    toolAbuse: 'Block immediately - attempting dangerous operations.',\n    encodingAttack: 'Decode and re-scan before processing.',\n    indirectInjection: 'Sanitize data inputs - may contain injected instructions.',\n    completionLeak: 'Block - attempting to extract prompts via completion.'\n  };\n  \n  const contextClarity = analyzeContext(fullInput, match[0]);\n  \n  return {\n    id: `PROMPT_INJECTION_${category.toUpperCase()}`,\n    category: 'security',\n    severity,\n    surface: 'input',\n    message: messages[category] || 'Prompt injection attempt detected',\n    recommendation: recommendations[category] || 'Review and sanitize input.',\n    \n    evidence: {\n      textSample: truncate(match[0], 100),\n      pattern: category,\n      context: extractContext(fullInput, match.index || 0, 50)\n    },\n    \n    confidence: calculateConfidence(confidence, contextClarity),\n    limitations: LIMITATIONS,\n    methodology: METHODOLOGY,\n    metadata: {\n      attackType: category,\n      description\n    }\n  };\n}\n\n/**\n * Calculate confidence score\n */\nfunction calculateConfidence(baseConfidence: number, contextClarity: number): ConfidenceScore {\n  const value = baseConfidence * 
contextClarity;\n  \n  return {\n    value,\n    interval: [Math.max(0, value - 0.15), Math.min(1, value + 0.15)],\n    method: 'empirical',\n    factors: {\n      patternStrength: baseConfidence,\n      contextClarity\n    }\n  };\n}\n\n/**\n * Check if this is likely a false positive\n */\nfunction isLikelyFalsePositive(fullText: string, matchedText: string): boolean {\n  // Educational context\n  const educationalMarkers = [\n    /this\\s+is\\s+an\\s+example\\s+of/i,\n    /for\\s+educational\\s+purposes/i,\n    /demonstrate\\s+how/i,\n    /showing\\s+you\\s+what\\s+not\\s+to/i,\n    /never\\s+do\\s+this/i,\n    /avoid\\s+doing/i,\n    /here'?s?\\s+how\\s+attacks?\\s+work/i,\n    /security\\s+training/i,\n    /awareness\\s+training/i\n  ];\n  \n  if (educationalMarkers.some(p => p.test(fullText))) {\n    return true;\n  }\n  \n  // Question about security\n  const securityQuestions = [\n    /how\\s+(do|can)\\s+(?:i|we|you)\\s+prevent/i,\n    /what\\s+(?:is|are)\\s+the\\s+risks?\\s+of/i,\n    /how\\s+to\\s+protect\\s+against/i,\n    /how\\s+to\\s+detect/i,\n    /what\\s+is\\s+prompt\\s+injection/i\n  ];\n  \n  if (securityQuestions.some(p => p.test(fullText))) {\n    return true;\n  }\n  \n  // Quoted content\n  if (isLikelyQuoted(fullText, matchedText)) {\n    return true;\n  }\n  \n  return false;\n}\n\n/**\n * Analyze context to refine confidence\n */\nfunction analyzeContext(fullText: string, match: string): number {\n  const matchIndex = fullText.indexOf(match);\n  if (matchIndex === -1) return 0.9;\n  \n  const before = fullText.substring(Math.max(0, matchIndex - 30), matchIndex);\n  const after = fullText.substring(matchIndex + match.length, matchIndex + match.length + 30);\n  \n  // Lower confidence if surrounded by quotes or code markers\n  if (/[\"'`]/.test(before) || /[\"'`]/.test(after)) {\n    return 0.6;\n  }\n  \n  // Lower confidence if in code block\n  if (/```/.test(before) || /```/.test(after)) {\n    return 0.5;\n  }\n  \n  return 0.9;\n}\n"]}