UNPKG

secuprompt

Version:

Protect your AI from Prompt Injection

79 lines (78 loc) 3.52 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.sanitize_user_input = exports.score_segments = exports.analyze_user_sentences = void 0;
const embedding_1 = require("../core/embedding");
const signature_1 = require("./signature");
const semantic_1 = require("./semantic");
const integrity_1 = require("./integrity");

/**
 * Split raw text into trimmed, non-empty sentences.
 * The lookbehind split keeps the terminating punctuation (., ! or ?)
 * attached to its sentence instead of discarding it.
 * @param {string} txt - Raw user text.
 * @returns {string[]} Trimmed sentences, empty fragments removed.
 */
const sentence_split = (txt) => txt
    .split(/(?<=[\.!\?])/)
    .map(s => s.trim())
    .filter(Boolean);

// Regex heuristics for common prompt-injection phrasings. A single match on
// any of these forces the sentence's risk score to the maximum (see
// analyze_sentence below); the matched labels are surfaced in `reasons`.
const injection_hints = [
    { label: "hint_ignore_chain", reg: /ignore (all|any|previous).*(instruction|rule)/i },
    { label: "hint_reveal_system", reg: /reveal (the )?(system|developer) (prompt|message)/i },
    { label: "hint_role_swap", reg: /act as|pretend you are|from now on/i },
    { label: "hint_unrestricted", reg: /unfiltered|unrestricted|without limitation|no rules/i },
    { label: "hint_override_policy", reg: /override.*policy|bypass.*policy/i },
    { label: "hint_even_when_forbidden", reg: /even when (?:it\s)?is forbidden|obey me/i },
    { label: "hint_system_terms", reg: /developer|system prompt|policy stack|instruction set/i },
    { label: "hint_hidden", reg: /hidden directive|hidden instruction|unsafe payload/i }
];

// Relative weights of the three scoring passes (they sum to 1.0).
const weight_signature = 0.55;
const weight_semantic = 0.25;
const weight_integrity = 0.2;
// Sentences scoring at or above this threshold are reported and removed.
const removal_threshold = 0.1;

/**
 * Score one sentence for injection risk against the system prompt.
 * The score is a weighted blend of the signature, semantic and integrity
 * passes — unless any injection_hints regex matches, in which case the
 * sentence is conclusively scored 1. (The original compiled output also
 * computed a per-hint bonus before that override; the bonus was dead code
 * — zero when no hints matched, overridden when any did — and is removed.)
 * @param {string} system - The system prompt to compare against.
 * @param {string} sentence - One sentence of user input.
 * @returns {{text: string, score: number, reasons: string[]}}
 */
const analyze_sentence = (system, sentence) => {
    const sig = signature_1.score_signatures(sentence);
    const sem = semantic_1.score_semantic(sentence);
    const integ = integrity_1.score_integrity(system, sentence);
    const hints = injection_hints.filter(({ reg }) => reg.test(sentence));
    const score = hints.length > 0
        ? 1
        : embedding_1.normalize(
            sig.score * weight_signature +
            sem.score * weight_semantic +
            integ.score * weight_integrity);
    const reasons = [
        ...sig.detail,
        ...sem.detail,
        ...integ.detail,
        ...hints.map(h => h.label)
    ];
    return { text: sentence, score, reasons };
};

/**
 * Split user input into sentences and score each against the system prompt.
 * @param {string} system - The system prompt.
 * @param {string} user - Raw user input.
 * @returns {Array<{text: string, score: number, reasons: string[]}>}
 */
const analyze_user_sentences = (system, user) => {
    const sentences = sentence_split(user);
    return sentences.map(s => analyze_sentence(system, s));
};
exports.analyze_user_sentences = analyze_user_sentences;

/**
 * Aggregate per-sentence risk into a single segment score.
 * The overall score is the maximum sentence score (re-normalized); `detail`
 * lists each risky sentence as "segment_<index>_risk_<score to 2dp>".
 * @param {string} system - The system prompt.
 * @param {string} user - Raw user input.
 * @returns {{score: number, detail: string[]}}
 */
const score_segments = (system, user) => {
    const sentences = exports.analyze_user_sentences(system, user);
    if (!sentences.length)
        return { score: 0, detail: [] };
    const maxScore = Math.max(...sentences.map(s => s.score));
    const risky = sentences
        .map((seg, idx) => ({ seg, idx }))
        .filter(item => item.seg.score >= removal_threshold)
        .map(({ seg, idx }) => `segment_${idx}_risk_${seg.score.toFixed(2)}`);
    return { score: embedding_1.normalize(maxScore), detail: risky };
};
exports.score_segments = score_segments;

/**
 * Remove risky sentences from user input.
 * Sentences at or above removal_threshold are dropped (and reported in
 * `removed` with their reasons); the remainder is re-joined with single
 * spaces and trimmed.
 * @param {string} system - The system prompt.
 * @param {string} user - Raw user input.
 * @returns {{sanitized: string, removed: Array<{text: string, reasons: string[]}>, changed: boolean}}
 */
const sanitize_user_input = (system, user) => {
    const sentences = exports.analyze_user_sentences(system, user);
    if (!sentences.length)
        return { sanitized: user.trim(), removed: [], changed: false };
    const safe = [];
    const removed = [];
    sentences.forEach(seg => {
        if (seg.score >= removal_threshold)
            removed.push({ text: seg.text, reasons: seg.reasons });
        else
            safe.push(seg.text);
    });
    return {
        sanitized: safe.join(" ").replace(/\s+/g, " ").trim(),
        removed,
        changed: removed.length > 0
    };
};
exports.sanitize_user_input = sanitize_user_input;