secuprompt
Version:
Protect your AI from Prompt Injection
95 lines (94 loc) • 4.2 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.secuprompt = exports.run_secuprompt = void 0;
const signature_1 = require("./modules/signature");
const semantic_1 = require("./modules/semantic");
const integrity_1 = require("./modules/integrity");
const rag_1 = require("./modules/rag");
const unicode_1 = require("./modules/unicode");
const sentence_guard_1 = require("./modules/sentence_guard");
const default_weights = {
signature: 0.35,
semantic: 0.25,
integrity: 0.2,
rag: 0.3,
unicode: 0.05,
segments: 0.2
};
const collect = (detail, tag, score) => detail.length ? detail : score > 0 ? [tag] : [];
const run_secuprompt = (input, weights = default_weights) => {
const system = input.system ?? "";
const signature = (0, signature_1.score_signatures)(input.user);
const semantic = (0, semantic_1.score_semantic)(input.user);
const integrity = (0, integrity_1.score_integrity)(system, input.user);
const rag = (0, rag_1.score_rag)(input.rag);
const unicode = (0, unicode_1.score_unicode)(input.user);
const segments = (0, sentence_guard_1.score_segments)(system, input.user);
let risk = signature.score * (weights.signature ?? default_weights.signature) +
semantic.score * (weights.semantic ?? default_weights.semantic) +
integrity.score * (weights.integrity ?? default_weights.integrity) +
rag.score * (weights.rag ?? default_weights.rag) +
unicode.score * (weights.unicode ?? default_weights.unicode) +
segments.score * (weights.segments ?? default_weights.segments);
// base action from numeric risk
let action = "allow";
if (risk > 0.65)
action = "block";
else if (risk > 0.35)
action = "sanitize";
const reasons = [
...collect(signature.detail, "sig_detect", signature.score),
...collect(semantic.detail, "semantic_threat", semantic.score),
...collect(integrity.detail, "integrity_risk", integrity.score),
...collect(rag.detail, "rag_poison", rag.score),
...collect(unicode.detail, "unicode_anomaly", unicode.score),
...collect(segments.detail, "segment_threat", segments.score)
];
const sanitized_chunks = (0, rag_1.sanitize_rag_chunks)(input.rag, rag.detail);
const { sanitized: sanitized_user, removed: user_removed, changed: user_changed } = (0, sentence_guard_1.sanitize_user_input)(system, input.user);
// hard rules: any removal/sanitize forces sanitize or block regardless of numeric risk
const ragChanged = sanitized_chunks.some(chunk => chunk.startsWith("[rag chunk"));
const ragDrops = rag.detail.some(reason => reason.includes("_drop"));
const hasThreat = ragDrops ||
rag.detail.length > 0 ||
sanitized_chunks.length > 0 ||
user_removed.length > 0 ||
ragChanged ||
semantic.score >= 0.5 ||
signature.score > 0 ||
segments.score >= 0.1;
if (hasThreat) {
action = "block";
risk = Math.max(risk, 0.99);
}
const removal_note = user_removed.length > 0
? `[secuprompt removed ${user_removed.length} segment(s): ${user_removed
.map(seg => seg.reasons[0] ?? "segment_risk")
.join(", ")}]`
: "";
const user_line = user_changed
? sanitized_user.length > 0
? `[sanitized user] ${sanitized_user}`
: "[secuprompt removed user content]"
: "";
const sanitized_parts = [
user_line,
removal_note,
sanitized_chunks.length ? sanitized_chunks.join("\n") : ""
].filter(Boolean);
const sanitized_prompt = sanitized_parts.length ? sanitized_parts.join("\n") : undefined;
return {
allowed: action === "allow",
action,
risk: Number(risk.toFixed(3)),
reason: Array.from(new Set(reasons)),
sanitized_prompt,
modules: { signature, semantic, integrity, rag, unicode, segments }
};
};
exports.run_secuprompt = run_secuprompt;
exports.secuprompt = {
scan: exports.run_secuprompt
};
exports.default = exports.secuprompt;
// complexity: overall runtime goes linear with prompt length plus signature count