secuprompt
Version:
Protect your AI from Prompt Injection
146 lines (145 loc) • 4.99 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.sanitize_rag_chunks = exports.drop_rag_chunks = exports.score_rag = void 0;
const data_1 = require("../data");
const embedding_1 = require("../core/embedding");
/**
 * Escape regular-expression metacharacters so `txt` can be embedded in a
 * dynamically built RegExp and matched literally.
 *
 * @param {string} txt - Raw text to escape.
 * @returns {string} `txt` with each metacharacter prefixed by a backslash.
 */
const esc = (txt) => {
    const metachars = /[.*+?^${}()|[\]\\]/g;
    // A replacer callback avoids relying on the "$&" replacement token.
    return txt.replace(metachars, (hit) => `\\${hit}`);
};
/**
 * Count whole-word occurrences of every entry of `list` inside `txt`.
 *
 * The text is lower-cased before matching; the entries of `list` are used
 * as given (only regex-escaped), so they only match when they are already
 * lower-case.
 *
 * @param {string} txt - Text to search.
 * @param {Iterable<string>} list - Words to look for.
 * @returns {number} Sum of the match counts of all words.
 */
const count_words = (txt, list) => {
    const haystack = txt.toLowerCase();
    return [...list].reduce((sum, word) => {
        // \b on both sides restricts hits to word boundaries.
        const pattern = new RegExp(`\\b${esc(word)}\\b`, "g");
        const hits = haystack.match(pattern);
        return sum + (hits === null ? 0 : hits.length);
    }, 0);
};
/**
 * Break text into sentence-like fragments.
 *
 * The text is cut immediately after every '.', '!' or '?' (the punctuation
 * stays attached to the preceding fragment). Each fragment is trimmed and
 * empty fragments are discarded.
 *
 * @param {string} txt - Text to split.
 * @returns {string[]} Trimmed, non-empty fragments in original order.
 */
const sentence_split = (txt) => {
    const fragments = [];
    for (const piece of txt.split(/(?<=[\.!\?])/)) {
        const trimmed = piece.trim();
        if (trimmed) {
            fragments.push(trimmed);
        }
    }
    return fragments;
};
// Case-insensitive patterns that mark a sentence as an instruction when any
// of them matches (see is_imperative_sentence). The patterns are unanchored,
// so they match anywhere in the sentence, including inside longer words.
const imperative_triggers = [
// "must" / "should", except when directly followed by whitespace and "not".
/must(?!\s+not)/i,
/should(?!\s+not)/i,
/need to/i,
/you will/i,
/follow exactly/i,
/drop all safeties/i,
/even if it conflicts/i,
/do not comply/i,
/assistant must/i
];
/**
 * Decide whether a sentence reads as an instruction.
 *
 * A sentence is imperative when any of the following holds:
 *   1. its first whitespace-delimited token (lower-cased) is listed in
 *      rag_config.imperative_words;
 *   2. one of the imperative_triggers patterns matches it;
 *   3. its lower-cased text contains a rag_config.role_words entry AND one
 *      of the directive verbs must/should/need/follow/obey/ignore.
 *
 * @param {string} sentence - A single sentence (callers pass trimmed text).
 * @returns {boolean} True when the sentence looks like an instruction.
 */
const is_imperative_sentence = (sentence) => {
    // split() always yields at least one element; the default only mirrors
    // the defensive fallback to an empty string.
    const [head = ""] = sentence.split(/\s+/);
    const leading_word = head.toLowerCase();
    if (data_1.rag_config.imperative_words.includes(leading_word)) {
        return true;
    }
    for (const trigger of imperative_triggers) {
        if (trigger.test(sentence)) {
            return true;
        }
    }
    const lowered = sentence.toLowerCase();
    const directive_verbs = /must|should|need|follow|obey|ignore/;
    return data_1.rag_config.role_words.some((role) => lowered.includes(role) && directive_verbs.test(lowered));
};
// Embedding of the configured semantic probe, computed once when the module
// is loaded. analyze_chunk compares each chunk's embedding against it with
// cosine similarity.
const rag_center = (0, embedding_1.embed)(data_1.rag_config.semantic_probe);
/**
 * Remove instruction-like and role-mentioning sentences from a chunk.
 *
 * A sentence is removed when is_imperative_sentence() accepts it or when its
 * lower-cased text contains any rag_config.role_words entry. The remaining
 * sentences are joined with single spaces.
 *
 * @param {string} chunk - Raw chunk text.
 * @returns {{sanitized: string, changed: boolean}} `sanitized` is the
 *   surviving text, or the placeholder "[rag chunk removed]" when nothing
 *   survives; `changed` is true when at least one sentence was removed.
 */
const sanitize_chunk = (chunk) => {
    const mentions_role = (lowered) => data_1.rag_config.role_words.some((role) => lowered.includes(role));
    const pieces = sentence_split(chunk);
    const survivors = [];
    for (const piece of pieces) {
        if (is_imperative_sentence(piece)) {
            continue;
        }
        if (mentions_role(piece.toLowerCase())) {
            continue;
        }
        survivors.push(piece);
    }
    // Every removal shortens the list, so a length mismatch means "changed".
    const changed = survivors.length !== pieces.length;
    const joined = survivors.join(" ").trim();
    return { sanitized: joined === "" ? "[rag chunk removed]" : joined, changed };
};
/**
 * Score one chunk and decide whether it has to be sanitized or dropped.
 *
 * The raw score is a weighted sum of:
 *   - 0.35 x share of sentences accepted by is_imperative_sentence(),
 *   - 0.40 x cosine similarity between the chunk embedding and rag_center,
 *   - 0.25 x min(1, role-word count / 2).
 *
 * The chunk is flagged when the raw score exceeds 0.1, when sanitize_chunk()
 * removed anything, or when the text contains "assistant must" or "ignore"
 * (case-insensitive). A flagged chunk has its threat forced to 1 and is
 * marked for dropping, so `drop` and `sanitize` always carry the same value
 * in the returned object.
 * NOTE(review): because of this escalation there is no "sanitize but keep"
 * outcome; confirm this is the intended policy.
 *
 * @param {string} chunk - Raw chunk text.
 * @returns {{threat: *, drop: boolean, sanitize: boolean, sanitized: string,
 *   sanitizedChanged: boolean}} `threat` is whatever embedding normalize()
 *   returns for the final score.
 */
const analyze_chunk = (chunk) => {
    const segments = sentence_split(chunk);
    const imperative_total = segments.filter(is_imperative_sentence).length;
    const imperative_ratio = segments.length ? imperative_total / segments.length : 0;
    const role_hits = count_words(chunk, data_1.rag_config.role_words);
    const similarity = (0, embedding_1.cosine)((0, embedding_1.embed)(chunk), rag_center);
    const raw_score = 0.35 * imperative_ratio + 0.4 * similarity + 0.25 * Math.min(1, role_hits / 2);
    const cleaned = sanitize_chunk(chunk);
    // A raw score above the 0.2 drop threshold is also above 0.1, so the
    // single 0.1 comparison covers both threshold checks.
    const flagged = raw_score > 0.1 ||
        cleaned.changed ||
        /assistant must|ignore/i.test(chunk);
    return {
        threat: (0, embedding_1.normalize)(flagged ? 1 : raw_score),
        // Unflagged chunks score at most 0.1 and never reach the drop
        // threshold; flagged chunks are always dropped.
        drop: flagged,
        sanitize: flagged,
        sanitized: cleaned.sanitized,
        sanitizedChanged: cleaned.changed
    };
};
/**
 * Score a list of RAG chunks and report which ones are problematic.
 *
 * @param {string[]|null|undefined} chunks - Retrieved chunks.
 * @returns {{score: *, detail: string[]}} `score` is normalize() applied to
 *   the highest per-chunk threat (0 for missing or empty input); `detail`
 *   holds one flag per problematic chunk, formatted `rag_chunk_<index>_drop`
 *   or `rag_chunk_<index>_sanitize`.
 */
const score_rag = (chunks) => {
    if (!chunks?.length) {
        return { score: 0, detail: [] };
    }
    const detail = [];
    let peak = 0;
    let idx = 0;
    while (idx < chunks.length) {
        const { threat, drop, sanitize } = analyze_chunk(chunks[idx]);
        peak = threat > peak ? threat : peak;
        if (drop) {
            detail.push(`rag_chunk_${idx}_drop`);
        }
        else if (sanitize) {
            // NOTE(review): analyze_chunk sets drop whenever sanitize is set,
            // so this branch does not appear reachable at present.
            detail.push(`rag_chunk_${idx}_sanitize`);
        }
        idx += 1;
    }
    return { score: (0, embedding_1.normalize)(peak), detail };
};
exports.score_rag = score_rag;
/**
 * Return the chunks that are not marked for dropping.
 *
 * @param {string[]|null|undefined} chunks - Retrieved chunks.
 * @param {string[]|null|undefined} flags - Flags such as those produced by
 *   score_rag(); only entries containing `rag_chunk_<index>_drop` matter.
 * @returns {string[]} A new array without the flagged indexes; an empty
 *   array when `chunks` is missing.
 */
const drop_rag_chunks = (chunks, flags) => {
    if (!chunks) {
        return [];
    }
    const doomed = new Set();
    flags?.forEach((flag) => {
        // On a match, slot 1 holds the captured index digits.
        const [, digits] = flag.match(/rag_chunk_(\d+)_drop/) ?? [];
        if (digits !== undefined) {
            doomed.add(Number(digits));
        }
    });
    return chunks.filter((_chunk, position) => !doomed.has(position));
};
exports.drop_rag_chunks = drop_rag_chunks;
/**
 * Produce the chunk list to forward after applying drop/sanitize flags.
 *
 * For each chunk, in order:
 *   - an index flagged `rag_chunk_<index>_drop` is omitted;
 *   - otherwise the chunk is re-analyzed; when it is flagged
 *     `rag_chunk_<index>_sanitize` or the analysis itself asks for
 *     sanitizing, it is replaced by `[rag chunk <index> sanitized] <text>`,
 *     or by `[rag chunk <index> removed]` when no text survived;
 *   - every other chunk is passed through unchanged.
 *
 * @param {string[]|null|undefined} chunks - Retrieved chunks.
 * @param {string[]|null|undefined} flags - Flags such as those produced by
 *   score_rag().
 * @returns {string[]} The filtered and sanitized chunk list (empty for
 *   missing or empty input).
 */
const sanitize_rag_chunks = (chunks, flags) => {
    if (!chunks?.length) {
        return [];
    }
    const dropped = new Set();
    const to_clean = new Set();
    flags?.forEach((flag) => {
        // A drop match takes precedence over a sanitize match.
        const as_drop = flag.match(/rag_chunk_(\d+)_drop/);
        if (as_drop) {
            dropped.add(Number(as_drop[1]));
            return;
        }
        const as_sanitize = flag.match(/rag_chunk_(\d+)_sanitize/);
        if (as_sanitize) {
            to_clean.add(Number(as_sanitize[1]));
        }
    });
    // Each callback returns zero or one element; flatMap concatenates them.
    return chunks.flatMap((chunk, idx) => {
        if (dropped.has(idx)) {
            return [];
        }
        const { sanitize, sanitized } = analyze_chunk(chunk);
        if (!to_clean.has(idx) && !sanitize) {
            return [chunk];
        }
        // "[rag chunk removed]" is the placeholder sanitize_chunk returns
        // when no sentence survived.
        const has_text = sanitized && sanitized !== "[rag chunk removed]";
        return [has_text ? `[rag chunk ${idx} sanitized] ${sanitized}` : `[rag chunk ${idx} removed]`];
    });
};
exports.sanitize_rag_chunks = sanitize_rag_chunks;