UNPKG

llm-inject-scan

Version:

A tiny, fast library that scans user prompts for risky patterns before they reach your LLM. It flags likely prompt-injection attempts so you can block, review, or route them differently—without making a model call.

350 lines (343 loc) • 9.46 kB

JavaScript

"use strict";
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var index_exports = {};
__export(index_exports, {
  FlaggedCategory: () => FlaggedCategory,
  createPromptValidator: () => createPromptValidator
});
// Guarded so that evaluating this bundle where no CommonJS `module` binding
// exists (for example as an ES module) does not throw a ReferenceError.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(index_exports);
}

// utils/normalizeString.ts
/**
 * Single-character substitutions applied by `normalizeString`.
 * Keys are look-alike characters (or leetspeak digits); values are the plain
 * ASCII letter they are folded to.
 */
var HOMOGLYPH_MAP = {
  // Latin variants and diacritics are handled by NFKD + diacritics removal below
  // Common confusables (subset)
  "\u0131": "i", // Latin dotless i
  "\u0130": "i",
  "\u00CC": "i",
  "\u00CD": "i",
  "\u00CE": "i",
  "\u00CF": "i",
  "\u00EC": "i",
  "\u00ED": "i",
  "\u00EE": "i",
  "\u00EF": "i",
  "\u0100": "a",
  "\u00C1": "a",
  "\u00C0": "a",
  "\u00C2": "a",
  "\u00C3": "a",
  "\u00C4": "a",
  "\u00C5": "a",
  "\u0101": "a",
  "\u00E1": "a",
  "\u00E0": "a",
  "\u00E2": "a",
  "\u00E3": "a",
  "\u00E4": "a",
  "\u00E5": "a",
  "\u0112": "e",
  "\u00C9": "e",
  "\u00C8": "e",
  "\u00CA": "e",
  "\u00CB": "e",
  "\u0113": "e",
  "\u00E9": "e",
  "\u00E8": "e",
  "\u00EA": "e",
  "\u00EB": "e",
  "\u014C": "o",
  "\u00D3": "o",
  "\u00D2": "o",
  "\u00D4": "o",
  "\u00D5": "o",
  "\u00D6": "o",
  "\u014D": "o",
  "\u00F3": "o",
  "\u00F2": "o",
  "\u00F4": "o",
  "\u00F5": "o",
  "\u00F6": "o",
  "\u016A": "u",
  "\u00DA": "u",
  "\u00D9": "u",
  "\u00DB": "u",
  "\u00DC": "u",
  "\u016B": "u",
  "\u00FA": "u",
  "\u00F9": "u",
  "\u00FB": "u",
  "\u00FC": "u",
  // Cyrillic letters that resemble Latin (subset)
  "\u0410": "a",
  "\u0430": "a",
  "\u0412": "b",
  "\u0432": "b",
  "\u0415": "e",
  "\u0435": "e",
  "\u041A": "k",
  "\u043A": "k",
  "\u041C": "m",
  "\u043C": "m",
  "\u041D": "h",
  "\u043D": "h",
  "\u041E": "o",
  "\u043E": "o",
  "\u0420": "p",
  "\u0440": "p",
  "\u0421": "c",
  "\u0441": "c",
  "\u0422": "t",
  "\u0442": "t",
  "\u0425": "x",
  "\u0445": "x",
  "\u0406": "i",
  "\u0456": "i",
  "\u0407": "i",
  "\u0457": "i",
  "\u0408": "j",
  "\u0458": "j",
  "\u0405": "s",
  "\u0455": "s",
  // Greek confusables (subset)
  "\u0391": "a",
  "\u03B1": "a",
  "\u0392": "b",
  "\u03B2": "b",
  "\u0395": "e",
  "\u03B5": "e",
  "\u0396": "z",
  "\u03B6": "z",
  "\u0397": "n",
  "\u03B7": "n", // rough mapping
  "\u0399": "i",
  "\u03B9": "i",
  "\u039A": "k",
  "\u03BA": "k",
  "\u039C": "m",
  "\u03BC": "m",
  "\u039D": "n",
  "\u03BD": "v",
  "\u039F": "o",
  "\u03BF": "o",
  "\u03A1": "p",
  "\u03C1": "p",
  "\u03A4": "t",
  "\u03C4": "t",
  "\u03A5": "y",
  "\u03C5": "y",
  "\u03A7": "x",
  "\u03C7": "x",
  // Common leetspeak
  "0": "o",
  "1": "l", // map to l (covers the example "l with 1")
  "3": "e",
  "4": "a",
  "5": "s",
  "7": "t"
};

/**
 * Replaces every character that has an entry in HOMOGLYPH_MAP with its
 * mapped letter; all other characters are kept as they are.
 *
 * @param {string} s - Text to fold.
 * @returns {string} Text with mapped characters substituted.
 */
var mapHomoglyphs = (s) => Array.from(s).map((ch) => {
  const mapped = HOMOGLYPH_MAP[ch];
  return mapped != null ? mapped : ch;
}).join("");

/**
 * Canonicalizes text for rule matching: NFKD-decomposes it, strips combining
 * marks (U+0300-U+036F), folds homoglyphs/leetspeak via HOMOGLYPH_MAP,
 * lower-cases it, and finally removes everything outside [a-z0-9]
 * (so whitespace and punctuation disappear).
 *
 * @param {string} input - Raw text.
 * @returns {string} Normalized text containing only a-z and 0-9.
 */
var normalizeString = (input) => {
  let s = input.normalize("NFKD").replace(/[\u0300-\u036f]/g, "");
  // Case-sensitive pass first: upper-case entries can map differently from
  // their lower-case counterparts (Greek capital Nu -> "n", small nu -> "v"),
  // and lower-casing beforehand would make the upper-case entries unreachable.
  s = mapHomoglyphs(s);
  s = s.toLowerCase();
  // Second pass catches characters that only become map keys once lower-cased.
  s = mapHomoglyphs(s);
  return s.replace(/[^a-z0-9]+/g, "");
};

// utils/rules.ts
/** Fraction of a multi-token rule's tokens that must be present for a match. */
var MIN_MATCH_THRESHOLD = 0.65;

/** Cache of rule string -> normalized, non-empty tokens. */
var RULE_TOKEN_CACHE = new Map();

/**
 * Splits a "|"-separated rule into normalized tokens, dropping tokens that
 * normalize to the empty string. Results are cached per rule string so the
 * same rule is not re-normalized on every scan.
 *
 * @param {string} rule - Rule such as "ignore|previous|instructions".
 * @returns {string[]} Normalized tokens (shared array; callers must not mutate).
 */
var tokenizeRule = (rule) => {
  let tokens = RULE_TOKEN_CACHE.get(rule);
  if (tokens === undefined) {
    tokens = rule.split("|").map((t) => normalizeString(t)).filter((t) => t.length > 0);
    RULE_TOKEN_CACHE.set(rule, tokens);
  }
  return tokens;
};

/**
 * Tests one rule against an already-normalized haystack.
 * A single-token rule matches when the token is a substring. A multi-token
 * rule matches when at least ceil(tokenCount * MIN_MATCH_THRESHOLD) tokens
 * are substrings (token order is not considered).
 *
 * @param {string} normalizedHaystack - Output of `normalizeString`.
 * @param {string} rule - "|"-separated rule.
 * @returns {boolean} Whether the rule matches.
 */
var containsRuleIn = (normalizedHaystack, rule) => {
  const tokens = tokenizeRule(rule);
  if (tokens.length === 0) return false;
  if (tokens.length === 1) {
    return normalizedHaystack.includes(tokens[0]);
  }
  const requiredMatches = Math.ceil(tokens.length * MIN_MATCH_THRESHOLD);
  let matchedCount = 0;
  for (const token of tokens) {
    if (normalizedHaystack.includes(token)) {
      matchedCount += 1;
      if (matchedCount >= requiredMatches) return true;
    }
  }
  return false;
};

/**
 * Returns true when any rule matches either the normalized text or its
 * character-reversed form.
 *
 * @param {string} normalized - Normalized text.
 * @param {string} reversedNormalized - The same text reversed.
 * @param {string[]} rules - Rules to test.
 * @returns {boolean} Whether at least one rule matched.
 */
var anyRuleMatches = (normalized, reversedNormalized, rules) => {
  return rules.some((rule) => containsRuleIn(normalized, rule) || containsRuleIn(reversedNormalized, rule));
};

// utils/fuzzyMatching.ts
/**
 * Returns true when `a` equals `b`, or when `a` becomes `b` by swapping one
 * pair of adjacent characters. Strings of different length never match.
 *
 * @param {string} a
 * @param {string} b
 * @returns {boolean}
 */
var equalsWithOneAdjacentTranspositionOrEqual = (a, b) => {
  if (a === b) return true;
  if (a.length !== b.length) return false;
  let firstDiff = -1;
  for (let i = 0; i < a.length; i++) {
    if (a[i] !== b[i]) {
      firstDiff = i;
      break;
    }
  }
  if (firstDiff < 0) return true;
  const j = firstDiff + 1;
  if (j >= a.length) return false;
  if (a[firstDiff] === b[j] && a[j] === b[firstDiff]) {
    // After the swapped pair, the remainder must be identical.
    for (let k = j + 1; k < a.length; k++) {
      if (a[k] !== b[k]) return false;
    }
    return true;
  }
  return false;
};

/**
 * Slides a window of `term.length` over the haystack and reports whether any
 * window equals `term` exactly or up to one adjacent transposition.
 *
 * @param {string} normalizedHaystack - Normalized text to search.
 * @param {string} term - Normalized term to look for.
 * @returns {boolean}
 */
var fuzzyIncludesTransposition = (normalizedHaystack, term) => {
  const n = normalizedHaystack.length;
  const m = term.length;
  if (m === 0 || n < m) return false;
  for (let i = 0; i <= n - m; i++) {
    const sub = normalizedHaystack.slice(i, i + m);
    if (equalsWithOneAdjacentTranspositionOrEqual(sub, term)) return true;
  }
  return false;
};

// utils/detectors.ts
/** True when the text contains a run of 20+ characters from the Base64 alphabet. */
var isBase64Like = (s) => /[A-Za-z0-9+/=]{20,}/.test(s);
/** True when the text contains an http:// or https:// URL. */
var containsUrl = (s) => /https?:\/\/\S+/i.test(s);
/** True when the text starts (after optional whitespace) with "system:", "assistant:" or "boss:". */
var startsWithRoleLabel = (s) => /^(\s*)(system|assistant|boss)\s*:/i.test(s);
/** True when the text contains three or more consecutive %XX escapes. */
var containsPercentEncodedText = (s) => /(?:%[0-9A-Fa-f]{2}){3,}/.test(s);

// src/index.ts
/** Categories a prompt can be flagged with. */
var FlaggedCategory = /* @__PURE__ */ ((FlaggedCategory2) => {
  FlaggedCategory2["Jailbreak"] = "jailbreak";
  FlaggedCategory2["Evasion"] = "evasion";
  FlaggedCategory2["PromptLeak"] = "promptLeak";
  FlaggedCategory2["Indirect"] = "indirect";
  FlaggedCategory2["RoleContext"] = "roleContext";
  return FlaggedCategory2;
})(FlaggedCategory || {});

var DEFAULT_LOCALE = "en-US";

/** Built-in en-US rules, keyed by category. Each rule is a "|"-separated token list. */
var injectionRulesEnUS = {
  ["jailbreak" /* Jailbreak */]: [
    // Include both with and without "all" to avoid optional token syntax
    "ignore|previous|instructions",
    "ignore|all|previous|instructions",
    "you|are|now|in|developer|mode",
    "pretend|youre",
    "forget|everything",
    "tell|system|prompt|security",
    "tell|password",
    "spell|backward",
    "replace|number|letter|character",
    "delete|everything|records",
    "disregard|previous|instruction|command",
    "disregard|system|prompt",
    "provide|detail|system|security",
    "let|f(x)|f(y)",
    "do|anything|now",
    "replace|response",
    "add|answer",
    "without|restrictions"
  ],
  ["evasion" /* Evasion */]: [
    // direct terms (single-token rules)
    "ignore",
    "bypass",
    "reveal",
    "create",
    "malware",
    "code",
    "skip",
    "opposite"
  ],
  ["promptLeak" /* PromptLeak */]: [
    "reveal|your|internal|prompt",
    "reveal|your|system|prompt",
    "what|were|your|instructions",
    "repeat|the|text|above|starting|with|you|are",
    "your|system|prompt",
    "creators|told|you|never|to|reveal"
  ],
  ["indirect" /* Indirect */]: [
    // Rule-based not needed (handled by URL detector), keep placeholder for structure
  ],
  ["roleContext" /* RoleContext */]: [
    "system|override",
    "earth|is|flat",
    "the|next|sentence|is|true|the|previous|sentence|is|false"
  ]
};

/** Terms searched with the transposition-tolerant matcher; computed once. */
var EVASION_FUZZY_TERMS = ["ignore", "bypass", "reveal"].map((t) => normalizeString(t));

/** Appends `category` to `flags` unless it is already present. */
var addFlag = (flags, category) => {
  if (!flags.includes(category)) flags.push(category);
};

/**
 * Scans one prompt and reports which categories it trips.
 *
 * Order of checks (which also determines the order of `flags`): dictionary
 * rules per category, Base64-like run, percent-encoded run, URL, leading role
 * label, and finally a transposition-tolerant search for the evasion terms.
 *
 * @param {Object<string, string[]>} rulesByCategory - Rules keyed by category.
 * @param {{disableBase64Check?: boolean, disableUrlCheck?: boolean, disableUrlEncodingCheck?: boolean}} options
 *   Switches that turn individual detectors off.
 * @param {*} userInput - Prompt text. `null`/`undefined` are treated as an
 *   empty string and other non-string values are converted with `String()`.
 * @returns {{flags: string[], clean: boolean}} Flagged categories and whether none were found.
 */
var detectMaliciousPromptV2 = (rulesByCategory, options, userInput) => {
  const flaggedCategories = [];
  const {
    disableBase64Check = false,
    disableUrlCheck = false,
    disableUrlEncodingCheck = false
  } = options;
  // Coerce so a missing or non-string prompt is scanned instead of throwing.
  const text = userInput == null ? "" : String(userInput);
  const normalized = normalizeString(text);
  const reversedNormalized = Array.from(normalized).reverse().join("");
  for (const category of Object.keys(rulesByCategory)) {
    const rules = rulesByCategory[category];
    if (rules.length === 0) continue;
    if (anyRuleMatches(normalized, reversedNormalized, rules)) {
      flaggedCategories.push(category);
    }
  }
  if (!disableBase64Check && isBase64Like(text)) {
    addFlag(flaggedCategories, "evasion" /* Evasion */);
  }
  if (!disableUrlEncodingCheck && containsPercentEncodedText(text)) {
    addFlag(flaggedCategories, "evasion" /* Evasion */);
  }
  if (!disableUrlCheck && containsUrl(text)) {
    addFlag(flaggedCategories, "indirect" /* Indirect */);
  }
  if (startsWithRoleLabel(text)) {
    addFlag(flaggedCategories, "roleContext" /* RoleContext */);
  }
  if (!flaggedCategories.includes("evasion" /* Evasion */)) {
    const fuzzyHit = EVASION_FUZZY_TERMS.some((term) => fuzzyIncludesTransposition(normalized, term));
    if (fuzzyHit) flaggedCategories.push("evasion" /* Evasion */);
  }
  return {
    flags: flaggedCategories,
    clean: flaggedCategories.length === 0
  };
};

/**
 * Builds a validator function bound to a locale's rule dictionary.
 *
 * @param {{locale?: string, disableBase64Check?: boolean, disableUrlCheck?: boolean, disableUrlEncodingCheck?: boolean}} [options]
 *   `locale` selects the dictionary; a locale with no dictionary falls back
 *   to the default locale ("en-US"). `null` is treated like `{}`.
 * @returns {(userInput: string) => {flags: string[], clean: boolean}} Validator.
 */
var createPromptValidator = (options = {}) => {
  const resolvedOptions = options == null ? {} : options;
  const dictionaries = {
    [DEFAULT_LOCALE]: injectionRulesEnUS
  };
  const { locale = DEFAULT_LOCALE } = resolvedOptions;
  // Own-property check: an unknown locale must not yield `undefined` (which
  // would make every scan throw), and names such as "constructor" must not
  // resolve through the prototype chain to a rule-less object.
  const injectionRules = __hasOwnProp.call(dictionaries, locale) ? dictionaries[locale] : dictionaries[DEFAULT_LOCALE];
  return (userInput) => detectMaliciousPromptV2(
    injectionRules,
    resolvedOptions,
    userInput
  );
};
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  FlaggedCategory,
  createPromptValidator
});