UNPKG

@openguardrails/moltguard

Version:

AI agent security plugin for OpenClaw: prompt injection detection, PII sanitization, and monitoring dashboard

35 lines 1.35 kB
/**
 * Content-based prompt injection scanner.
 *
 * Pure/sync regex scanner — no network, no dependencies.
 * Scans tool result text (file contents, web fetches) for injection patterns.
 *
 * Two-tier design:
 *   HIGH confidence   — a single match triggers detection
 *   MEDIUM confidence — requires 2+ distinct categories to trigger
 *
 * This is a declaration file: it describes the public types and function
 * signatures only; the implementation lives in the compiled module.
 */
import type { DetectionFinding } from "./types.js";

/**
 * Closed set of injection categories a match can be tagged with.
 * MEDIUM-confidence detection counts how many distinct values of this
 * union appear among the matches (see the two-tier design above).
 */
export type InjectionCategory = "INSTRUCTION_OVERRIDE" | "MODE_SWITCHING" | "FAKE_SYSTEM_MESSAGE" | "CONCEALMENT_DIRECTIVE" | "COMMAND_EXECUTION" | "TASK_HIJACKING" | "ROLE_ASSUMPTION" | "DATA_EXFILTRATION";

/**
 * A single pattern hit produced by the scanner.
 */
export type InjectionMatch = {
    // Identifies the pattern that fired.
    // NOTE(review): presumably the regex source or a pattern label — confirm against the implementation.
    pattern: string;
    // Category the matching pattern belongs to.
    category: InjectionCategory;
    // Confidence tier of the pattern: "high" or "medium" (see the two-tier design above).
    confidence: "high" | "medium";
    // NOTE(review): presumably the substring of the scanned text that matched — confirm.
    matchedText: string;
};

/**
 * Result returned by {@link scanForInjection}.
 */
export type InjectionScanResult = {
    // Whether the scan concluded that an injection is present.
    detected: boolean;
    // Individual pattern hits.
    matches: InjectionMatch[];
    // NOTE(review): presumably the de-duplicated categories across `matches` — confirm.
    distinctCategories: InjectionCategory[];
    // NOTE(review): presumably a human-readable description of the outcome — confirm format.
    summary: string;
    // Structured findings; the `DetectionFinding` shape is declared in ./types.js.
    findings: DetectionFinding[];
};

/**
 * Replace all injection matches in `text` with `__REDACTED_BY_OPENGUARDRAILS_DUE_TO_{riskType}__`.
 * Returns the redacted text and structured findings.
 *
 * @param text - The text to redact.
 * @returns An object holding the `redacted` text and the `findings` for what was replaced.
 */
export declare function redactContent(text: string): {
    redacted: string;
    findings: DetectionFinding[];
};

/**
 * Scan `text` for prompt injection patterns.
 *
 * Per the file header, a single HIGH-confidence match triggers detection,
 * while MEDIUM-confidence matches require 2+ distinct categories.
 *
 * @param text - The text to scan (e.g. tool result content).
 * @returns The scan outcome, including matches, categories, a summary and findings.
 */
export declare function scanForInjection(text: string): InjectionScanResult;
//# sourceMappingURL=content-injection-scanner.d.ts.map