@openguardrails/moltguard
Version:
AI agent security plugin for OpenClaw: prompt injection detection, PII sanitization, and monitoring dashboard
352 lines • 14.4 kB
JavaScript
/**
* Behavioral anomaly detector — hooks before_tool_call and after_tool_call.
*
* Responsibilities:
* 1. Send pending tool calls (with sanitized params) to Core for assessment.
*    No filtering by tool type happens in this module; the file-read, shell,
*    and web-fetch tool sets below are exported for callers.
* 2. Record completed tool calls in the per-session chain history
* 3. Core does all classification, signal computation, and risk decisions
* 4. Fail-open: if Core is unavailable, allow execution
*/
import { randomBytes } from "node:crypto";
import { sanitizeContent } from "./sanitizer.js";
// =============================================================================
// Tool Sets — used to decide whether to send a tool call to Core
// =============================================================================
/** Names of tools that read file contents. */
export const FILE_READ_TOOLS = new Set([
  "Read",
  "read_file",
  "read",
  "cat",
  "head",
  "tail",
  "view",
  "get_file_contents",
  "open_file",
]);

/** Names of tools that run shell commands. */
export const SHELL_TOOLS = new Set([
  "Bash",
  "bash",
  "shell",
  "run_command",
  "execute",
  "terminal",
  "cmd",
  "powershell",
]);

/** Names of tools that fetch web content or navigate to a URL. */
export const WEB_FETCH_TOOLS = new Set([
  "WebFetch",
  "web_fetch",
  "fetch",
  "http_request",
  "get_url",
  "browser_navigate",
  "navigate",
]);
// =============================================================================
// Param Sanitization
// =============================================================================
/**
 * Module-level secret detection callback (set by BehaviorDetector.setOnSecretDetected).
 * NOTE(review): this is module state, so it is shared by every BehaviorDetector
 * instance in the process — the most recent setter wins.
 */
let secretDetectionCallback = null;

/** Placeholder used when a param value cannot be serialized to JSON. */
const UNSERIALIZABLE_PARAM = "[unserializable]";

/**
 * Render a single param value as text for sanitization.
 *
 * Strings pass through unchanged; everything else is JSON-encoded
 * (null/undefined are encoded as an empty JSON string). JSON.stringify
 * returns undefined for functions/symbols and throws on circular structures
 * and BigInt, so both cases collapse to a placeholder instead of raising.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stringifyParamValue(value) {
  if (typeof value === "string") {
    return value;
  }
  try {
    return JSON.stringify(value ?? "") ?? UNSERIALIZABLE_PARAM;
  } catch {
    return UNSERIALIZABLE_PARAM;
  }
}

/**
 * Sanitize tool-call params before they leave the process.
 *
 * Each value is stringified, truncated to its first 500 characters, and run
 * through sanitizeContent. Redaction counts from all values are summed per
 * type and, if any were found and a callback is registered, reported once via
 * secretDetectionCallback.
 *
 * NOTE(review): truncation happens before sanitization, so a secret straddling
 * the 500-character boundary is only seen in part by sanitizeContent — confirm
 * this is acceptable for the patterns it detects.
 *
 * @param {Record<string, unknown> | null | undefined} params - Raw tool params;
 *   null/undefined is treated as "no params".
 * @returns {Record<string, string>} Map of param name to sanitized text.
 */
function sanitizeParams(params) {
  const result = {};
  const allRedactions = {};
  for (const [key, value] of Object.entries(params ?? {})) {
    const raw = stringifyParamValue(value);
    const sanitized = sanitizeContent(raw.slice(0, 500));
    result[key] = sanitized.sanitized;
    // Accumulate redaction counts per type across all params
    if (sanitized.redactions) {
      for (const [type, count] of Object.entries(sanitized.redactions)) {
        allRedactions[type] = (allRedactions[type] ?? 0) + count;
      }
    }
  }
  // Report secret detections once per call, only if something was redacted
  if (Object.keys(allRedactions).length > 0 && secretDetectionCallback) {
    secretDetectionCallback(allRedactions);
  }
  return result;
}
/** Maximum number of sessions tracked at once; the oldest is evicted beyond this. */
const MAX_SESSIONS = 200;
/** Maximum number of completed tool-call entries kept per session. */
const MAX_CHAIN_ENTRIES = 50;
/** Stand-in text used for sizing a tool result that cannot be JSON-encoded. */
const UNSERIALIZABLE_RESULT = "[unserializable]";

/**
 * Render a tool result as text so its size can be measured.
 *
 * Strings pass through; null/undefined become the empty string (so they are
 * categorised as "empty" rather than as a 2-byte `""` JSON literal); anything
 * else is JSON-encoded. Values JSON.stringify cannot encode (circular
 * structures, BigInt, functions, symbols) fall back to a short placeholder,
 * so the reported size is only approximate for those.
 *
 * @param {unknown} result
 * @returns {string}
 */
function stringifyToolResult(result) {
  if (typeof result === "string") {
    return result;
  }
  if (result === null || result === undefined) {
    return "";
  }
  try {
    return JSON.stringify(result) ?? UNSERIALIZABLE_RESULT;
  } catch {
    return UNSERIALIZABLE_RESULT;
  }
}

/**
 * Tracks per-session tool-call history and asks the Core API whether a
 * pending tool call should be blocked.
 *
 * Reads from `config`: coreUrl, assessTimeoutMs, blockOnRisk, pluginVersion.
 * Uses `log.info`, `log.warn`, and (optionally) `log.debug`.
 * Every Core failure path returns null/undefined so the tool call is allowed.
 */
export class BehaviorDetector {
  /** Session state keyed by sessionKey. */
  sessions = new Map();
  /** Core credentials (agentId, apiKey), or null when not configured. */
  coreCredentials = null;
  config;
  log;
  /** HTTP status codes we've already warned about — avoid log spam */
  warnedStatuses = new Set();
  /** Track if we've already notified about quota exceeded (avoid spam) */
  quotaExceededNotified = false;
  /** Callback for quota exceeded notification */
  onQuotaExceeded = null;
  /** Pending quota exceeded message to append to next tool result */
  pendingQuotaMessage = null;
  /** Callback for secret detection (business reporting) */
  onSecretDetected = null;

  /**
   * @param {object} config - Plugin configuration (see class comment for fields read).
   * @param {object} log - Logger with info/warn and optional debug.
   */
  constructor(config, log) {
    this.config = config;
    this.log = log;
  }

  /** Store the Core credentials used for all subsequent API calls. */
  setCredentials(creds) {
    this.coreCredentials = creds;
  }

  /** Set callback for when quota is exceeded */
  setOnQuotaExceeded(callback) {
    this.onQuotaExceeded = callback;
  }

  /**
   * Set callback for when secrets are detected in params (business reporting).
   * Also installs it as the module-level callback used during param sanitization.
   */
  setOnSecretDetected(callback) {
    this.onSecretDetected = callback;
    secretDetectionCallback = callback;
  }

  /** Reset quota exceeded notification flag (e.g., on new day) */
  resetQuotaExceededNotification() {
    this.quotaExceededNotified = false;
    this.pendingQuotaMessage = null;
  }

  /** Get and clear pending quota message (for appending to tool results) */
  consumePendingQuotaMessage() {
    const msg = this.pendingQuotaMessage;
    this.pendingQuotaMessage = null;
    return msg;
  }

  /**
   * Record a user message for a session.
   *
   * The first message (truncated to 500 chars) becomes the session's
   * userIntent and is not overwritten afterwards. Every message (truncated to
   * 200 chars) is appended to recentUserMessages, which keeps the last five.
   */
  setUserIntent(sessionKey, message) {
    const state = this.getOrCreate(sessionKey);
    if (!state.userIntent) {
      state.userIntent = message.slice(0, 500);
    }
    state.recentUserMessages = [
      ...state.recentUserMessages.slice(-4),
      message.slice(0, 200),
    ];
  }

  /** Drop all state held for a session. */
  clearSession(sessionKey) {
    this.sessions.delete(sessionKey);
  }

  /**
   * Called at before_tool_call. Returns a block decision or undefined (allow).
   *
   * All tool calls are sent to Core to build a complete tool chain.
   * If Core is unavailable, fail-open (allow).
   *
   * @returns {Promise<{block: true, blockReason: string, findings: unknown} | undefined>}
   */
  async onBeforeToolCall(ctx, event) {
    // No credentials → can't call Core → allow
    if (!this.coreCredentials) {
      return undefined;
    }
    const state = this.getOrCreate(ctx.sessionKey);
    const pendingTool = {
      toolName: event.toolName,
      // Missing params must not break the fail-open path
      params: sanitizeParams(event.params ?? {}),
    };
    // Collect content injection findings (if any); copied so later mutation
    // of session state cannot affect the request
    const contentFindings = state.contentInjectionFindings.length > 0
      ? [...state.contentInjectionFindings]
      : undefined;
    const req = {
      agentId: this.coreCredentials.agentId,
      sessionKey: ctx.sessionKey,
      runId: state.runId,
      userIntent: state.userIntent,
      toolChain: state.completedChain,
      pendingTool,
      contentFindings,
      context: {
        messageHistoryLength: state.recentUserMessages.length,
        recentUserMessages: state.recentUserMessages.slice(-3),
      },
      meta: {
        pluginVersion: this.config.pluginVersion,
        clientTimestamp: new Date().toISOString(),
      },
    };
    const verdict = await this.callAssessApi(req);
    // Fail-open: Core unavailable → allow
    if (!verdict) {
      return undefined;
    }
    const confidencePct = Math.round(verdict.confidence * 100);
    if (verdict.action === "block" && this.config.blockOnRisk) {
      return {
        block: true,
        blockReason: `OpenGuardrails blocked [${verdict.riskLevel}]: ${verdict.explanation} ` +
          `(confidence: ${confidencePct}%)`,
        findings: verdict.findings,
      };
    }
    // Blocking disabled, or Core only asked for an alert: log and allow
    if (verdict.action === "block" || verdict.action === "alert") {
      this.log.warn(`Behavioral anomaly [${verdict.riskLevel}/${confidencePct}%]: ${verdict.explanation}`);
    }
    return undefined;
  }

  /**
   * Called at after_tool_call. Records the completed tool in the chain.
   *
   * Does nothing when the session has no state yet (this method never creates
   * one). The chain is capped at MAX_CHAIN_ENTRIES; the oldest entry is
   * dropped when the cap is exceeded.
   */
  onAfterToolCall(ctx, event) {
    const state = this.sessions.get(ctx.sessionKey);
    if (!state) {
      return;
    }
    const resultSizeBytes = Buffer.byteLength(stringifyToolResult(event.result), "utf-8");
    let resultCategory = "empty";
    if (event.error) {
      resultCategory = "error";
    } else if (resultSizeBytes > 100_000) {
      resultCategory = "text_large";
    } else if (resultSizeBytes > 0) {
      resultCategory = "text_small";
    }
    state.completedChain.push({
      seq: state.nextSeq++,
      toolName: event.toolName,
      sanitizedParams: sanitizeParams(event.params ?? {}),
      outcome: event.error ? "error" : "success",
      durationMs: event.durationMs ?? 0,
      resultCategory,
      resultSizeBytes,
    });
    if (state.completedChain.length > MAX_CHAIN_ENTRIES) {
      state.completedChain.shift();
    }
  }

  /**
   * Scan tool result content for injection patterns via Core API.
   * Returns scan result or null if scan failed/unavailable.
   *
   * @param {string} sessionKey
   * @param {string} toolName
   * @param {string} content - Tool result text; only the first 100 * 1024
   *   characters are sent.
   * @returns {Promise<object | null>} The `data` field of Core's response.
   */
  async scanContent(sessionKey, toolName, content) {
    if (!this.coreCredentials) {
      return null;
    }
    // Limit content size to avoid timeouts. The limit counts string
    // characters (UTF-16 code units), not encoded bytes.
    const maxSize = 100 * 1024;
    const truncatedContent = content.length > maxSize ? content.slice(0, maxSize) : content;
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.config.assessTimeoutMs);
    try {
      const response = await fetch(`${this.config.coreUrl}/api/v1/content/scan`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.coreCredentials.apiKey}`,
        },
        body: JSON.stringify({
          content: truncatedContent,
          toolName,
          sessionKey,
        }),
        signal: controller.signal,
      });
      if (!response.ok) {
        this.log.debug?.(`Core content-scan returned ${response.status}`);
        return null;
      }
      const json = await response.json();
      if (!json.success || !json.data) {
        return null;
      }
      this.log.info(`Core content-scan: detected=${json.data.detected}, ` +
        `categories=[${json.data.categories.join(",")}], ` +
        `findings=${json.data.findings.length}`);
      return json.data;
    } catch (err) {
      // Timeouts (abort) are expected and stay silent; anything else is logged
      if (err?.name !== "AbortError") {
        this.log.debug?.(`Core content-scan error: ${err}`);
      }
      return null;
    } finally {
      clearTimeout(timer);
    }
  }

  // ── Private helpers ──────────────────────────────────────────────

  /**
   * Return the state for a session, creating it if needed.
   * When the session cap is reached, the session with the smallest startedAt
   * is evicted first.
   */
  getOrCreate(sessionKey) {
    const existing = this.sessions.get(sessionKey);
    if (existing) {
      return existing;
    }
    if (this.sessions.size >= MAX_SESSIONS) {
      let oldestEntry = null;
      for (const entry of this.sessions) {
        if (oldestEntry === null || entry[1].startedAt < oldestEntry[1].startedAt) {
          oldestEntry = entry;
        }
      }
      // Compare against null rather than truthiness so a falsy key such as
      // "" is still evicted
      if (oldestEntry !== null) {
        this.sessions.delete(oldestEntry[0]);
      }
    }
    const state = {
      sessionKey,
      runId: `run-${randomBytes(8).toString("hex")}`,
      userIntent: "",
      recentUserMessages: [],
      completedChain: [],
      nextSeq: 0,
      contentInjectionFindings: [],
      startedAt: Date.now(),
    };
    this.sessions.set(sessionKey, state);
    return state;
  }

  /**
   * POST an assess request to Core and return its verdict.
   *
   * Returns null (allow) when credentials are missing, the HTTP status is not
   * OK, the response reports failure or has no data, the request errors or
   * times out, or Core reports that the quota is exceeded. Each non-OK status
   * code is logged only once per instance.
   */
  async callAssessApi(req) {
    if (!this.coreCredentials) {
      return null;
    }
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.config.assessTimeoutMs);
    this.log.info(`Core: calling assess API for tool "${req.pendingTool?.toolName}" (session=${req.sessionKey?.slice(0, 8)}...)`);
    try {
      const response = await fetch(`${this.config.coreUrl}/api/v1/behavior/assess`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.coreCredentials.apiKey}`,
        },
        body: JSON.stringify(req),
        signal: controller.signal,
      });
      if (!response.ok) {
        if (!this.warnedStatuses.has(response.status)) {
          this.warnedStatuses.add(response.status);
          if (response.status === 401) {
            this.log.warn("Platform: API key invalid or agent not found");
          } else if (response.status === 403) {
            this.log.warn(`Platform: access denied — visit ${this.config.coreUrl} for details`);
          } else {
            this.log.debug?.(`Platform: assess returned ${response.status}`);
          }
        }
        return null;
      }
      const json = await response.json();
      // Raw Core response is diagnostic output only: keep it at debug level so
      // it does not flood warn-level logs on every tool call
      this.log.debug?.(`Core response: ${JSON.stringify(json, null, 2)}`);
      if (!json.success || !json.data) {
        return null;
      }
      if (json.data.quotaExceeded) {
        this.handleQuotaExceeded(json.data);
        // Return null to fail-open (allow execution, no detection)
        return null;
      }
      return json.data;
    } catch (err) {
      // Timeouts (abort) are expected and stay silent; anything else is logged
      if (err?.name !== "AbortError") {
        this.log.debug?.(`Assess API error: ${err}`);
      }
      return null;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Record a quota-exceeded response from Core: normalise its fields, store it
   * as the pending message if none is waiting, and fire onQuotaExceeded the
   * first time only (until resetQuotaExceededNotification is called).
   */
  handleQuotaExceeded(data) {
    const info = {
      quotaExceeded: true,
      quotaUsed: data.quotaUsed ?? 0,
      quotaTotal: data.quotaTotal ?? 0,
      isAutonomous: data.isAutonomous ?? true,
      resetAt: data.resetAt ?? null,
      upgradeUrl: data.upgradeUrl ?? "",
      message: data.message ?? "Quota exceeded",
      recommendation: data.recommendation ?? "",
    };
    const isFirstNotify = !this.quotaExceededNotified;
    this.log.warn(`Core: quota exceeded (${info.quotaUsed}/${info.quotaTotal}, ` +
      `autonomous=${info.isAutonomous}, firstNotify=${isFirstNotify})`);
    // Always set pending message if there isn't one already.
    // This ensures the message gets through even if previous attempts failed.
    if (!this.pendingQuotaMessage) {
      this.pendingQuotaMessage = info;
      this.log.info("Core: stored pending quota message for next tool result");
    }
    if (isFirstNotify) {
      this.quotaExceededNotified = true;
      if (this.onQuotaExceeded) {
        this.onQuotaExceeded(info);
      }
    }
  }
}
//# sourceMappingURL=behavior-detector.js.map