langchain
Version:
Typescript bindings for langchain
332 lines (330 loc) • 12 kB
JavaScript
import { createMiddleware } from "../middleware.js";
import { AIMessage, HumanMessage, RemoveMessage, SystemMessage, ToolMessage } from "@langchain/core/messages";
import { z } from "zod/v3";
//#region src/agents/middleware/piiRedaction.ts
/**
* Configuration schema for the Input Guardrails middleware
*/
const contextSchema = z.object({ rules: z.record(z.string(), z.instanceof(RegExp).describe("Regular expression pattern to match PII")).optional() });
/**
* Generate a unique ID for a redaction
*/
function generateRedactionId() {
return Math.random().toString(36).substring(2, 11);
}
/**
* Apply PII detection rules to text with ID tracking
*/
function applyPIIRules(text, rules, redactionMap) {
let processedText = text;
for (const [name, pattern] of Object.entries(rules)) {
const replacement = name.toUpperCase().replace(/[^a-zA-Z0-9_-]/g, "");
processedText = processedText.replace(pattern, (match) => {
const id = generateRedactionId();
redactionMap[id] = match;
return `[REDACTED_${replacement}_${id}]`;
});
}
return processedText;
}
/**
* Process a single human message for PII detection and redaction
*/
async function processMessage(message, config) {
/**
* handle basic message types
*/
if (HumanMessage.isInstance(message) || ToolMessage.isInstance(message) || SystemMessage.isInstance(message)) {
const content = message.content;
const processedContent = await applyPIIRules(content, config.rules, config.redactionMap);
if (processedContent !== content) {
const MessageConstructor = Object.getPrototypeOf(message).constructor;
return new MessageConstructor({
...message,
content: processedContent
});
}
return message;
}
/**
* Handle AI messages
*/
if (AIMessage.isInstance(message)) {
const content = typeof message.content === "string" ? message.content : JSON.stringify(message.content);
const toolCalls = JSON.stringify(message.tool_calls);
const processedContent = await applyPIIRules(content, config.rules, config.redactionMap);
const processedToolCalls = await applyPIIRules(toolCalls, config.rules, config.redactionMap);
if (processedContent !== content || processedToolCalls !== toolCalls) return new AIMessage({
...message,
content: typeof message.content === "string" ? processedContent : JSON.parse(processedContent),
tool_calls: JSON.parse(processedToolCalls)
});
return message;
}
throw new Error(`Unsupported message type: ${message.type}`);
}
/**
* Restore original values from redacted text using the redaction map
*/
function restoreRedactedValues(text, redactionMap) {
let restoredText = text;
const redactionPattern = /\[REDACTED_[A-Z_]+_(\w+)\]/g;
restoredText = restoredText.replace(redactionPattern, (match, id) => {
if (redactionMap[id]) return redactionMap[id];
return match;
});
return restoredText;
}
/**
* Restore redacted values in a message (creates a new message object)
*/
function restoreMessage(message, redactionMap) {
/**
* handle basic message types
*/
if (HumanMessage.isInstance(message) || ToolMessage.isInstance(message) || SystemMessage.isInstance(message)) {
const content = message.content;
const restoredContent = restoreRedactedValues(content, redactionMap);
if (restoredContent !== content) {
const MessageConstructor = Object.getPrototypeOf(message).constructor;
const newMessage = new MessageConstructor({
...message,
content: restoredContent
});
return {
message: newMessage,
changed: true
};
}
return {
message,
changed: false
};
}
/**
* handle AI messages
*/
if (AIMessage.isInstance(message)) {
const content = typeof message.content === "string" ? message.content : JSON.stringify(message.content);
const toolCalls = JSON.stringify(message.tool_calls);
const processedContent = restoreRedactedValues(content, redactionMap);
const processedToolCalls = restoreRedactedValues(toolCalls, redactionMap);
if (processedContent !== content || processedToolCalls !== toolCalls) return {
message: new AIMessage({
...message,
content: typeof message.content === "string" ? processedContent : JSON.parse(processedContent),
tool_calls: JSON.parse(processedToolCalls)
}),
changed: true
};
return {
message,
changed: false
};
}
throw new Error(`Unsupported message type: ${message.type}`);
}
/**
* Creates a middleware that detects and redacts personally identifiable information (PII)
* from messages before they are sent to model providers, and restores original values
* in model responses for tool execution.
*
* ## Mechanism
*
* The middleware intercepts agent execution at two points:
*
* ### Request Phase (`wrapModelCall`)
* - Applies regex-based pattern matching to all message content (HumanMessage, ToolMessage, SystemMessage, AIMessage)
* - Processes both message text and AIMessage tool call arguments
* - Each matched pattern generates:
* - Unique identifier: `generateRedactionId()` → `"abc123"`
* - Redaction marker: `[REDACTED_{RULE_NAME}_{ID}]` → `"[REDACTED_SSN_abc123]"`
* - Redaction map entry: `{ "abc123": "123-45-6789" }`
* - Returns modified request with redacted message content
*
* ### Response Phase (`afterModel`)
* - Scans AIMessage responses for redaction markers matching pattern: `/\[REDACTED_[A-Z_]+_(\w+)\]/g`
* - Replaces markers with original values from redaction map
* - Handles both standard responses and structured output (via tool calls or JSON content)
* - For structured output, restores values in both the tool call arguments and the `structuredResponse` state field
* - Returns new message instances via RemoveMessage/AIMessage to update state
*
* ## Data Flow
*
* ```
* User Input: "My SSN is 123-45-6789"
* ↓ [beforeModel]
* Model Request: "My SSN is [REDACTED_SSN_abc123]"
* ↓ [model invocation]
* Model Response: tool_call({ "ssn": "[REDACTED_SSN_abc123]" })
* ↓ [afterModel]
* Tool Execution: tool({ "ssn": "123-45-6789" })
* ```
*
* ## Limitations
*
* This middleware provides model provider isolation only. PII may still be present in:
* - LangGraph state checkpoints (memory, databases)
* - Network traffic between client and application server
* - Application logs and trace data
* - Tool execution arguments and responses
* - Final agent output
*
* For comprehensive PII protection, implement additional controls at the application,
* network, and storage layers.
*
* @param options - Configuration options
* @param options.rules - Record of detection rules mapping rule names to regex patterns.
* Rule names are normalized to uppercase and used in redaction markers.
* Patterns must use the global flag (`/pattern/g`) to match all occurrences.
*
* @returns Middleware instance for use with `createAgent`
*
* @example Basic usage with custom rules
* ```typescript
* import { piiRedactionMiddleware } from "langchain";
* import { createAgent } from "langchain";
* import { tool } from "@langchain/core/tools";
* import { z } from "zod/v3";
*
* const PII_RULES = {
* ssn: /\b\d{3}-?\d{2}-?\d{4}\b/g,
* email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
* phone: /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/g,
* };
*
* const lookupUser = tool(async ({ ssn }) => {
* // Receives original value: "123-45-6789"
* return { name: "John Doe", account: "active" };
* }, {
* name: "lookup_user",
* description: "Look up user by SSN",
* schema: z.object({ ssn: z.string() })
* });
*
* const agent = createAgent({
* model: new ChatOpenAI({ model: "gpt-4" }),
* tools: [lookupUser],
* middleware: [piiRedactionMiddleware({ rules: PII_RULES })]
* });
*
* const result = await agent.invoke({
* messages: [new HumanMessage("Look up SSN 123-45-6789")]
* });
* // Model request: "Look up SSN [REDACTED_SSN_abc123]"
* // Model response: tool_call({ "ssn": "[REDACTED_SSN_abc123]" })
* // Tool receives: { "ssn": "123-45-6789" }
* ```
*
* @example Runtime rule configuration via context
* ```typescript
* const agent = createAgent({
* model: new ChatOpenAI({ model: "gpt-4" }),
* tools: [someTool],
* middleware: [piiRedactionMiddleware()]
* });
*
* // Configure rules at runtime via middleware context
* const result = await agent.invoke(
* { messages: [new HumanMessage("...")] },
* {
* configurable: {
* PIIRedactionMiddleware: {
* rules: {
* ssn: /\b\d{3}-?\d{2}-?\d{4}\b/g,
* email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
* }
* }
* }
* }
* );
* ```
*
* @example Custom rule patterns
* ```typescript
* const customRules = {
* employee_id: /EMP-\d{6}/g,
* api_key: /sk-[a-zA-Z0-9]{32}/g,
* credit_card: /\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b/g,
* };
*
* const middleware = piiRedactionMiddleware({ rules: customRules });
* // Generates markers like: [REDACTED_EMPLOYEE_ID_xyz789]
* ```
*
* @public
*/
function piiRedactionMiddleware(options = {}) {
const redactionMap = {};
return createMiddleware({
name: "PIIRedactionMiddleware",
contextSchema,
wrapModelCall: async (request, handler) => {
/**
* Merge options with context, following bigTool.ts pattern
*/
const rules = request.runtime.context.rules ?? options.rules ?? {};
/**
* If no rules are provided, skip processing
*/
if (Object.keys(rules).length === 0) return handler(request);
const processedMessages = await Promise.all(request.state.messages.map((message) => processMessage(message, {
rules,
redactionMap
})));
return handler({
...request,
messages: processedMessages
});
},
afterModel: async (state) => {
/**
* If no redactions were made, skip processing
*/
if (Object.keys(redactionMap).length === 0) return;
const lastMessage = state.messages.at(-1);
if (!AIMessage.isInstance(lastMessage)) return;
/**
* In cases where we do structured output via tool calls, we also have to look at the second last message
* as we add a custom last message to the messages array.
*/
const secondLastMessage = state.messages.at(-2);
const { message: restoredLastMessage, changed } = restoreMessage(lastMessage, redactionMap);
if (!changed) return;
/**
* Identify if the last message is a structured response and restore the values if so
*/
let structuredResponse;
if (AIMessage.isInstance(lastMessage) && lastMessage?.tool_calls?.length === 0 && typeof lastMessage.content === "string" && lastMessage.content.startsWith("{") && lastMessage.content.endsWith("}")) try {
structuredResponse = JSON.parse(restoreRedactedValues(lastMessage.content, redactionMap));
} catch {}
/**
* Check if the second last message is a structured response tool call
*/
const isStructuredResponseToolCall = AIMessage.isInstance(secondLastMessage) && secondLastMessage?.tool_calls?.length !== 0 && secondLastMessage?.tool_calls?.some((call) => call.name.startsWith("extract-"));
if (isStructuredResponseToolCall) {
const { message: restoredSecondLastMessage, changed: changedSecondLastMessage } = restoreMessage(secondLastMessage, redactionMap);
const structuredResponseRedacted = secondLastMessage.tool_calls?.find((call) => call.name.startsWith("extract-"))?.args;
const structuredResponse$1 = structuredResponseRedacted ? JSON.parse(restoreRedactedValues(JSON.stringify(structuredResponseRedacted), redactionMap)) : void 0;
if (changed || changedSecondLastMessage) return {
...state,
...structuredResponse$1 ? { structuredResponse: structuredResponse$1 } : {},
messages: [
new RemoveMessage({ id: secondLastMessage.id }),
new RemoveMessage({ id: lastMessage.id }),
restoredSecondLastMessage,
restoredLastMessage
]
};
}
return {
...state,
...structuredResponse ? { structuredResponse } : {},
messages: [new RemoveMessage({ id: lastMessage.id }), restoredLastMessage]
};
}
});
}
//#endregion
export { piiRedactionMiddleware };
//# sourceMappingURL=piiRedaction.js.map