// langchain
// Version:
// TypeScript bindings for langchain
// 436 lines (434 loc) • 14.2 kB
// JavaScript
import { createMiddleware } from "../middleware.js";
import { AIMessage, HumanMessage, ToolMessage } from "@langchain/core/messages";
import { z } from "zod/v3";
import { sha256 } from "@langchain/core/utils/hash";
//#region src/agents/middleware/pii.ts
/**
 * Raised when PII is found and the configured strategy is 'block'.
 *
 * Carries the offending PII type (`piiType`) and the list of matches
 * (`matches`) so callers can inspect what was detected.
 */
class PIIDetectionError extends Error {
  constructor(piiType, matches) {
    const occurrences = matches.length;
    super(`PII detected: ${piiType} found ${occurrences} occurrence(s)`);
    Object.assign(this, { piiType, matches, name: "PIIDetectionError" });
  }
}
/**
 * Email detection regex pattern.
 *
 * Global match of `local@domain.tld` between word boundaries: the local part
 * allows letters, digits and `._%+-`; the domain allows letters, digits,
 * `.` and `-`; the final label must be at least two ASCII letters.
 */
const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
/**
 * Credit card detection regex pattern (basic, will be validated with Luhn).
 *
 * Global match of four groups of four digits (16 digits in total), where
 * consecutive groups may be separated by a single hyphen or whitespace
 * character.
 */
const CREDIT_CARD_PATTERN = /\b(?:\d{4}[-\s]?){3}\d{4}\b/g;
/**
 * IP address detection regex pattern.
 *
 * Global match of a dotted quad between word boundaries; the alternation
 * restricts each of the four octets to the range 0-255.
 */
const IP_PATTERN = /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g;
/**
 * MAC address detection regex pattern.
 *
 * Global match of six two-digit hexadecimal groups (upper or lower case),
 * each pair of groups separated by `:` or `-`.
 */
const MAC_ADDRESS_PATTERN = /\b(?:[0-9A-Fa-f]{2}[:-]){5}(?:[0-9A-Fa-f]{2})\b/g;
/**
 * URL detection regex pattern.
 *
 * Global, case-insensitive match that must start with `http://`, `https://`
 * or `www.` and then runs until the first whitespace character or one of
 * the excluded characters listed in the character class.
 */
const URL_PATTERN = /(?:https?:\/\/|www\.)[^\s<>"{}|\\^`[\]]+/gi;
/**
 * Luhn checksum validation for a card number.
 *
 * Non-digit characters are stripped first. Walking from the rightmost digit,
 * every second digit is doubled (subtracting 9 when the result exceeds 9);
 * the number is valid when the total is a multiple of 10.
 *
 * @param cardNumber - Card number string, optionally containing separators.
 * @returns `true` when the checksum is divisible by 10.
 */
function luhnCheck(cardNumber) {
  const rightToLeft = cardNumber.replace(/\D/g, "").split("").reverse();
  const total = rightToLeft.reduce((running, char, position) => {
    let value = parseInt(char, 10);
    // Odd positions (counted from the right, zero-based) are the doubled ones.
    if (position % 2 === 1) {
      value *= 2;
      if (value > 9) value -= 9;
    }
    return running + value;
  }, 0);
  return total % 10 === 0;
}
/**
 * Turn a regex match result into a PIIMatch record.
 *
 * @param match - Result array of a regex match; `match[0]` is the matched text.
 * @returns `{ text, start, end }` where `start` is the match index (0 when the
 * index is missing) and `end` is `start` plus the length of the matched text.
 */
function regexMatchToPIIMatch(match) {
  const text = match[0];
  const start = match.index ?? 0;
  return { text, start, end: start + text.length };
}
/**
 * Find every email address in `content`.
 *
 * @param content - Text to scan.
 * @returns PIIMatch records, in order of appearance.
 */
function detectEmail(content) {
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const scanner = new RegExp(EMAIL_PATTERN);
  const found = [];
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    found.push(regexMatchToPIIMatch(hit));
  }
  return found;
}
/**
 * Find credit card numbers in `content`.
 *
 * A candidate is kept only when, after removing separators, it has between
 * 13 and 19 digits and passes the Luhn checksum.
 *
 * @param content - Text to scan.
 * @returns PIIMatch records for the validated card numbers.
 */
function detectCreditCard(content) {
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const scanner = new RegExp(CREDIT_CARD_PATTERN);
  const found = [];
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    const digitsOnly = hit[0].replace(/\D/g, "");
    const plausibleLength = digitsOnly.length >= 13 && digitsOnly.length <= 19;
    if (plausibleLength && luhnCheck(digitsOnly)) {
      found.push(regexMatchToPIIMatch(hit));
    }
  }
  return found;
}
/**
 * Find IP addresses in `content`.
 *
 * Each regex candidate is re-checked: it must split into exactly four
 * dot-separated parts, each parsing to a number between 0 and 255.
 *
 * @param content - Text to scan.
 * @returns PIIMatch records for the validated addresses.
 */
function detectIP(content) {
  const withinByteRange = (octet) => {
    const value = parseInt(octet, 10);
    return value >= 0 && value <= 255;
  };
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const scanner = new RegExp(IP_PATTERN);
  const found = [];
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    const octets = hit[0].split(".");
    if (octets.length === 4 && octets.every(withinByteRange)) {
      found.push(regexMatchToPIIMatch(hit));
    }
  }
  return found;
}
/**
 * Find every MAC address in `content`.
 *
 * @param content - Text to scan.
 * @returns PIIMatch records, in order of appearance.
 */
function detectMacAddress(content) {
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const scanner = new RegExp(MAC_ADDRESS_PATTERN);
  const found = [];
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    found.push(regexMatchToPIIMatch(hit));
  }
  return found;
}
/**
 * Find every URL in `content`.
 *
 * @param content - Text to scan.
 * @returns PIIMatch records, in order of appearance.
 */
function detectUrl(content) {
  // Work on a private copy so the shared pattern's lastIndex is never touched.
  const scanner = new RegExp(URL_PATTERN);
  const found = [];
  for (let hit = scanner.exec(content); hit !== null; hit = scanner.exec(content)) {
    found.push(regexMatchToPIIMatch(hit));
  }
  return found;
}
/**
 * Built-in detector map.
 *
 * Keys are the PII type names that can be used without supplying a custom
 * detector; values are the detector functions, each taking the content
 * string and returning an array of PIIMatch records. The key list is also
 * what `resolveRedactionRule` prints in its "Unknown PII type" error.
 */
const BUILT_IN_DETECTORS = {
email: detectEmail,
credit_card: detectCreditCard,
ip: detectIP,
mac_address: detectMacAddress,
url: detectUrl
};
/**
 * Resolve a redaction rule to a concrete detector function.
 *
 * `config.detector` may be:
 * - a string: compiled as a global regular expression;
 * - a `RegExp`: copied with the `g` flag added when missing, so the caller's
 *   object is never mutated and a non-global pattern cannot loop forever;
 * - a function: used as-is;
 * - absent/falsy: the built-in detector registered for `config.piiType`.
 *
 * Regex-based detectors skip empty matches (a pattern such as `x*` can match
 * the empty string, which carries no PII).
 *
 * @param config - `{ piiType, strategy, detector? }`
 * @returns `{ piiType, strategy, detector }` with `detector` always a function
 * mapping a content string to an array of `{ text, start, end }` matches.
 * @throws {Error} If no detector is given and `piiType` is not a built-in type.
 * @throws {SyntaxError} If a string detector is not a valid regular expression.
 */
function resolveRedactionRule(config) {
  const { piiType, strategy, detector: customDetector } = config;

  // Builds a stateless detector: matchAll iterates over an internal clone of
  // `pattern`, so repeated or interleaved calls never share lastIndex state.
  const detectorFromRegex = (pattern) => (content) => {
    const matches = [];
    for (const hit of String(content).matchAll(pattern)) {
      const text = hit[0];
      if (text.length === 0) continue;
      const start = hit.index ?? 0;
      matches.push({ text, start, end: start + text.length });
    }
    return matches;
  };

  let detector;
  if (customDetector) {
    if (typeof customDetector === "string") {
      detector = detectorFromRegex(new RegExp(customDetector, "g"));
    } else if (customDetector instanceof RegExp) {
      const { source, flags } = customDetector;
      // matchAll requires a global pattern; keep every other flag untouched.
      const globalFlags = flags.includes("g") ? flags : `${flags}g`;
      detector = detectorFromRegex(new RegExp(source, globalFlags));
    } else {
      detector = customDetector;
    }
  } else {
    // Own-property check so names like "toString" or "constructor" do not
    // resolve to members inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(BUILT_IN_DETECTORS, piiType)) {
      throw new Error(`Unknown PII type: ${config.piiType}. Must be one of: ${Object.keys(BUILT_IN_DETECTORS).join(", ")}, or provide a custom detector.`);
    }
    detector = BUILT_IN_DETECTORS[piiType];
  }
  return {
    piiType,
    strategy,
    detector
  };
}
/**
 * Redact strategy: swap each match for a `[REDACTED_<TYPE>]` placeholder,
 * where `<TYPE>` is the upper-cased `piiType`.
 *
 * Matches are applied from the last to the first so that the offsets of the
 * matches still to be processed stay valid while the string changes length.
 *
 * @param content - Original text.
 * @param matches - `{ text, start, end }` records located in `content`.
 * @param piiType - PII type name used in the placeholder.
 * @returns The text with every match replaced.
 */
function applyRedactStrategy(content, matches, piiType) {
  return matches.reduceRight((text, { start, end }) => {
    const placeholder = `[REDACTED_${piiType.toUpperCase()}]`;
    return `${text.slice(0, start)}${placeholder}${text.slice(end)}`;
  }, content);
}
/**
 * Mask strategy: partially hide each match while keeping a recognisable tail.
 *
 * - `credit_card`: `****-****-****-` followed by the last four digits.
 * - `email`: first character of the local part, `***`, then `@domain`;
 *   falls back to `***` when the text does not split into local and domain.
 * - anything else: all but the last four characters become `*` (texts of
 *   four characters or fewer are left fully visible).
 *
 * Matches are applied from the last to the first so earlier offsets stay valid.
 *
 * @param content - Original text.
 * @param matches - `{ text, start, end }` records located in `content`.
 * @param piiType - PII type name selecting the masking rule.
 * @returns The text with every match masked.
 */
function applyMaskStrategy(content, matches, piiType) {
  const maskOne = (piiText) => {
    if (piiType === "credit_card") {
      const lastFour = piiText.replace(/\D/g, "").slice(-4);
      return `****-****-****-${lastFour}`;
    }
    if (piiType === "email") {
      const [local, domain] = piiText.split("@");
      return local && domain ? `${local[0]}***@${domain}` : "***";
    }
    const visibleCount = Math.min(4, piiText.length);
    const hiddenCount = Math.max(0, piiText.length - visibleCount);
    return `${"*".repeat(hiddenCount)}${piiText.slice(-visibleCount)}`;
  };
  return matches.reduceRight(
    (text, hit) => text.slice(0, hit.start) + maskOne(hit.text) + text.slice(hit.end),
    content
  );
}
/**
 * Hash strategy: swap each match for `<{piiType}_hash:{digest}>`, where the
 * digest is the first eight characters of `sha256` applied to the matched text.
 *
 * Matches are applied from the last to the first so earlier offsets stay valid.
 *
 * @param content - Original text.
 * @param matches - `{ text, start, end }` records located in `content`.
 * @param piiType - PII type name used as the placeholder prefix.
 * @returns The text with every match replaced by its hash placeholder.
 */
function applyHashStrategy(content, matches, piiType) {
  return matches.reduceRight((text, { text: piiText, start, end }) => {
    const digest = sha256(piiText).slice(0, 8);
    const placeholder = `<${piiType}_hash:${digest}>`;
    return `${text.slice(0, start)}${placeholder}${text.slice(end)}`;
  }, content);
}
/**
 * Apply the chosen strategy to `content` for the given matches.
 *
 * @param content - Original text.
 * @param matches - `{ text, start, end }` records located in `content`.
 * @param strategy - One of `"block"`, `"redact"`, `"mask"`, `"hash"`.
 * @param piiType - PII type name forwarded to the strategy.
 * @returns `content` unchanged when there are no matches, otherwise the
 * sanitized text.
 * @throws {PIIDetectionError} When matches exist and the strategy is `"block"`.
 * @throws {Error} When matches exist and the strategy is not recognised.
 */
function applyStrategy(content, matches, strategy, piiType) {
  // Nothing detected: the strategy is irrelevant (and is not even validated).
  if (matches.length === 0) return content;
  if (strategy === "block") throw new PIIDetectionError(piiType, matches);
  if (strategy === "redact") return applyRedactStrategy(content, matches, piiType);
  if (strategy === "mask") return applyMaskStrategy(content, matches, piiType);
  if (strategy === "hash") return applyHashStrategy(content, matches, piiType);
  throw new Error(`Unknown strategy: ${strategy}`);
}
/**
 * Configuration schema for PII middleware.
 *
 * Describes the optional per-run context flags. In the middleware hooks each
 * flag, when present on `runtime.context`, takes precedence over the option
 * of the same name passed to `piiMiddleware`, which in turn takes precedence
 * over the built-in default.
 */
const contextSchema = z.object({
// Scan the latest human message before the model call.
applyToInput: z.boolean().optional(),
// Scan the latest AI message after the model call.
applyToOutput: z.boolean().optional(),
// Scan tool messages that follow the latest AI message before the model call.
applyToToolResults: z.boolean().optional()
});
/**
 * Run a rule's detector over `content` and apply the rule's strategy.
 *
 * @param content - Text to inspect.
 * @param rule - Resolved rule `{ piiType, strategy, detector }`.
 * @returns `{ content, matches }`: the (possibly sanitized) text plus the
 * detector's matches; when nothing is detected the text is returned untouched
 * with an empty match list.
 * @throws Whatever the strategy throws (e.g. for the `"block"` strategy).
 */
function processContent(content, rule) {
  const found = rule.detector(content);
  return found.length === 0
    ? { content, matches: [] }
    : {
        content: applyStrategy(content, found, rule.strategy, rule.piiType),
        matches: found
      };
}
/**
 * Index of the last message accepted by `isMatch`, or -1 when there is none.
 */
function findLastMessageIndex(messages, isMatch) {
  for (let i = messages.length - 1; i >= 0; i--) {
    if (isMatch(messages[i])) return i;
  }
  return -1;
}
/**
 * Run `rule` over a message's content.
 *
 * @returns The sanitized text when PII was found, or `null` when the message
 * has no content or nothing matched.
 */
function sanitizeMessageContent(message, rule) {
  if (!message || !message.content) return null;
  // NOTE(review): String() on non-string content (e.g. an array of content
  // blocks) yields its default string form, so PII inside structured content
  // would presumably go undetected - confirm whether that needs handling.
  const { content, matches } = processContent(String(message.content), rule);
  return matches.length > 0 ? content : null;
}
/**
 * Creates a middleware that detects and handles personally identifiable information (PII)
 * in conversations.
 *
 * This middleware detects common PII types and applies configurable strategies to handle them.
 * It can detect emails, credit cards, IP addresses, MAC addresses, and URLs in user input,
 * tool results, and agent output.
 *
 * Built-in PII types:
 * - `email`: Email addresses
 * - `credit_card`: Credit card numbers (validated with Luhn algorithm)
 * - `ip`: IP addresses (validated)
 * - `mac_address`: MAC addresses
 * - `url`: URLs starting with `http://`, `https://`, or `www.`
 *
 * Strategies:
 * - `block`: Raise an exception when PII is detected
 * - `redact`: Replace PII with `[REDACTED_TYPE]` placeholders
 * - `mask`: Partially mask PII (e.g., `****-****-****-1234` for credit card)
 * - `hash`: Replace PII with deterministic hash (e.g., `<email_hash:a1b2c3d4>`)
 *
 * Strategy Selection Guide:
 * | Strategy | Preserves Identity? | Best For                                |
 * | -------- | ------------------- | --------------------------------------- |
 * | `block`  | N/A                 | Avoid PII completely                    |
 * | `redact` | No                  | General compliance, log sanitization    |
 * | `mask`   | No                  | Human readability, customer service UIs |
 * | `hash`   | Yes (pseudonymous)  | Analytics, debugging                    |
 *
 * Scope of each hook:
 * - before the model call, only the most recent human message is checked
 *   (`applyToInput`), plus the tool messages that follow the most recent AI
 *   message (`applyToToolResults`);
 * - after the model call, only the most recent AI message is checked
 *   (`applyToOutput`).
 * A message that contains PII is replaced by a new message carrying the
 * sanitized text together with the original `id` and `name` (and
 * `tool_call_id` / `tool_calls` for tool / AI messages).
 *
 * Each `applyTo*` flag is read from the runtime context first, then from
 * `options`, then falls back to its default.
 *
 * @param piiType - Type of PII to detect. Can be a built-in type (`email`, `credit_card`, `ip`, `mac_address`, `url`) or a custom type name.
 * @param options - Configuration options
 * @param options.strategy - How to handle detected PII. Defaults to `"redact"`.
 * @param options.detector - Custom detector function, `RegExp`, or regex pattern string. If not provided, uses built-in detector for the `piiType`.
 * @param options.applyToInput - Whether to check user messages before model call. Defaults to `true`.
 * @param options.applyToOutput - Whether to check AI messages after model call. Defaults to `false`.
 * @param options.applyToToolResults - Whether to check tool result messages after tool execution. Defaults to `false`.
 *
 * @returns Middleware instance for use with `createAgent`
 *
 * @throws {PIIDetectionError} When PII is detected and strategy is `'block'`
 * @throws {Error} If `piiType` is not built-in and no detector is provided
 *
 * @example Basic usage
 * ```typescript
 * import { piiMiddleware } from "langchain";
 * import { createAgent } from "langchain";
 *
 * // Redact all emails in user input
 * const agent = createAgent({
 *   model: "openai:gpt-4",
 *   middleware: [
 *     piiMiddleware("email", { strategy: "redact" }),
 *   ],
 * });
 * ```
 *
 * @example Different strategies for different PII types
 * ```typescript
 * const agent = createAgent({
 *   model: "openai:gpt-4o",
 *   middleware: [
 *     piiMiddleware("credit_card", { strategy: "mask" }),
 *     piiMiddleware("url", { strategy: "redact" }),
 *     piiMiddleware("ip", { strategy: "hash" }),
 *   ],
 * });
 * ```
 *
 * @example Custom PII type with regex
 * ```typescript
 * const agent = createAgent({
 *   model: "openai:gpt-4",
 *   middleware: [
 *     piiMiddleware("api_key", {
 *       detector: "sk-[a-zA-Z0-9]{32}",
 *       strategy: "block",
 *     }),
 *   ],
 * });
 * ```
 *
 * @public
 */
function piiMiddleware(piiType, options = {}) {
  const { strategy = "redact", detector } = options;
  const resolvedRule = resolveRedactionRule({
    piiType,
    strategy,
    detector
  });
  const middlewareName = `PIIMiddleware[${resolvedRule.piiType}]`;
  return createMiddleware({
    name: middlewareName,
    contextSchema,
    beforeModel: async (state, runtime) => {
      const applyToInput = runtime.context.applyToInput ?? options.applyToInput ?? true;
      const applyToToolResults = runtime.context.applyToToolResults ?? options.applyToToolResults ?? false;
      if (!applyToInput && !applyToToolResults) return;
      const messages = state.messages;
      if (!messages || messages.length === 0) return;
      // Copy so the state's array is never mutated; only returned when changed.
      const newMessages = [...messages];
      let anyModified = false;
      if (applyToInput) {
        const lastUserIdx = findLastMessageIndex(messages, (msg) => HumanMessage.isInstance(msg));
        if (lastUserIdx !== -1) {
          const lastUserMsg = messages[lastUserIdx];
          const sanitized = sanitizeMessageContent(lastUserMsg, resolvedRule);
          if (sanitized !== null) {
            newMessages[lastUserIdx] = new HumanMessage({
              content: sanitized,
              id: lastUserMsg.id,
              name: lastUserMsg.name
            });
            anyModified = true;
          }
        }
      }
      if (applyToToolResults) {
        const lastAiIdx = findLastMessageIndex(messages, (msg) => AIMessage.isInstance(msg));
        if (lastAiIdx !== -1) {
          // Only tool messages after the latest AI message are inspected.
          for (let i = lastAiIdx + 1; i < messages.length; i++) {
            const msg = messages[i];
            if (!ToolMessage.isInstance(msg)) continue;
            const sanitized = sanitizeMessageContent(msg, resolvedRule);
            if (sanitized === null) continue;
            newMessages[i] = new ToolMessage({
              content: sanitized,
              id: msg.id,
              name: msg.name,
              tool_call_id: msg.tool_call_id
            });
            anyModified = true;
          }
        }
      }
      if (anyModified) return { messages: newMessages };
    },
    afterModel: async (state, runtime) => {
      const applyToOutput = runtime.context.applyToOutput ?? options.applyToOutput ?? false;
      if (!applyToOutput) return;
      const messages = state.messages;
      if (!messages || messages.length === 0) return;
      const lastAiIdx = findLastMessageIndex(messages, (msg) => AIMessage.isInstance(msg));
      if (lastAiIdx === -1) return;
      const lastAiMsg = messages[lastAiIdx];
      const sanitized = sanitizeMessageContent(lastAiMsg, resolvedRule);
      if (sanitized === null) return;
      const newMessages = [...messages];
      newMessages[lastAiIdx] = new AIMessage({
        content: sanitized,
        id: lastAiMsg.id,
        name: lastAiMsg.name,
        tool_calls: lastAiMsg.tool_calls
      });
      return { messages: newMessages };
    }
  });
}
//#endregion
export { PIIDetectionError, applyStrategy, detectCreditCard, detectEmail, detectIP, detectMacAddress, detectUrl, piiMiddleware, resolveRedactionRule };
//# sourceMappingURL=pii.js.map