@dooor-ai/toolkit
Version:
Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
301 lines (262 loc) • 8.19 kB
text/typescript
import { Guard } from "./base";
import { GuardResult, GuardConfig } from "../core/types";
import { getCortexDBClient, getGlobalProviderName } from "../observability/cortexdb-client";
export interface PIIGuardConfig extends GuardConfig {
/**
* AI Provider name to use (configured in CortexDB Studio, e.g., "gemini")
* If not provided, uses providerName from toolkitConfig
*/
providerName?: string;
/** Types of PII to detect: email, phone, ssn, credit_card, cpf, address, passport, etc. */
detectTypes?: string[];
/** Action to take when PII is detected: block (default), mask, or warn */
action?: "block" | "mask" | "warn";
}
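// Illustrative configuration only (values are placeholders, not package defaults);
// "gemini" is the example provider name from the providerName doc comment above.
//
// const piiGuard = new PIIGuard({
//   providerName: "gemini",
//   detectTypes: ["email", "cpf"],
//   action: "warn",
// });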
/**
* Guard that detects Personally Identifiable Information (PII) using AI (LLM-as-a-Judge)
*
* Detects various types of PII including:
* - Email addresses, phone numbers
* - Government IDs (CPF, SSN, passport numbers)
* - Financial info (credit cards, bank accounts)
* - Physical addresses, names
* - And other sensitive personal information
*
* Supports 3 actions:
* - block: Blocks the request completely
* - mask: Replaces PII with [TYPE_MASKED] placeholders and allows the request
* - warn: Logs a warning but does not block
*
* @example
* ```typescript
* // Block any PII
* const blockGuard = new PIIGuard({
* threshold: 0.7,
* action: "block",
* });
*
* // Mask PII and allow
* const maskGuard = new PIIGuard({
* threshold: 0.7,
* action: "mask",
* detectTypes: ["email", "cpf", "credit_card"],
* });
*
* // Just warn (GDPR/LGPD audit mode)
* const warnGuard = new PIIGuard({
* threshold: 0.7,
* action: "warn",
* });
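*
* // Consuming the GuardResult (illustrative; the field names below are the ones
* // this guard sets on its return values - the exact shape lives in ../core/types):
* const result = await maskGuard.validate("Contact me at jane@example.com");
* if (result.passed && result.metadata?.maskedInput) {
*   console.log(result.metadata.maskedInput); // redacted copy of the input
* } else if (!result.passed) {
*   console.warn(result.reason); // block mode: the request should be rejected
* }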
* ```
*/
export class PIIGuard extends Guard {
private providerName?: string;
private detectTypes: string[];
private action: "block" | "mask" | "warn";
constructor(config: PIIGuardConfig = {}) {
super({
threshold: 0.7,
blockOnDetection: config.action !== "warn",
...config,
});
this.providerName = config.providerName;
this.detectTypes = config.detectTypes || [
"email",
"phone",
"cpf",
"credit_card",
"ssn",
"address",
"passport",
"bank_account",
"driver_license",
"full_name"
];
this.action = config.action || "block";
}
get name(): string {
return "PIIGuard";
}
async validate(input: string, metadata?: Record<string, any>): Promise<GuardResult> {
try {
const client = getCortexDBClient();
// Use the explicit providerName or fall back to the global one from toolkitConfig
const providerName = this.providerName || getGlobalProviderName() || undefined;
const prompt = this.buildPIIDetectionPrompt(input);
const response = await client.invokeAI({
prompt,
usage: "guard",
maxTokens: 400,
temperature: 0,
providerName,
});
const result = this.parsePIIResponse(response.text);
const hasPII = result.score >= this.getThreshold();
if (!hasPII) {
return {
passed: true,
reason: undefined,
confidence: 1.0 - result.score,
metadata: {
detected: [],
action: this.action,
},
};
}
// PII detected
const piiSummary = result.types.join(", ");
if (this.action === "warn") {
// Just warn, don't block
return {
passed: true,
reason: `PII detected but allowed (warn mode): ${piiSummary}`,
severity: "low",
suggestedAction: "warn",
confidence: result.score,
metadata: {
detected: result.types,
details: result.details,
action: "warn",
provider: this.providerName,
},
};
}
if (this.action === "mask") {
// Ask LLM to mask the PII
const maskedInput = await this.maskPIIWithAI(input, result.types, providerName);
return {
passed: true,
reason: `PII masked: ${piiSummary}`,
severity: "medium",
suggestedAction: "log",
confidence: result.score,
metadata: {
detected: result.types,
details: result.details,
action: "mask",
maskedInput,
provider: this.providerName,
},
};
}
// Block (default)
return {
passed: false,
reason: `PII detected and blocked: ${piiSummary}. ${result.details}`,
severity: "high",
suggestedAction: "block",
confidence: result.score,
metadata: {
detected: result.types,
details: result.details,
action: "block",
provider: this.providerName,
},
};
} catch (error: any) {
console.error("PIIGuard failed:", error.message);
// Fallback: if AI call fails, don't block (fail open)
return {
passed: true,
reason: undefined,
confidence: 0,
metadata: {
error: error.message,
fallback: true,
},
};
}
}
/**
* Build prompt for PII detection
*/
private buildPIIDetectionPrompt(input: string): string {
const typesStr = this.detectTypes.join(", ");
return `You are a privacy AI specialized in detecting Personally Identifiable Information (PII). Analyze the following text for any PII that could identify an individual.

Types of PII to detect: ${typesStr}

Common PII includes:
- Email addresses (user@example.com)
- Phone numbers (any format, any country)
- Government IDs (CPF, SSN, passport numbers, driver's license, national IDs)
- Credit card numbers, bank account numbers
- Physical addresses (street, city, zip code)
- Full names (first + last name combinations)
- IP addresses, MAC addresses
- Biometric data references
- Any other information that could identify a person

User input to analyze:
"""
${input}
"""

Provide your analysis in this exact format:
PII_SCORE: [0.0-1.0]
TYPES: [comma-separated list of detected PII types, or "none"]
DETAILS: [brief explanation of what PII was found]

Examples:

PII_SCORE: 0.95
TYPES: email, phone, cpf
DETAILS: Found email address, Brazilian phone number, and CPF number

PII_SCORE: 0.1
TYPES: none
DETAILS: No PII detected, text contains only generic information

PII_SCORE: 0.85
TYPES: full_name, address
DETAILS: Contains full name "John Smith" and complete address

Now analyze the input:`;
}
/**
* Parse AI response for PII detection
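*
* Illustrative example: a response of
*   "PII_SCORE: 0.95\nTYPES: email, phone\nDETAILS: Found email and phone number"
* parses to { score: 0.95, types: ["email", "phone"], details: "Found email and phone number" }.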
*/
private parsePIIResponse(text: string): {
score: number;
types: string[];
details: string;
} {
const scoreMatch = text.match(/PII_SCORE:\s*([\d.]+)/i);
const typesMatch = text.match(/TYPES:\s*(.+?)(?:\n|$)/i);
const detailsMatch = text.match(/DETAILS:\s*(.+?)(?:\n|$)/i);
const score = scoreMatch ? parseFloat(scoreMatch[1]) : 0.5;
const typesText = typesMatch ? typesMatch[1].trim() : "none";
const types = typesText.toLowerCase() === "none"
? []
: typesText.split(",").map(t => t.trim());
const details = detailsMatch ? detailsMatch[1].trim() : "No details provided";
return {
score: Math.min(1, Math.max(0, score)), // Clamp to 0-1
types,
details,
};
}
/**
* Use AI to mask PII in the text
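*
* Illustrative example (actual output depends on the model): given
* "Email me at jane@example.com", the expected return is
* "Email me at [EMAIL_MASKED]".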
*/
private async maskPIIWithAI(
input: string,
detectedTypes: string[],
providerName?: string
): Promise<string> {
try {
const client = getCortexDBClient();
const prompt = `Replace all Personally Identifiable Information (PII) in the following text with appropriate masks like [EMAIL_MASKED], [PHONE_MASKED], [CPF_MASKED], etc.

Detected PII types: ${detectedTypes.join(", ")}

Original text:
"""
${input}
"""

Return ONLY the masked version of the text, with all PII replaced by [TYPE_MASKED] placeholders. Do not add any explanations or additional text.

Masked text:`;
const response = await client.invokeAI({
prompt,
usage: "guard",
maxTokens: 500,
temperature: 0,
providerName,
});
return response.text.trim();
} catch (error) {
console.error("Failed to mask PII with AI:", error);
// Fallback: return original text if masking fails
return input;
}
}
}
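Usage sketch (a minimal illustration, not taken from the package documentation): running PIIGuard in front of a LangChain-style runnable. Only the PIIGuard constructor options and the validate() result fields come from the file above; the chain shape, the safeInvoke helper, and the root-level PIIGuard export are assumptions.

```typescript
// Assumption: PIIGuard is re-exported from the package root.
import { PIIGuard } from "@dooor-ai/toolkit";

const guard = new PIIGuard({ threshold: 0.7, action: "mask" });

// `chain` stands in for any LangChain runnable exposing invoke().
async function safeInvoke(
  chain: { invoke: (input: string) => Promise<string> },
  userInput: string
): Promise<string> {
  const result = await guard.validate(userInput);
  if (!result.passed) {
    // block mode: surface the guard's reason instead of calling the model
    throw new Error(result.reason ?? "Input rejected by PIIGuard");
  }
  // mask mode passes but exposes a redacted copy of the input in metadata
  const safeInput = (result.metadata?.maskedInput as string | undefined) ?? userInput;
  return chain.invoke(safeInput);
}
```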