@llm-dev-ops/shield-sdk
Version:
Enterprise-grade SDK for securing Large Language Model applications
220 lines • 6.86 kB
JavaScript
import { BaseScanner } from './base.js';
/**
 * Built-in PII detection patterns.
 *
 * Each entry describes one detector:
 * - `pattern`: global (`g`) regular expression that locates candidate values
 * - `type`: human-readable label of the detector
 * - `entityType`: category key shared by related detectors
 * - `severity`: severity label associated with the detector
 */
const PII_PATTERNS = [
  // Email
  {
    // The TLD class is [A-Za-z]; a `|` inside a character class is a literal
    // pipe, not an alternation, so it must not appear there.
    pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    type: 'Email Address',
    entityType: 'email',
    severity: 'medium',
  },
  // Phone numbers (international formats)
  {
    // A negative lookbehind is used instead of a leading `\b` so that an
    // optional leading "+" or "(" is part of the matched span.
    pattern: /(?<!\w)(?:\+?1[-.\s]?)?\(?[2-9]\d{2}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b/g,
    type: 'US Phone Number',
    entityType: 'phone',
    severity: 'medium',
  },
  {
    // No `\b` before "+": a word boundary in front of a non-word character
    // only exists when a word character precedes it, which would prevent
    // matching at the start of the text or after whitespace.
    pattern: /\+44\s?[0-9]{4}\s?[0-9]{6}\b/g,
    type: 'UK Phone Number',
    entityType: 'phone',
    severity: 'medium',
  },
  {
    // Generic "+<digits>" form: 7 to 15 digits, so a bare country code such
    // as "+44" is not reported. May overlap with the country-specific phone
    // patterns above when a number is written without separators.
    pattern: /\+[1-9]\d{6,14}\b/g,
    type: 'International Phone Number',
    entityType: 'phone',
    severity: 'medium',
  },
  // Social Security Numbers
  {
    pattern: /\b\d{3}-\d{2}-\d{4}\b/g,
    type: 'SSN (dashed)',
    entityType: 'ssn',
    severity: 'critical',
  },
  {
    pattern: /\b\d{3}\s\d{2}\s\d{4}\b/g,
    type: 'SSN (spaced)',
    entityType: 'ssn',
    severity: 'critical',
  },
  {
    // Any standalone run of exactly nine digits; lower severity than the
    // delimited forms.
    pattern: /\b\d{9}\b/g,
    type: 'SSN (raw)',
    entityType: 'ssn',
    severity: 'high',
  },
  // Credit Card Numbers
  {
    // Starts with 4; 13 or 16 digits.
    pattern: /\b4[0-9]{12}(?:[0-9]{3})?\b/g,
    type: 'Credit Card (Visa)',
    entityType: 'credit-card',
    severity: 'critical',
  },
  {
    // Starts with 51-55; 16 digits.
    pattern: /\b5[1-5][0-9]{14}\b/g,
    type: 'Credit Card (Mastercard)',
    entityType: 'credit-card',
    severity: 'critical',
  },
  {
    // Starts with 34 or 37; 15 digits.
    pattern: /\b3[47][0-9]{13}\b/g,
    type: 'Credit Card (Amex)',
    entityType: 'credit-card',
    severity: 'critical',
  },
  {
    // Starts with 6011 or 65xx; 16 digits.
    pattern: /\b6(?:011|5[0-9]{2})[0-9]{12}\b/g,
    type: 'Credit Card (Discover)',
    entityType: 'credit-card',
    severity: 'critical',
  },
  // IP Addresses
  {
    // Four dot-separated octets, each limited to 0-255.
    pattern: /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g,
    type: 'IPv4 Address',
    entityType: 'ip-address',
    severity: 'low',
  },
  {
    // Full eight-group form only; "::" compressed addresses are not matched.
    pattern: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
    type: 'IPv6 Address',
    entityType: 'ip-address',
    severity: 'low',
  },
  // US Passport
  {
    // One uppercase letter followed by eight digits.
    pattern: /\b[A-Z][0-9]{8}\b/g,
    type: 'US Passport Number',
    entityType: 'passport',
    severity: 'high',
  },
  // US Driver's License (general pattern)
  {
    // One uppercase letter followed by seven or eight digits.
    pattern: /\b[A-Z][0-9]{7,8}\b/g,
    type: 'Driver\'s License Number',
    entityType: 'drivers-license',
    severity: 'high',
  },
];
/**
 * Scanner that detects Personally Identifiable Information (PII) in text
 * using the detectors listed in `PII_PATTERNS`.
 */
export class PIIScanner extends BaseScanner {
  name = 'pii';
  patterns;
  redact;

  /**
   * @param {object} [config] - Scanner options; `null` is treated like `{}`.
   * @param {boolean} [config.redact=true] - When true, reported entity text
   *   is masked instead of containing the raw match.
   * @param {string[]} [config.piiTypes] - `entityType` values to enable.
   *   Defaults to email, phone, ssn and credit-card.
   */
  constructor(config = {}) {
    super();
    const options = config ?? {};
    this.redact = options.redact ?? true;
    // Keep only the detectors whose category was requested.
    const enabledTypes = options.piiTypes ?? ['email', 'phone', 'ssn', 'credit-card'];
    this.patterns = PII_PATTERNS.filter((p) => enabledTypes.includes(p.entityType));
  }

  /**
   * Runs every enabled detector over `text`.
   *
   * @param {string} text - Text to inspect.
   * @returns {Promise<*>} Whatever `this.createResult(text, entities,
   *   riskFactors, durationMs)` returns; that method is defined outside this
   *   class body (presumably on BaseScanner — confirm).
   */
  async scan(text) {
    const startTime = performance.now();
    const entities = [];
    // Detector label -> running count and severity, in first-seen order.
    const found = new Map();
    const haystack = String(text ?? '');

    for (const { pattern, type, entityType, severity } of this.patterns) {
      // Fresh RegExp per scan so the shared pattern's lastIndex is never
      // mutated. matchAll throws on a non-global regex rather than looping
      // forever.
      const regex = new RegExp(pattern.source, pattern.flags);
      for (const match of haystack.matchAll(regex)) {
        const [value] = match;
        // Validate potential matches to reduce false positives.
        if (!this.validate(value, entityType)) {
          continue;
        }
        entities.push({
          entityType,
          text: this.redact ? this.maskPII(value, entityType) : value,
          start: match.index,
          end: match.index + value.length,
          confidence: this.getConfidence(entityType),
        });
        const tally = found.get(type) ?? { count: 0, severity };
        tally.count += 1;
        found.set(type, tally);
      }
    }

    // One risk factor per detector label that produced at least one match.
    const riskFactors = [...found].map(([type, { count, severity }]) => ({
      category: 'pii',
      description: `Detected ${count} ${type}(s)`,
      severity,
      confidence: 0.9,
      metadata: { piiType: type, count },
    }));

    const durationMs = performance.now() - startTime;
    return this.createResult(text, entities, riskFactors, durationMs);
  }

  /**
   * Applies a category-specific sanity check to a regex match.
   *
   * @param {string} value - Matched text.
   * @param {string} type - The detector's `entityType`.
   * @returns {boolean} True when the match should be reported.
   */
  validate(value, type) {
    switch (type) {
      case 'credit-card':
        return this.luhnCheck(value.replace(/\D/g, ''));
      case 'ssn': {
        const digits = value.replace(/\D/g, '');
        if (digits.length !== 9) {
          return false;
        }
        // Never-issued values: area 000, 666 or 900-999; group 00; serial 0000.
        const area = Number.parseInt(digits.slice(0, 3), 10);
        const group = digits.slice(3, 5);
        const serial = digits.slice(5);
        return area !== 0 && area !== 666 && area < 900
          && group !== '00' && serial !== '0000';
      }
      case 'email':
        return value.includes('@') && value.includes('.');
      default:
        return true;
    }
  }

  /**
   * Luhn checksum test.
   *
   * @param {string} num - Digits only.
   * @returns {boolean} True when the checksum is valid; false for empty or
   *   non-digit input.
   */
  luhnCheck(num) {
    const digits = String(num);
    if (!/^\d+$/.test(digits)) {
      return false;
    }
    let sum = 0;
    let shouldDouble = false;
    // Walk right to left, doubling every second digit.
    for (let i = digits.length - 1; i >= 0; i--) {
      let digit = Number.parseInt(digits[i], 10);
      if (shouldDouble) {
        digit *= 2;
        if (digit > 9) {
          digit -= 9;
        }
      }
      sum += digit;
      shouldDouble = !shouldDouble;
    }
    return sum % 10 === 0;
  }

  /**
   * @param {string} type - The detector's `entityType`.
   * @returns {number} Fixed confidence score for that category.
   */
  getConfidence(type) {
    switch (type) {
      case 'email':
        return 0.95;
      case 'credit-card':
        return 0.99;
      case 'ssn':
        return 0.85;
      case 'phone':
        return 0.75;
      default:
        return 0.7;
    }
  }

  /**
   * Produces a masked rendering of a matched value.
   *
   * @param {string} value - Raw matched text.
   * @param {string} type - The detector's `entityType`.
   * @returns {string} Masked text. Email keeps the domain and at most the
   *   first two characters of the local part; credit-card, ssn and phone keep
   *   the last four characters; anything else becomes `****`.
   */
  maskPII(value, type) {
    switch (type) {
      case 'email': {
        const at = value.indexOf('@');
        if (at === -1) {
          return '****';
        }
        const local = value.slice(0, at);
        const domain = value.slice(at + 1);
        // A local part of one or two characters would be shown in full by a
        // two-character prefix, so nothing is revealed in that case.
        const visible = local.length > 2 ? local.slice(0, 2) : '';
        return `${visible}***@${domain}`;
      }
      case 'credit-card':
        return `****-****-****-${value.slice(-4)}`;
      case 'ssn':
        return `***-**-${value.slice(-4)}`;
      case 'phone':
        return `***-***-${value.slice(-4)}`;
      default:
        return '****';
    }
  }
}
//# sourceMappingURL=pii.js.map