// Package: @himorishige/noren-core
// Version: (not specified)
// Core PII detection, masking, and tokenization library built on Web Standards
// Package-viewer listing: 242 lines (241 loc) • 8.1 kB, JavaScript
/**
* PII validation functions for false positive reduction
* Implements lightweight, fast validation logic for each PII type
*/
import { CREDIT_CARD_BRANDS, EXAMPLE_DOMAINS, NON_PII_EMAIL_PREFIXES, NORMALIZED_TEST_CREDIT_CARDS, STRICTNESS_LEVELS, } from './constants.js';
import { calculateContextScore as calcContextScore } from './context-scoring.js';
import { luhn } from './utils.js';
/**
 * Decide whether a credit-card-looking candidate is a plausible card number.
 *
 * Checks run in a fixed order and the first one that fails determines the
 * result: length (13-19 digits), Luhn checksum, known test numbers, brand and
 * brand-specific length, runs of a repeated digit, ascending/descending digit
 * sequences, and finally a context requirement for bare 16-digit numbers.
 *
 * @param {string} candidate - Matched text; whitespace and hyphens are stripped
 *   before the digit checks.
 * @param {object} context - Validation context. `context.strictness` selects an
 *   entry of STRICTNESS_LEVELS; `context.surroundingText` is forwarded to the
 *   context scorer.
 * @returns {{valid: boolean, confidence: number, reason: string, metadata?: object}}
 *   Validation verdict with a machine-readable reason.
 */
export function validateCreditCard(candidate, context) {
    const digits = candidate.replace(/[\s-]/g, '');
    const settings = STRICTNESS_LEVELS[context.strictness];
    const digitCount = digits.length;
    // Factory for the rejections that carry no metadata.
    const reject = (confidence, reason) => ({ valid: false, confidence, reason });

    // Structural checks that do not depend on the strictness settings.
    if (digitCount < 13 || digitCount > 19) {
        return reject(0.0, 'invalid_length');
    }
    if (!luhn(digits)) {
        return reject(0.0, 'luhn_failed');
    }

    // Well-known test numbers are dropped when the settings ask for it.
    if (settings.excludeTestData && NORMALIZED_TEST_CREDIT_CARDS.has(digits)) {
        return reject(0.0, 'test_number');
    }

    // Brand detection: the first brand whose pattern matches wins.
    let brand = null;
    if (settings.brandValidation) {
        let lengthOk = false;
        const match = Object.entries(CREDIT_CARD_BRANDS).find(([, info]) => info.pattern.test(digits));
        if (match) {
            brand = match[0];
            lengthOk = match[1].lengths.includes(digitCount);
        }
        if (!brand) {
            return reject(0.0, 'unknown_brand');
        }
        if (!lengthOk) {
            return {
                valid: false,
                confidence: 0.0,
                reason: 'invalid_brand_length',
                metadata: { brand, length: digitCount },
            };
        }
    }

    // Four or more identical digits in a row are treated as likely test data.
    if (/(\d)\1{3,}/.test(digits)) {
        return reject(0.1, 'repeated_digits');
    }
    // So is any four-digit ascending or descending run.
    if (/(?:0123|1234|2345|3456|4567|5678|6789|9876|8765|7654|6543|5432|4321|3210)/.test(digits)) {
        return reject(0.2, 'sequential_digits');
    }

    // A 16-digit number written without separators must be backed by
    // surrounding context. The check applies whenever the strictness settings
    // enable excludeTestData; the 'strict' level demands a higher score.
    const isBare = candidate.search(/[\s-]/) === -1;
    if (settings.excludeTestData && isBare && digitCount === 16) {
        const contextScore = calculateContextScore(context.surroundingText, 'credit_card');
        let threshold = 1.2;
        if (context.strictness === 'strict') {
            threshold = 2.0;
        }
        if (contextScore < threshold) {
            return {
                valid: false,
                confidence: 0.3,
                reason: 'bare_digits_weak_context',
                metadata: { contextScore, threshold },
            };
        }
    }

    // Without brand validation the brand stays null and confidence is lower.
    if (brand) {
        return {
            valid: true,
            confidence: 0.9,
            reason: `valid_${brand}`,
            metadata: { brand, digits: digitCount },
        };
    }
    return {
        valid: true,
        confidence: 0.7,
        reason: 'valid_unknown_brand',
        metadata: { brand, digits: digitCount },
    };
}
/**
 * Validate an email address candidate.
 *
 * The candidate is split at its first '@'. It is rejected when it has no '@',
 * when the domain is a known example domain (only if the strictness settings
 * enable excludeTestData), when the local part is empty, when the normalized
 * local part is a known non-PII prefix, when the domain has no TLD or an
 * implausible one (must be 2-24 ASCII letters), or when the domain contains an
 * empty label (leading dot or consecutive dots).
 *
 * @param {string} candidate - Matched text to validate.
 * @param {object} context - Validation context. `context.strictness` selects an
 *   entry of STRICTNESS_LEVELS; `context.surroundingText` is forwarded to the
 *   context scorer.
 * @returns {{valid: boolean, confidence: number, reason: string, metadata?: object}}
 *   Validation verdict. For accepted addresses the confidence is
 *   0.6 + 0.15 * contextScore, capped at 0.9.
 */
export function validateEmail(candidate, context) {
    const settings = STRICTNESS_LEVELS[context.strictness];
    const atIndex = candidate.indexOf('@');
    if (atIndex === -1) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'no_at_symbol',
        };
    }
    const localPart = candidate.slice(0, atIndex);
    const domain = candidate.slice(atIndex + 1);
    // Looked up once; reused for the exclusion check and the result metadata.
    const isExample = EXAMPLE_DOMAINS.has(domain.toLowerCase());
    // Check example domains
    if (settings.excludeTestData && isExample) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'example_domain',
        };
    }
    // An address with nothing before the '@' cannot identify a mailbox.
    if (localPart.length === 0) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'empty_local_part',
        };
    }
    // Check non-PII prefixes (dots and hyphens are ignored, e.g. "no-reply")
    const normalizedLocal = localPart.toLowerCase().replace(/[.-]/g, '');
    if (NON_PII_EMAIL_PREFIXES.has(normalizedLocal)) {
        return {
            valid: false,
            confidence: 0.1,
            reason: 'non_pii_prefix',
            metadata: { prefix: normalizedLocal },
        };
    }
    // TLD validation
    const lastDot = domain.lastIndexOf('.');
    if (lastDot === -1) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'no_tld',
        };
    }
    const tld = domain.slice(lastDot + 1);
    if (tld.length < 2 || tld.length > 24 || !/^[a-z]+$/i.test(tld)) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'invalid_tld',
        };
    }
    // Reject domains with an empty label such as ".com" or "corp..io".
    // This runs after the TLD checks so inputs that were already rejected
    // keep their original reason.
    if (domain.split('.').some((label) => label.length === 0)) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'invalid_domain',
        };
    }
    // Context scoring
    const contextScore = calculateContextScore(context.surroundingText, 'email');
    return {
        valid: true,
        confidence: Math.min(0.9, 0.6 + contextScore * 0.15),
        reason: 'valid_email',
        metadata: { contextScore, domain, isExample },
    };
}
/**
 * Reduce the context analysis for a PII type to its numeric score.
 *
 * Delegates to the imported context scorer and returns only the `score`
 * property of the object it produces.
 *
 * @param {*} surroundingText - Text around the candidate, passed through as-is.
 * @param {string} piiType - PII type identifier, passed through as-is.
 * @returns {*} The `score` property of the scorer's result.
 */
function calculateContextScore(surroundingText, piiType) {
    return calcContextScore(surroundingText, piiType).score;
}
/**
 * Entry point that routes a candidate to the validator for its PII type.
 *
 * Non-string or empty candidates and candidates longer than 1000 characters
 * are rejected up front. 'credit_card' and 'email' go to their dedicated
 * validators; every other type is accepted with a fixed 0.7 confidence.
 * Exceptions thrown by a validator are converted into a 'validation_error'
 * result instead of propagating.
 *
 * @param {string} candidate - Matched text to validate.
 * @param {string} piiType - PII type identifier used for dispatch.
 * @param {object} context - Validation context forwarded to the validator.
 * @returns {{valid: boolean, confidence: number, reason: string, metadata?: object}}
 *   Validation verdict.
 */
export function validateCandidate(candidate, piiType, context) {
    const maxLength = 1000;

    // Reject anything that is not a non-empty string.
    if (typeof candidate !== 'string' || candidate === '') {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'invalid_input',
            metadata: { error: 'Candidate must be a non-empty string' },
        };
    }
    // Oversized candidates are refused before any validator runs.
    if (candidate.length > maxLength) {
        return {
            valid: false,
            confidence: 0.0,
            reason: 'candidate_too_long',
            metadata: { length: candidate.length, maxLength: 1000 },
        };
    }

    try {
        if (piiType === 'credit_card') {
            return validateCreditCard(candidate, context);
        }
        if (piiType === 'email') {
            return validateEmail(candidate, context);
        }
        // Types without a dedicated validator pass through unchanged.
        return { valid: true, confidence: 0.7, reason: 'no_validation' };
    }
    catch (failure) {
        // A throwing validator yields a rejection rather than an exception.
        let message = 'Unknown validation error';
        if (failure instanceof Error) {
            message = failure.message;
        }
        return {
            valid: false,
            confidence: 0.0,
            reason: 'validation_error',
            metadata: {
                error: message,
                piiType,
                candidateLength: candidate.length,
            },
        };
    }
}
/**
 * Print a human-readable trace of how a candidate fares in validation.
 *
 * Runs the candidate through validateCandidate and writes the inputs, the
 * verdict, any metadata entries (JSON-encoded), and a closing summary line to
 * the console. Nothing is returned.
 *
 * @param {string} candidate - Matched text to validate.
 * @param {string} piiType - PII type identifier used for dispatch.
 * @param {object} context - Validation context; `surroundingText` and
 *   `strictness` are echoed in the output.
 * @returns {void}
 */
export function debugValidation(candidate, piiType, context) {
    const print = (line) => console.log(line);
    const result = validateCandidate(candidate, piiType, context);

    // Inputs first, then the verdict.
    print(`🔍 Validation Debug for ${piiType}: "${candidate}"`);
    print(`📍 Context: "${context.surroundingText}"`);
    print(`⚙️ Strictness: ${context.strictness}`);
    print(`✅ Valid: ${result.valid}`);
    print(`🎯 Confidence: ${result.confidence}`);
    print(`💭 Reason: ${result.reason}`);

    // Metadata is optional; each entry gets its own line.
    if (result.metadata) {
        print(`📋 Metadata:`);
        for (const [key, value] of Object.entries(result.metadata)) {
            print(` ${key}: ${JSON.stringify(value)}`);
        }
    }

    print(result.valid
        ? `✅ This candidate passed validation`
        : `❌ This candidate was filtered out by validation`);
    print(''); // Empty line for readability
}