UNPKG

@himorishige/noren-plugin-us

Version:

US-specific PII detection plugin for Noren (phone numbers, ZIP codes, SSN)

111 lines (110 loc) 4.41 kB
import { validateSSN } from './validators.js'; // Pre-compiled regex patterns for US detectors const US_PATTERNS = { phone: /\b(?:\+1[-.\s]?)?(?:\(?[2-9]\d{2}\)?[-.\s]?)\d{3}[-.\s]?\d{4}\b/g, zip: /\b\d{5}(?:-\d{4})?\b/g, ssn: /\b(?!000|666|9\d{2})\d{3}-(?!00)\d{2}-(?!0000)\d{4}\b/g, }; // Context hints as Sets for better performance (O(1) lookup) const US_CONTEXTS = { zip: new Set(['zip', 'ZIP', 'postal', 'address']), ssn: new Set(['SSN', 'social', 'security']), phone: new Set(['phone', 'tel', 'call', 'contact']), }; export const detectors = [ { id: 'us.phone', match: ({ src, push, hasCtx }) => { const hasContext = hasCtx(Array.from(US_CONTEXTS.phone)); for (const m of src.matchAll(US_PATTERNS.phone)) { if (m.index == null) continue; const confidence = hasContext ? 0.8 : 0.65; push({ type: 'phone_us', start: m.index, end: m.index + m[0].length, value: m[0], risk: 'medium', confidence, reasons: ['us_phone_pattern', hasContext ? 'context_match' : 'no_context'], features: { hasContext, normalized: m[0].replace(/[^\d]/g, '').replace(/(\d{3})(\d{3})(\d{4})/, '$1-$2-$3'), hasCountryCode: m[0].includes('+1'), }, }); } }, }, { id: 'us.zip', match: ({ src, push, hasCtx }) => { const hasContext = hasCtx(Array.from(US_CONTEXTS.zip)); if (!hasContext) return; for (const m of src.matchAll(US_PATTERNS.zip)) { if (m.index == null) continue; const isExtended = m[0].includes('-'); const confidence = hasContext ? (isExtended ? 0.85 : 0.75) : 0.5; push({ type: 'zip_us', start: m.index, end: m.index + m[0].length, value: m[0], risk: 'low', confidence, reasons: ['zip_pattern', hasContext ? 'context_match' : 'no_context'], features: { hasContext, isExtended, normalized: isExtended ? m[0] : `${m[0].slice(0, 5)}-${m[0].slice(5) || '0000'}`, }, }); } }, }, { id: 'us.ssn', priority: -10, match: ({ src, push, hasCtx }) => { const hasContext = hasCtx(Array.from(US_CONTEXTS.ssn)); if (!hasContext) return; for (const m of src.matchAll(US_PATTERNS.ssn)) { const validation = validateSSN(m[0]); // Only push if basic validation passes or context is very strong if (validation.basic || hasContext) { if (m.index == null) continue; push({ type: 'ssn_us', start: m.index, end: m.index + m[0].length, value: m[0], risk: 'high', confidence: validation.confidence || (hasContext ? 0.6 : 0.3), reasons: [ 'ssn_pattern', ...validation.reason, hasContext ? 'context_match' : 'no_context', ], features: { basicValid: validation.basic, strictValid: validation.strict, hasContext, normalized: validation.normalized || m[0], validationReasons: validation.reason, }, }); } } }, }, ]; export const maskers = { phone_us: (h) => h.value.replace(/\d/g, '•'), zip_us: (h) => (h.value.length > 5 ? '•••••-••••' : '•••••'), ssn_us: (h) => `***-**-${h.value.replace(/\D/g, '').slice(-4)}`, };