@himorishige/noren-core
Version:
Core PII detection, masking, and tokenization library built on Web Standards
89 lines (88 loc) • 4.4 kB
JavaScript
// Optimized built-in detection logic for v0.6.0 - single-pass unified detection with validation
import { extractSurroundingText } from './context-scoring.js';
import { PATTERN_TYPES, UNIFIED_PATTERN } from './patterns.js';
import { hitPool } from './pool.js';
import { prefilterText, quickDigitValidation } from './prefilter.js';
import { luhn, SECURITY_LIMITS } from './utils.js';
import { validateCandidate } from './validators.js';
/**
 * Runs the built-in single-pass detection over `u.src` and reports every
 * accepted candidate through `u.push`.
 *
 * Flow, per candidate produced by `UNIFIED_PATTERN`:
 *   1. the first non-empty capture group selects the entry of `PATTERN_TYPES`
 *      (group `i` maps to `PATTERN_TYPES[i - 1]`);
 *   2. credit-card candidates must pass `quickDigitValidation` and `luhn`;
 *   3. unless `strictness` is `'fast'`, `validateCandidate` must report
 *      `valid`; if the validator throws, the candidate is kept (best effort);
 *   4. accepted candidates are acquired from `hitPool`, optionally annotated
 *      with `confidence`, and pushed.
 *
 * @param {object} u - Detection context. The code reads `u.src` (the text to
 *   scan), calls `u.canPush()` before each match (scanning stops when it is
 *   missing or returns a falsy value) and calls `u.push(hit)` for each hit.
 * @param {string} [strictness='balanced'] - Passed through to
 *   `validateCandidate`; the value `'fast'` skips validator calls entirely.
 * @returns {void} Nothing is returned; results are delivered via `u.push`.
 *   The function returns early without scanning when the input is longer than
 *   `SECURITY_LIMITS.maxInputLength` or the prefilter asks to skip detection.
 */
export function builtinDetect(u, strictness = 'balanced') {
  const { src } = u;
  // Simple length check for security: oversized input is not scanned at all.
  if (src.length > SECURITY_LIMITS.maxInputLength) {
    return;
  }
  // Fast prefiltering to skip the expensive regex when there are no candidates.
  const prefilterResult = prefilterText(src);
  if (prefilterResult.skipExpensiveDetection) {
    return;
  }
  // Loop-invariant settings, hoisted out of the match loop.
  const contextWindow = 48; // characters of context kept on each side of a match
  const useValidators = strictness !== 'fast';
  // Single-pass unified detection for common patterns.
  for (const match of src.matchAll(UNIFIED_PATTERN)) {
    if (!u.canPush || !u.canPush()) {
      break;
    }
    if (match.index == null) {
      continue;
    }
    const startIndex = match.index;
    // Find which capture group matched; only the first non-empty one is used.
    for (let i = 1; i < match.length; i++) {
      const value = match[i];
      if (!value) {
        continue;
      }
      const patternType = PATTERN_TYPES[i - 1];
      const endIndex = startIndex + value.length;
      let isValid = true;
      // Basic validation for credit cards (kept for compatibility): cheap digit
      // check first, then the Luhn checksum on the value without separators.
      if (patternType.type === 'credit_card') {
        isValid =
          Boolean(quickDigitValidation(value, 'credit_card')) &&
          Boolean(luhn(value.replace(/[ -]/g, '')));
      }
      // Enhanced validation using the validator system.
      if (isValid && useValidators) {
        // Validate against a window around the match rather than the full text.
        const beforeStart = Math.max(0, startIndex - contextWindow);
        const afterEnd = Math.min(src.length, endIndex + contextWindow);
        const validationContext = {
          surroundingText: src.slice(beforeStart, afterEnd),
          strictness,
          originalIndex: startIndex - beforeStart, // index relative to the window
        };
        try {
          const validationResult = validateCandidate(value, patternType.type, validationContext);
          if (!validationResult.valid) {
            isValid = false;
          }
        }
        catch (error) {
          // On validation error, fall back to allowing the match so a broken
          // validator cannot disable detection entirely.
          console.warn(`Validation error for ${patternType.type}: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
      }
      if (isValid) {
        const hit = hitPool.acquire(patternType.type, startIndex, endIndex, value, patternType.risk);
        // Add validation metadata if available.
        if (useValidators) {
          // This lookup must not throw: the hit has already been acquired from
          // the pool, and an exception here would abort the whole scan and
          // defeat the fallback above. On failure the hit is pushed without a
          // confidence value.
          // NOTE(review): originalIndex is the absolute index here, while the
          // validation pass above uses a window-relative index — confirm which
          // one validateCandidate expects.
          try {
            const confidenceContext = {
              surroundingText: extractSurroundingText(src, startIndex, endIndex),
              strictness,
              originalIndex: startIndex,
            };
            const confidenceResult = validateCandidate(value, patternType.type, confidenceContext);
            hit.confidence = confidenceResult.confidence;
          }
          catch (error) {
            console.warn(`Confidence scoring error for ${patternType.type}: ${error instanceof Error ? error.message : 'Unknown error'}`);
          }
        }
        u.push(hit);
      }
      break; // Only process the first matching group
    }
  }
}