UNPKG

@himorishige/noren-plugin-network

Version:

Network PII detection plugin for Noren - IPv4, IPv6, and MAC address detection

199 lines (198 loc) 8.18 kB
import { parseIPv6 } from './ipv6-parser.js';
import { NETWORK_PATTERN_TYPES, NETWORK_PATTERNS, UNIFIED_NETWORK_PATTERN } from './patterns.js';
import { validateNetworkCandidate } from './validators.js';

/**
 * Context hints for network PII detection
 */
const NETWORK_CONTEXTS = {
  ip: new Set(['ip', 'address', 'server', 'host', 'endpoint', 'url', 'api', 'dns', 'gateway']),
  ipv6: new Set(['ipv6', 'ip6', 'interface', 'link-local', 'unique-local']),
  mac: new Set(['mac', 'ethernet', 'wifi', 'interface', 'adapter', 'nic', 'hardware']),
};

/** Characters of text taken on each side of a match and handed to the validator. */
const SURROUNDING_TEXT_RADIUS = 50;

/**
 * Tells a detector loop whether it must stop pushing hits.
 *
 * `canPush` is optional. When it is not supplied no limit applies, so this
 * returns `false`. (Negating an optional call, `!canPush?.()`, would evaluate
 * to `true` for a missing `canPush` and silently suppress every detection.)
 *
 * @param {(() => boolean) | undefined} canPush - Optional capacity check.
 * @returns {boolean} `true` only when `canPush` exists and returns a falsy value.
 */
function isAtCapacity(canPush) {
  return canPush != null && !canPush();
}

/**
 * Extracts the text around a match for use by the validator.
 *
 * @param {string} src - Full source text being scanned.
 * @param {number} index - Start offset of the match in `src`.
 * @param {number} length - Length of the matched value.
 * @returns {string} Slice of `src` covering the match plus up to
 *   `SURROUNDING_TEXT_RADIUS` characters on each side, clamped to the string bounds.
 */
function surroundingTextOf(src, index, length) {
  const from = Math.max(0, index - SURROUNDING_TEXT_RADIUS);
  const to = Math.min(src.length, index + length + SURROUNDING_TEXT_RADIUS);
  return src.slice(from, to);
}

/**
 * IPv4 address detector
 *
 * Scans `src` with `NETWORK_PATTERNS.ipv4`, validates each candidate with
 * `validateNetworkCandidate`, and pushes one hit per valid candidate.
 */
const ipv4Detector = {
  id: 'network.ipv4',
  priority: 0,
  match: ({ src, push, hasCtx, canPush }) => {
    // Context is optional; it only raises the base confidence.
    const hasContext = hasCtx(Array.from(NETWORK_CONTEXTS.ip));
    for (const m of src.matchAll(NETWORK_PATTERNS.ipv4)) {
      if (m.index === undefined || isAtCapacity(canPush)) break;
      const value = m[0];
      const confidence = hasContext ? 0.8 : 0.6;
      // Basic validation to reduce false positives
      const validation = validateNetworkCandidate(value, 'ipv4', {
        surroundingText: surroundingTextOf(src, m.index, value.length),
        strictness: 'balanced',
        originalIndex: 0, // Not used in this context
      });
      if (!validation.valid) continue;
      push({
        type: 'ipv4',
        start: m.index,
        end: m.index + value.length,
        value,
        risk: 'low',
        // Report whichever is higher: the context-based or the validator's confidence.
        confidence: Math.max(confidence, validation.confidence),
        reasons: ['ipv4_pattern', hasContext ? 'has_context' : 'no_context', validation.reason],
        features: {
          hasContext,
          isPrivate: validation.metadata?.isPrivate || false,
          octets: validation.metadata?.octets || [],
        },
      });
    }
  },
};

/**
 * IPv6 address detector
 *
 * Scans `src` with `NETWORK_PATTERNS.ipv6` and pushes a hit for every match
 * that `parseIPv6` reports as valid.
 */
const ipv6Detector = {
  id: 'network.ipv6',
  priority: -1, // Lower priority due to complex pattern
  match: ({ src, push, hasCtx, canPush }) => {
    const hasContext = hasCtx(Array.from(NETWORK_CONTEXTS.ipv6));
    for (const m of src.matchAll(NETWORK_PATTERNS.ipv6)) {
      if (m.index === undefined || isAtCapacity(canPush)) break;
      // The reported value is the full match, so the hit starts exactly at m.index.
      const value = m[0];
      const startIndex = m.index;
      // IPv6 validation using the parser
      const parseResult = parseIPv6(value);
      if (!parseResult.valid) continue;
      const confidence = hasContext ? 0.9 : 0.7;
      push({
        type: 'ipv6',
        start: startIndex,
        end: startIndex + value.length,
        value,
        risk: 'low',
        confidence,
        reasons: ['ipv6_pattern', hasContext ? 'has_context' : 'no_context', 'ipv6_parsed'],
        features: {
          hasContext,
          normalized: parseResult.normalized,
          isPrivate: parseResult.isPrivate || false,
          isLoopback: parseResult.isLoopback || false,
          isLinkLocal: parseResult.isLinkLocal || false,
          isUniqueLocal: parseResult.isUniqueLocal || false,
        },
      });
    }
  },
};

/**
 * MAC address detector
 *
 * Scans `src` with `NETWORK_PATTERNS.mac`, validates each candidate with
 * `validateNetworkCandidate`, and pushes one hit per valid candidate.
 */
const macDetector = {
  id: 'network.mac',
  priority: 0,
  match: ({ src, push, hasCtx, canPush }) => {
    const hasContext = hasCtx(Array.from(NETWORK_CONTEXTS.mac));
    for (const m of src.matchAll(NETWORK_PATTERNS.mac)) {
      if (m.index === undefined || isAtCapacity(canPush)) break;
      const value = m[0];
      const confidence = hasContext ? 0.8 : 0.5;
      // Basic validation
      const validation = validateNetworkCandidate(value, 'mac', {
        surroundingText: surroundingTextOf(src, m.index, value.length),
        strictness: 'balanced',
        originalIndex: 0,
      });
      if (!validation.valid) continue;
      push({
        type: 'mac',
        start: m.index,
        end: m.index + value.length,
        value,
        risk: 'low',
        // Report whichever is higher: the context-based or the validator's confidence.
        confidence: Math.max(confidence, validation.confidence),
        reasons: ['mac_pattern', hasContext ? 'has_context' : 'no_context', validation.reason],
        features: {
          hasContext,
          // Fall back to the raw value / ':' when the validator supplies no metadata.
          normalized: validation.metadata?.normalized || value,
          separator: validation.metadata?.separator || ':',
        },
      });
    }
  },
};

/**
 * Unified network detector (optional, for performance optimization)
 *
 * Runs the single `UNIFIED_NETWORK_PATTERN` over `src`. For each match, the
 * first non-empty capture group selects the entry of `NETWORK_PATTERN_TYPES`
 * (group `i` maps to index `i - 1`) that supplies the hit's `type` and `risk`.
 * Only IPv6 candidates are validated here (via `parseIPv6`).
 */
const unifiedNetworkDetector = {
  id: 'network.unified',
  priority: 1,
  match: ({ src, push, hasCtx, canPush }) => {
    // Context flags depend only on the input, so compute them once per call.
    const contextByType = {
      ipv4: hasCtx(Array.from(NETWORK_CONTEXTS.ip)),
      ipv6: hasCtx(Array.from(NETWORK_CONTEXTS.ipv6)),
      mac: hasCtx(Array.from(NETWORK_CONTEXTS.mac)),
    };
    for (const match of src.matchAll(UNIFIED_NETWORK_PATTERN)) {
      if (isAtCapacity(canPush)) break;
      if (match.index == null) continue;
      // Find which group matched
      for (let i = 1; i < match.length; i++) {
        if (match[i]) {
          const patternType = NETWORK_PATTERN_TYPES[i - 1];
          const value = match[i];
          let startIndex = match.index;
          let isValid = true;
          // Handle IPv6 position adjustment: locate the captured address
          // inside the full match, which may be longer than the capture.
          if (patternType.type === 'ipv6') {
            const fullMatch = match[0];
            const ipv6Start = fullMatch.lastIndexOf(value);
            startIndex = match.index + ipv6Start;
            const result = parseIPv6(value);
            if (!result.valid) {
              isValid = false;
            }
          }
          if (isValid) {
            const hasContext = contextByType[patternType.type] || false;
            const baseConfidence = hasContext ? 0.8 : 0.6;
            push({
              type: patternType.type,
              start: startIndex,
              end: startIndex + value.length,
              value,
              risk: patternType.risk,
              confidence: baseConfidence,
              reasons: [`${patternType.type}_pattern`, hasContext ? 'has_context' : 'no_context'],
              features: {
                hasContext,
                unified: true,
              },
            });
          }
          break; // Only process the first match in this iteration
        }
      }
    }
  },
};

// Export individual detectors (recommended for flexibility)
export const detectors = [ipv4Detector, ipv6Detector, macDetector];
// Export unified detector as alternative (for performance-critical scenarios)
export const unifiedDetector = unifiedNetworkDetector;