traceprompt-node

Version:

Client-side encrypted, audit-ready logging for LLM applications

89 lines (81 loc) • 2.28 kB

text/typescript

import patterns from "../idPatterns.json";
import { Recognizer, Entity } from "../../types";
import {
  dniCheck,
  inseeCheck,
  beEidCheck,
  luhn10,
  nhsMod11,
  svnrMod11,
  imeiLuhn,
  npiLuhn,
} from "../utils/checksums";

/** Shape that each entry of `idPatterns.json` is cast to. */
type IdRule = {
  /** Entity type reported for matches of this rule. */
  type: string;
  /** Regular-expression source; compiled with the `g` and `u` flags. */
  regex: string;
  /** Alternatives joined with `|`; at least one must appear near a match. */
  context?: string[];
  /** Key into `validators` naming the checksum function to run, if any. */
  validate?: string | null;
};

/** Checksum functions addressable by the `validate` field of a rule. */
const validators = {
  dniCheck,
  inseeCheck,
  beEidCheck,
  luhn10,
  nhsMod11,
  svnrMod11,
  imeiLuhn,
  npiLuhn,
};

/** Number of characters inspected on each side of a match for context. */
const CONTEXT_WINDOW = 25;

/** Context terms that cause an `EIN` match to be discarded. */
const EIN_REJECT_CONTEXT = /routing|aba|acct|checking|bank/i;

/**
 * Separators stripped before checksum validation: whitespace, ASCII hyphen,
 * Unicode dashes U+2010 through U+2015, dot and slash.
 */
const SEPARATORS = /[\s\u002D\u2010-\u2015.\/]/g;

/**
 * Resolves a validator by name, considering only own properties of
 * `validators` so inherited members (e.g. `toString`) are never picked up.
 * Returns `undefined` for unknown names.
 */
function lookupValidator(
  name: string
): ((value: string) => unknown) | undefined {
  if (!Object.prototype.hasOwnProperty.call(validators, name)) {
    return undefined;
  }
  // The lookup key comes from JSON data, so it cannot be statically typed.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return (validators as any)[name];
}

/**
 * Rules with everything that is constant per rule precomputed once at module
 * load: the match regex, the context regex and the resolved validator.
 */
const compiled = (patterns as IdRule[]).map((rule) => ({
  ...rule,
  re: new RegExp(rule.regex, "gu"),
  // Non-global regex: `test` keeps no `lastIndex` state, so reuse is safe.
  contextRe: rule.context
    ? new RegExp(rule.context.join("|"), "i")
    : undefined,
  validator: rule.validate ? lookupValidator(rule.validate) : undefined,
}));

/**
 * Recognizer that reports identifier matches for every rule in
 * `idPatterns.json`.
 *
 * For each match, `detect`:
 *  1. skips it when the rule has context terms and none occurs within
 *     `CONTEXT_WINDOW` characters of the match (for `EIN` rules the match is
 *     also skipped when a term from `EIN_REJECT_CONTEXT` occurs in the window);
 *  2. runs the rule's checksum validator, if any, on the match with
 *     separators removed; a failing checksum lowers confidence from 0.9 to
 *     0.7 but does not drop the match.
 *
 * Reported `start`/`end` are translated through `map.origPos`.
 */
export const idRecognizer: Recognizer = {
  id: "nat-id",
  detect(text, map) {
    const out: Entity[] = [];

    for (const rule of compiled) {
      for (const m of text.matchAll(rule.re)) {
        // `matchAll` always populates `index` on the matches it yields.
        const start = m.index!;
        const end = start + m[0].length;

        // 1️⃣ context guard
        if (rule.contextRe) {
          // Check both before and after the match for context terms.
          const contextWindow = text.slice(
            Math.max(0, start - CONTEXT_WINDOW),
            end + CONTEXT_WINDOW
          );
          if (!rule.contextRe.test(contextWindow)) continue;

          // Special case: reject EIN if routing/aba context is present.
          if (rule.type === "EIN" && EIN_REJECT_CONTEXT.test(contextWindow)) {
            continue;
          }
        }

        // 2️⃣ checksum validation (affects confidence, doesn't reject)
        let confidence = 0.9;
        if (rule.validator) {
          const cleanNum = m[0].replace(SEPARATORS, "");
          if (!rule.validator(cleanNum)) {
            confidence = 0.7; // invalid checksum: still detected, less certain
          }
        }

        out.push({
          type: rule.type as any,
          start: map.origPos(start),
          end: map.origPos(end),
          text: m[0],
          confidence,
          source: this.id,
          risk: "critical" as const,
        });
      }
    }

    return out;
  },
};