traceprompt-node
Version:
Client-side encrypted, audit-ready logging for LLM applications
1,675 lines (1,662 loc) • 73.7 kB
JavaScript
import { performance } from 'perf_hooks';
import * as fs from 'fs';
import { createReadStream } from 'fs';
import * as path2 from 'path';
import path2__default from 'path';
import * as yaml from 'yaml';
import { buildClient, CommitmentPolicy, KmsKeyringNode } from '@aws-crypto/client-node';
import { Registry, Histogram, Counter, Gauge } from 'prom-client';
import winston from 'winston';
import { blake3 } from '@napi-rs/blake-hash';
import fs2 from 'fs/promises';
import { randomUUID, createHmac } from 'crypto';
import { createInterface } from 'readline';
import { fetch as fetch$1 } from 'undici';
import winkNLP from 'wink-nlp';
import model from 'wink-eng-lite-web-model';
import its from 'wink-nlp/src/its.js';
import nlp2 from 'compromise';
// Bundler-generated shim that stands in for CommonJS `require` in this build.
// - If `require` is defined when this line runs, `__require` is `require` itself.
// - Otherwise, if `Proxy` exists, it is a Proxy around the fallback function below
//   whose property reads are forwarded to `require` should it be defined later.
// - Otherwise it is the fallback function itself.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
// Fallback: delegate to `require` if it exists by call time, otherwise fail loudly.
if (typeof require !== "undefined") return require.apply(this, arguments);
throw Error('Dynamic require of "' + x + '" is not supported');
});
/**
 * Fetch the HMAC signing secret for the organization that owns `apiKey`.
 *
 * The endpoint is derived from `ingestUrl` by replacing its "/v1/ingest"
 * suffix with "/v1/hmac-secret".
 *
 * @param {string} apiKey - Sent as the `x-api-key` header.
 * @param {string} ingestUrl - Ingest endpoint URL.
 * @returns {Promise<string>} The `data.hmacSecret` value from the response.
 * @throws {Error} "Failed to fetch HMAC secret: <detail>" on transport errors,
 *   non-2xx responses or a malformed body. The original error is kept as `cause`.
 */
async function fetchHmacSecret(apiKey, ingestUrl) {
  try {
    const hmacUrl = `${ingestUrl.replace("/v1/ingest", "")}/v1/hmac-secret`;
    const response = await fetch(hmacUrl, {
      method: "GET",
      headers: {
        "x-api-key": apiKey,
        "Content-Type": "application/json"
      }
    });
    if (!response.ok) {
      let errorMessage = `${response.status} ${response.statusText}`;
      try {
        const errorBody = await response.json();
        errorMessage = errorBody.message || errorBody.error || errorMessage;
      } catch {
        // Error body is not JSON; keep the status line as the detail.
      }
      // The prefix is added once, by the catch below.
      throw new Error(errorMessage);
    }
    const result = await response.json();
    if (!result.success || !result.data?.hmacSecret) {
      throw new Error("Invalid HMAC secret response");
    }
    return result.data.hmacSecret;
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to fetch HMAC secret: ${detail}`, { cause: error });
  }
}
/**
 * Resolve the organization (and its KMS key ARN) that owns `apiKey`.
 *
 * The endpoint is derived from `ingestUrl` by replacing its "/v1/ingest"
 * suffix with "/v1/whoami".
 *
 * @param {string} apiKey - Sent as the `x-api-key` header.
 * @param {string} ingestUrl - Ingest endpoint URL.
 * @returns {Promise<{orgId: string, cmkArn: *}>} `data.orgId` and `data.kmsKeyArn`
 *   from the response.
 * @throws {Error} "Failed to auto-resolve organization from API key: <detail>";
 *   the original error is kept as `cause`.
 */
async function resolveOrgFromApiKey(apiKey, ingestUrl) {
  try {
    const whoamiUrl = `${ingestUrl.replace("/v1/ingest", "")}/v1/whoami`;
    const response = await fetch(whoamiUrl, {
      method: "GET",
      headers: {
        "x-api-key": apiKey,
        "Content-Type": "application/json"
      }
    });
    if (!response.ok) {
      let errorMessage = `${response.status} ${response.statusText}`;
      try {
        const errorBody = await response.json();
        errorMessage = errorBody.message || errorBody.error || errorMessage;
      } catch {
        // Error body is not JSON; keep the status line as the detail.
      }
      throw new Error(`Failed to resolve organization: ${errorMessage}`);
    }
    const result = await response.json();
    if (!result.success) {
      throw new Error("Failed to resolve organization from API key");
    }
    // `data` may be missing from a malformed response; report that as a missing
    // org id rather than letting a TypeError message leak through.
    const orgId = result.data?.orgId;
    if (!orgId) {
      throw new Error("No organization ID found in API key response");
    }
    const cmkArn = result.data.kmsKeyArn;
    // console.log rather than the SDK logger: the logger reads the loaded
    // config, which is not available while the config is still being resolved.
    console.log(`\u2713 Traceprompt auto-resolved organization: ${orgId}`);
    return { orgId, cmkArn };
  } catch (error) {
    throw new Error(
      `Failed to auto-resolve organization from API key: ${error instanceof Error ? error.message : String(error)}`,
      { cause: error }
    );
  }
}
/**
 * Read and parse a YAML file, resolved against the current working directory.
 * Best-effort by design: a missing file, a read failure, a parse failure or an
 * empty document all yield `{}`.
 */
function readYaml(filePath) {
  let parsed;
  try {
    const absolutePath = path2.resolve(process.cwd(), filePath);
    if (!fs.existsSync(absolutePath)) {
      return {};
    }
    const contents = fs.readFileSync(absolutePath, "utf8");
    parsed = yaml.parse(contents);
  } catch {
    return {};
  }
  return parsed ?? {};
}
/**
 * Holds the resolved SDK configuration.
 *
 * Precedence, lowest to highest: built-in defaults, the YAML file named by
 * TRACEPROMPT_RC, TRACEPROMPT_* environment variables, then the object passed
 * to `load()`.
 */
var ConfigManagerClass = class {
  /**
   * Resolve the configuration once. Concurrent callers share one in-flight
   * load. A failed load is not cached, so a later call can try again.
   *
   * @param {object} [userCfg] - Explicit overrides (highest precedence).
   * @returns {Promise<void>}
   * @throws {Error} If no API key is configured or remote resolution fails.
   */
  async load(userCfg = {}) {
    if (this._cfg) return;
    if (!this._loadPromise) {
      this._loadPromise = this._doLoad(userCfg).catch((error) => {
        // Drop the rejected promise; otherwise every later load() would
        // re-throw this same failure forever.
        this._loadPromise = void 0;
        throw error;
      });
    }
    await this._loadPromise;
  }
  /** Merge the config sources, resolve org and HMAC secret, and store the result. */
  async _doLoad(userCfg = {}) {
    const rcPath = process.env["TRACEPROMPT_RC"];
    const fileCfg = rcPath ? readYaml(rcPath) : {};
    const envCfg = {
      ...process.env["TRACEPROMPT_API_KEY"] && {
        apiKey: process.env["TRACEPROMPT_API_KEY"]
      },
      ...process.env["TRACEPROMPT_INGEST_URL"] && {
        ingestUrl: process.env["TRACEPROMPT_INGEST_URL"]
      },
      ...process.env["TRACEPROMPT_BATCH_SIZE"] && {
        batchSize: Number(process.env["TRACEPROMPT_BATCH_SIZE"])
      },
      ...process.env["TRACEPROMPT_FLUSH_INTERVAL_MS"] && {
        flushIntervalMs: Number(process.env["TRACEPROMPT_FLUSH_INTERVAL_MS"])
      },
      ...process.env["TRACEPROMPT_LOG_LEVEL"] && {
        logLevel: process.env["TRACEPROMPT_LOG_LEVEL"]
      }
    };
    const merged = {
      apiKey: "",
      ingestUrl: "https://api-staging.traceprompt.com/v1/ingest",
      batchSize: 25,
      flushIntervalMs: 2e3,
      staticMeta: {},
      logLevel: "info",
      ...fileCfg,
      ...envCfg,
      ...userCfg
    };
    if (!merged.apiKey) throw new Error("Traceprompt: apiKey is required");
    let orgId;
    let cmkArn;
    let hmacSecret;
    try {
      const resolved = await resolveOrgFromApiKey(merged.apiKey, merged.ingestUrl);
      orgId = resolved.orgId;
      cmkArn = resolved.cmkArn;
      hmacSecret = await fetchHmacSecret(merged.apiKey, merged.ingestUrl);
    } catch (error) {
      throw new Error(
        `Failed to auto-resolve organization or HMAC secret: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error }
      );
    }
    // Fall back to the defaults for anything that is not a finite positive
    // number. A plain `<= 0` test lets NaN (e.g. Number("abc")) and undefined through.
    const isPositiveFinite = (value) => Number.isFinite(Number(value)) && Number(value) > 0;
    if (!isPositiveFinite(merged.batchSize)) merged.batchSize = 25;
    if (!isPositiveFinite(merged.flushIntervalMs)) merged.flushIntervalMs = 2e3;
    this._cfg = {
      ...merged,
      orgId,
      cmkArn,
      hmacSecret,
      apiKey: merged.apiKey,
      ingestUrl: merged.ingestUrl
    };
  }
  /**
   * The resolved configuration.
   * @throws {Error} If accessed before a successful `load()`.
   */
  get cfg() {
    if (!this._cfg) {
      throw new Error("Traceprompt: initTracePrompt() must be called first");
    }
    return this._cfg;
  }
};
/**
 * Public entry point: load the SDK configuration through the shared
 * ConfigManager. Resolves with no value once loading has finished.
 */
async function initTracePrompt(cfg) {
  const manager = ConfigManager;
  await manager.load(cfg);
}
// Module-level singleton; initTracePrompt() and the helpers in this file read configuration through it.
var ConfigManager = new ConfigManagerClass();
/**
 * Create a KMS keyring whose generator key is the `cmkArn` from the loaded
 * configuration. Throws if the configuration has not been loaded yet.
 */
function buildKeyring() {
  const generatorKeyId = ConfigManager.cfg.cmkArn;
  return new KmsKeyringNode({ generatorKeyId });
}
// Dedicated Prometheus registry; every metric below registers here rather than
// in prom-client's default registry.
var registry = new Registry();
// Encryption latency. The name and help text say milliseconds and the buckets
// span 0.05-5.
var encryptHist = new Histogram({
name: "traceprompt_encrypt_ms",
help: "Latency of client-side AES-GCM envelope encryption (ms)",
buckets: [0.05, 0.1, 0.25, 0.5, 1, 2, 5],
registers: [registry]
});
// Registered but not bound to a variable, so no code in this part of the file
// can observe into it.
new Histogram({
name: "traceprompt_token_count",
help: "Tokens counted per prompt/response",
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1e3],
registers: [registry]
});
// Incremented when a flush or a graceful shutdown fails.
var flushFailures = new Counter({
name: "traceprompt_flush_failures_total",
help: "Number of failed POSTs to the Traceprompt ingest API",
registers: [registry]
});
// Updated by the persistent batcher as records are queued and flushed.
var queueGauge = new Gauge({
name: "traceprompt_queue_depth",
help: "Number of events currently buffered in memory",
registers: [registry]
});
// Lazily created winston logger shared by the `log` facade.
var logger = null;
/**
 * Build the SDK's console logger.
 *
 * The level comes from the loaded config (`logLevel`), falling back to
 * "verbose" when that is empty. Lines are rendered as
 * `[timestamp] [Traceprompt] [LEVEL] message`, followed by the stack trace on
 * a new line when one is present.
 */
function createLogger() {
  const settings = ConfigManager.cfg;
  const renderLine = ({ level, message, timestamp, stack }) => {
    const prefix = `[${timestamp}] [Traceprompt] [${level.toUpperCase()}]`;
    const line = `${prefix} ${message}`;
    return stack ? `${line}\n${stack}` : line;
  };
  const lineFormat = winston.format.combine(
    winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
    winston.format.errors({ stack: true }),
    winston.format.printf(renderLine)
  );
  const consoleTransport = new winston.transports.Console({
    handleExceptions: true,
    handleRejections: true
  });
  return winston.createLogger({
    level: settings.logLevel || "verbose",
    format: lineFormat,
    transports: [consoleTransport],
    exitOnError: false
  });
}
/** Return the shared logger, creating it on first use. */
function getLogger() {
  return logger || (logger = createLogger());
}
// Level-named facade over the lazily created logger. Each call looks the logger
// up at call time, so nothing is built until the first message is logged.
var log = Object.fromEntries(
  ["error", "warn", "info", "verbose", "debug", "silly"].map((level) => [
    level,
    (message, meta) => getLogger()[level](message, meta)
  ])
);
// src/crypto/encryptor.ts
// AWS Encryption SDK client functions, built with the
// REQUIRE_ENCRYPT_REQUIRE_DECRYPT commitment policy.
var { encrypt, decrypt } = buildClient(
CommitmentPolicy.REQUIRE_ENCRYPT_REQUIRE_DECRYPT
);
/**
 * Envelope-encrypt `plain` with the organization's KMS keyring.
 *
 * The organization id is bound into the encryption context as `org_id`.
 * Latency is recorded in `encryptHist` in milliseconds, on success and failure.
 *
 * @param {Buffer|Uint8Array|string} plain - Data to encrypt.
 * @returns {Promise<{ciphertext: string, encryptedDataKey: string, suiteId: *}>}
 *   Base64 ciphertext, the base64 first encrypted data key, and the suite id
 *   from the message header.
 */
async function encryptBuffer(plain) {
  const keyring = buildKeyring();
  const { orgId } = ConfigManager.cfg;
  // Measured by hand: the histogram is named `_ms` with millisecond buckets,
  // whereas prom-client's startTimer() observes the duration in seconds.
  const startedAt = performance.now();
  try {
    log.info("Encrypting buffer", { orgId });
    const { result, messageHeader } = await encrypt(keyring, plain, {
      encryptionContext: { org_id: orgId }
    });
    return {
      ciphertext: Buffer.from(result).toString("base64"),
      encryptedDataKey: Buffer.from(
        messageHeader.encryptedDataKeys[0].encryptedDataKey
      ).toString("base64"),
      suiteId: messageHeader.suiteId
    };
  } finally {
    encryptHist.observe(performance.now() - startedAt);
  }
}
/**
 * Decrypt a bundle produced by `encryptBuffer`.
 *
 * After decryption the message's encryption context is checked: its `org_id`
 * must equal the configured organization id, which is the value bound at
 * encryption time. A message encrypted for another context is rejected.
 *
 * @param {{ciphertext: string}} bundle - Bundle with base64 `ciphertext`.
 * @returns {Promise<Buffer>} The decrypted plaintext.
 * @throws {Error} If decryption fails or the encryption context does not match.
 */
async function decryptBundle(bundle) {
  const keyring = buildKeyring();
  const { plaintext, messageHeader } = await decrypt(
    keyring,
    Buffer.from(bundle.ciphertext, "base64")
  );
  const expectedOrgId = ConfigManager.cfg.orgId;
  if (messageHeader?.encryptionContext?.org_id !== expectedOrgId) {
    throw new Error(
      "Traceprompt: encryption context mismatch, refusing to return plaintext"
    );
  }
  return plaintext;
}
/**
 * Hex-encoded BLAKE3 digest of `data`. An `undefined` input is hashed as the
 * string "null".
 */
function computeLeaf(data) {
  const input = data === void 0 ? "null" : data;
  const digest = blake3(input);
  return digest.toString("hex");
}
// Token-counting function, set by maybeInitTiktoken() on success; null until then.
var encodeFn = null;
// Distribution of the token counts returned by countTokens().
var tokenCountHist2 = new Histogram({
name: "traceprompt_tokens_per_string",
help: "Number of tokens counted per string passed to countTokens()",
buckets: [1, 5, 10, 20, 50, 100, 200, 500, 1e3],
registers: [registry]
});
/**
 * Count the tokens in `text` and record the count in `tokenCountHist2`.
 *
 * Uses the tiktoken-backed encoder when it is (or can be) initialised;
 * otherwise estimates 1.33 tokens per whitespace-separated word, rounded up.
 * Empty or whitespace-only text counts as 0 in the fallback path.
 *
 * @param {string} text
 * @returns {number} Token count.
 */
function countTokens(text) {
  let tokens;
  if (encodeFn || maybeInitTiktoken()) {
    tokens = encodeFn(text);
  } else {
    const trimmed = text.trim();
    // "".split(/\s+/) yields [""], which would count one word for empty text.
    const words = trimmed === "" ? 0 : trimmed.split(/\s+/).length;
    tokens = Math.ceil(words * 1.33);
  }
  tokenCountHist2.observe(tokens);
  return tokens;
}
// True once loading the optional tokenizer has been attempted, so it is tried
// at most once.
var triedTiktoken = false;
/**
 * Try, once, to load the optional `@dqbd/tiktoken` package and install a real
 * token counter in `encodeFn`.
 *
 * "cl100k_base" is an encoding name, so it is loaded with `get_encoding`;
 * `encoding_for_model` expects a model name.
 *
 * @returns {boolean} True when `encodeFn` is available.
 */
function maybeInitTiktoken() {
  if (encodeFn || triedTiktoken) return !!encodeFn;
  triedTiktoken = true;
  try {
    const { get_encoding } = __require("@dqbd/tiktoken");
    const enc = get_encoding("cl100k_base");
    encodeFn = (s) => enc.encode(s).length;
    return true;
  } catch {
    // Optional dependency missing or unusable: callers fall back to the heuristic.
    return false;
  }
}
// src/utils/retry.ts
/**
 * Call `fn` until it succeeds or `attempts` calls have failed.
 *
 * After each failure `onError(err, attempt)` is invoked (if given). Unless that
 * was the last allowed attempt, the loop then sleeps for a random time in
 * [0, baseDelay * 2^(attempt-1)) before trying again. The last error is rethrown.
 */
async function retry(fn, attempts = 5, baseDelay = 250, onError) {
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      onError?.(err, attempt);
      if (attempt >= attempts) throw err;
      const ceiling = baseDelay * 2 ** (attempt - 1);
      const waitMs = Math.random() * ceiling;
      await new Promise((wake) => setTimeout(wake, waitMs));
    }
  }
}
/**
 * Serialise a value to JSON with object keys sorted, so equal values always
 * produce the same string.
 *
 * The output follows `JSON.stringify` semantics so it describes exactly what is
 * sent over the wire: values with a `toJSON` method are converted first,
 * properties whose value cannot be serialised (undefined, functions, symbols)
 * are omitted, and such values inside arrays (including holes) become `null`.
 *
 * @param {*} obj - Value to serialise.
 * @returns {string|undefined} Canonical JSON, or `undefined` when the top-level
 *   value itself cannot be serialised (as with `JSON.stringify`).
 */
function deterministicStringify(obj) {
  if (obj === null || typeof obj !== "object") {
    return JSON.stringify(obj);
  }
  if (typeof obj.toJSON === "function") {
    return deterministicStringify(obj.toJSON());
  }
  if (Array.isArray(obj)) {
    // Array.from visits holes as undefined, unlike map().
    const items = Array.from(obj, (item) => deterministicStringify(item) ?? "null");
    return `[${items.join(",")}]`;
  }
  const pairs = [];
  for (const key of Object.keys(obj).sort()) {
    const encoded = deterministicStringify(obj[key]);
    if (encoded !== undefined) {
      pairs.push(`${JSON.stringify(key)}:${encoded}`);
    }
  }
  return `{${pairs.join(",")}}`;
}
/**
 * Hex HMAC-SHA256 of the canonical (key-sorted) JSON form of `payload`, keyed
 * with the base64-decoded `secret`.
 */
function generateHmacSignature(payload, secret) {
  const canonical = deterministicStringify(payload);
  const key = Buffer.from(secret, "base64");
  const hmac = createHmac("sha256", key);
  hmac.update(canonical);
  return hmac.digest("hex");
}
// src/network/transport.ts
var Transport = {
  /**
   * POST `body` as JSON to `path3` on the ingest host.
   *
   * @param {string} path3 - Request path, e.g. "/v1/ingest".
   * @param {object} body - JSON-serialisable request body.
   * @param {number|object} [retries=5] - Maximum attempts. For compatibility
   *   with callers that pass extra headers as the third argument, a plain
   *   object here is treated as `headers` and the default attempt count is used.
   * @param {object} [headers] - Extra request headers.
   * @returns {Promise<void>}
   */
  async post(path3, body, retries = 5, headers) {
    const headersInRetriesSlot = retries !== null && typeof retries === "object";
    const attempts = headersInRetriesSlot ? 5 : retries;
    const extraHeaders = headersInRetriesSlot ? { ...retries, ...headers } : headers;
    await sendJson({
      path: path3,
      body,
      retries: attempts,
      method: "POST",
      headers: extraHeaders
    });
  }
};
/**
 * Send a JSON request to the ingest host, retrying transient failures.
 *
 * For "/v1/ingest" requests with a body, an `hmacSignature` computed over
 * `body.records` is added to the body. Network errors, 5xx and 429 responses
 * are retried with backoff; any other 4xx response fails immediately, because
 * repeating the same request cannot succeed.
 *
 * @param {{path: string, body?: object, retries?: number, method?: string,
 *   headers?: object}} opts
 * @returns {Promise<void>}
 * @throws {Error} "Traceprompt: HTTP <status> - <body>" for HTTP failures, or
 *   the underlying network error once attempts are exhausted.
 */
async function sendJson(opts) {
  const { ingestUrl, apiKey, hmacSecret } = ConfigManager.cfg;
  const url = new URL(opts.path, ingestUrl).toString();
  const method = opts.method ?? "POST";
  // Only a real number may bound the retry loop; any other value would make the
  // `attempt >= attempts` comparison never true.
  const maxAttempts = typeof opts.retries === "number" && Number.isFinite(opts.retries) ? opts.retries : 5;
  const extra = opts.headers ?? {};
  const signsBody = opts.path === "/v1/ingest" && !!opts.body;
  let requestBody = opts.body;
  if (signsBody) {
    requestBody = {
      ...requestBody,
      hmacSignature: generateHmacSignature(requestBody.records, hmacSecret)
    };
  }
  log.verbose(`Sending request to ${opts.path}`, {
    url,
    method,
    retries: maxAttempts,
    hasBody: !!requestBody,
    hasHmacSignature: signsBody
  });
  // A non-retryable failure is recorded here and thrown after retry() returns,
  // so that retry() does not schedule further attempts for it.
  let fatalError = null;
  await retry(
    async () => {
      const res = await fetch$1(url, {
        method,
        headers: {
          "content-type": "application/json",
          "user-agent": "traceprompt-sdk/0.1.0",
          "x-api-key": apiKey,
          ...extra
        },
        body: JSON.stringify(requestBody)
      });
      if (res.status < 400) {
        log.debug(`Request successful`, { status: res.status, url });
        return;
      }
      const msg = await res.text();
      const errorMessage = `HTTP ${res.status} - ${msg}`;
      const meta = { status: res.status, url, response: msg };
      const failure = new Error(`Traceprompt: ${errorMessage}`);
      if (res.status >= 500) {
        log.warn(`Server error (will retry): ${errorMessage}`, meta);
        throw failure;
      }
      if (res.status === 429) {
        log.warn(`Rate limited (will retry): ${errorMessage}`, meta);
        throw failure;
      }
      if (res.status === 401 || res.status === 403) {
        log.error(`Authentication/authorization error: ${errorMessage}`, {
          ...meta,
          hint: "Check your API key and organization permissions"
        });
      } else {
        log.error(`Client error: ${errorMessage}`, meta);
      }
      fatalError = failure;
    },
    maxAttempts,
    250,
    (error, attempt) => {
      log.verbose(`Request attempt ${attempt} failed, retrying...`, {
        error: error instanceof Error ? error.message : String(error),
        attempt,
        maxRetries: maxAttempts,
        url
      });
    }
  );
  if (fatalError) throw fatalError;
  log.verbose(`Request completed successfully`, { url });
}
// src/queue/persistentBatcher.ts
/** Return the loaded SDK configuration; throws if it has not been loaded. */
function getConfig() {
  const { cfg } = ConfigManager;
  return cfg;
}
/**
 * Absolute path of the queue directory: `<dataDir>/queue`, where `dataDir`
 * defaults to ".traceprompt" (resolved against the working directory).
 */
function getDir() {
  const { dataDir } = getConfig();
  const baseDir = dataDir ?? ".traceprompt";
  return path2__default.resolve(baseDir, "queue");
}
/** Path of the on-disk outbox file, `outbox.log` inside the queue directory. */
function getLogPath() {
  const queueDir = getDir();
  return path2__default.join(queueDir, "outbox.log");
}
/** Ring-buffer capacity: twice the configured batch size (10 when unset or 0). */
function getMaxRamRecords() {
  const { batchSize } = getConfig();
  return 2 * (batchSize || 10);
}
// Outbox size limit (5 MiB) that append() compares the outbox file size against.
var MAX_FILE_BYTES = 5 * 1024 * 1024;
// Set once bootstrap() has created the queue directory.
var bootstrapDone = false;
// Cached promise for the lazily imported `p-limit` default export (see getPLimit()).
var pLimitPromise = null;
// Set by gracefulShutdown(); append() rejects new records while it is true.
var closing = false;
/**
 * Resolve to the default export of `p-limit`, importing it on first use and
 * reusing the same promise afterwards.
 */
async function getPLimit() {
  if (pLimitPromise) {
    return pLimitPromise;
  }
  pLimitPromise = import('p-limit').then((mod) => mod.default);
  return pLimitPromise;
}
/** Create the queue directory (recursively) the first time it is needed. */
async function bootstrap() {
  if (!bootstrapDone) {
    await fs2.mkdir(getDir(), { recursive: true });
    bootstrapDone = true;
  }
}
// In-memory ring buffer of pending records.
// `head` is the index of the oldest record and `len` the number stored.
var ring = [];
var head = 0;
var len = 0;
var ringInitialized = false;
/** Allocate the ring storage on first use, sized from the configuration. */
function initializeRing() {
  if (!ringInitialized) {
    ring = new Array(getMaxRamRecords());
    ringInitialized = true;
  }
}
/** Add `item` to the ring; when the ring is full the oldest record is overwritten. */
function ringPush(item) {
  initializeRing();
  const capacity = getMaxRamRecords();
  const slot = (head + len) % capacity;
  ring[slot] = item;
  if (len >= capacity) {
    // The slot just written was the oldest record; advance past it.
    head = (head + 1) % capacity;
  } else {
    len += 1;
  }
}
/** Remove and return up to `n` records, oldest first. */
function ringDrip(n) {
  initializeRing();
  const capacity = getMaxRamRecords();
  const drained = [];
  for (; drained.length < n && len > 0; len--) {
    drained.push(ring[head]);
    head = (head + 1) % capacity;
  }
  return drained;
}
/**
 * Persist `item` to the on-disk outbox and add it to the in-memory ring.
 *
 * Backpressure is applied before anything is written: if the outbox file
 * already exceeds MAX_FILE_BYTES the record is rejected and nothing is stored,
 * so a failed call never leaves a queued record behind.
 *
 * @param {object} item - Record to queue. It is written to disk with a fresh
 *   `id` and pushed to the ring as given.
 * @returns {Promise<void>}
 * @throws {Error} When the SDK is shutting down, the outbox is full, its size
 *   cannot be checked, or the write fails.
 */
async function append(item) {
  if (closing) {
    throw new Error("Traceprompt SDK is shutting down, rejecting new events");
  }
  await bootstrap();
  initializeTimer();
  const outboxPath = getLogPath();
  let size = 0;
  try {
    ({ size } = await fs2.stat(outboxPath));
  } catch (e) {
    // A missing outbox just means nothing has been queued yet.
    if (e.code !== "ENOENT") {
      log.warn("Failed to check outbox file size", {
        error: e instanceof Error ? e.message : String(e),
        outboxPath
      });
      throw e;
    }
  }
  if (size > MAX_FILE_BYTES) {
    log.error("Outbox file size exceeded limit - applying backpressure", {
      currentSize: size,
      maxSize: MAX_FILE_BYTES,
      outboxPath
    });
    throw new Error(
      "Traceprompt SDK backpressure: local outbox full, ingest unreachable."
    );
  }
  if (size > MAX_FILE_BYTES * 0.8) {
    log.warn("Outbox file size approaching limit", {
      currentSize: size,
      maxSize: MAX_FILE_BYTES,
      percentFull: Math.round(size / MAX_FILE_BYTES * 100),
      outboxPath
    });
  }
  const rec = JSON.stringify({ id: randomUUID(), ...item }) + "\n";
  try {
    await fs2.appendFile(outboxPath, rec, "utf8");
    log.debug("Record appended to outbox", {
      outboxPath,
      recordSize: rec.length
    });
  } catch (error) {
    log.error("Failed to append record to outbox", {
      error: error instanceof Error ? error.message : String(error),
      outboxPath
    });
    throw error;
  }
  ringPush(item);
  queueGauge.set(len);
  log.verbose("Record added to ring buffer", {
    ringSize: len,
    maxRingSize: getMaxRamRecords()
  });
}
// Concurrency limiter (p-limit with concurrency 1) that serialises flushes.
var limit = null;
/**
 * Send one batch of queued records to the ingest API.
 *
 * Records are taken from the in-memory ring first and topped up from the
 * on-disk outbox when the ring holds fewer than a full batch. On success the
 * disk records that were sent are removed from the outbox; on failure the
 * drained ring records are written back to the front of the outbox and the
 * error is rethrown.
 *
 * NOTE(review): append() writes every record to the outbox file and also pushes
 * it into the ring, while this function only trims the outbox by the number of
 * records it read from disk. Records sent from the ring therefore appear to
 * remain on disk and may be sent again later; confirm the server de-duplicates.
 *
 * @returns {Promise<void>}
 * @throws {Error} The transport error when the POST fails.
 */
async function flushOnce() {
  await bootstrap();
  initializeTimer();
  if (!limit) {
    const pLimit = await getPLimit();
    // Re-check after the await: a concurrent first call may already have
    // created the limiter, and two limiters would let flushes overlap.
    if (!limit) limit = pLimit(1);
  }
  return limit(async () => {
    const cfg = getConfig();
    const batchSize = cfg.batchSize || 10;
    let batch = [];
    const ringRecords = ringDrip(batchSize);
    if (ringRecords.length > 0) {
      log.verbose("Using ring buffer records for flush", {
        ringRecords: ringRecords.length,
        batchSize
      });
      batch = ringRecords.map((record) => ({
        id: randomUUID(),
        ...record
      }));
    }
    const diskLines = [];
    let totalDiskRecords = 0;
    if (batch.length < batchSize) {
      // Ring did not fill the batch: top it up from the outbox file.
      const needed = batchSize - batch.length;
      try {
        const rl = createInterface({ input: createReadStream(getLogPath()) });
        const diskBatch = [];
        for await (const line of rl) {
          if (!line.trim()) continue;
          if (diskBatch.length < needed) {
            diskBatch.push(JSON.parse(line));
          }
          diskLines.push(line);
          totalDiskRecords++;
          // Stop early once enough records are collected and a little read-ahead is done.
          if (diskBatch.length >= needed && totalDiskRecords >= needed * 2) {
            break;
          }
        }
        rl.close();
        if (diskBatch.length > 0) {
          log.verbose("Supplementing with disk records", {
            ringRecords: batch.length,
            diskRecords: diskBatch.length,
            totalDiskRecordsRead: totalDiskRecords
          });
          batch.push(...diskBatch);
        }
      } catch (error) {
        if (error.code === "ENOENT") {
          if (batch.length === 0) {
            log.debug("No records in ring buffer or disk, nothing to flush");
            return;
          }
        } else {
          log.warn("Error reading outbox file", {
            error: error.message,
            outboxPath: getLogPath()
          });
        }
      }
    } else {
      // Ring filled the batch: the outbox is only read to count pending records.
      try {
        const rl = createInterface({ input: createReadStream(getLogPath()) });
        for await (const line of rl) {
          if (line.trim()) {
            diskLines.push(line);
            totalDiskRecords++;
          }
        }
        rl.close();
      } catch (error) {
        if (error.code !== "ENOENT") {
          log.warn("Error counting disk records", {
            error: error.message,
            outboxPath: getLogPath()
          });
        }
      }
    }
    if (batch.length === 0) {
      log.debug("No records available for flush");
      return;
    }
    const totalPending = totalDiskRecords + (ringRecords.length > batch.length ? 0 : len);
    queueGauge.set(totalPending);
    log.info("Starting batch flush", {
      batchSize: batch.length,
      fromRingBuffer: Math.min(ringRecords.length, batch.length),
      fromDisk: Math.max(0, batch.length - ringRecords.length),
      totalPendingAfterFlush: totalPending - batch.length,
      outboxPath: getLogPath()
    });
    const body = {
      records: batch.map(({ payload, leafHash }) => ({ payload, leafHash }))
    };
    try {
      // Transport.post's third parameter is the retry count. Pass `undefined`
      // there (keeping its default) so the headers land in the fourth slot.
      await Transport.post("/v1/ingest", body, undefined, {
        "Idempotency-Key": batch[0].leafHash
      });
      if (totalDiskRecords > 0) {
        const diskRecordsUsed = Math.max(0, batch.length - ringRecords.length);
        if (diskRecordsUsed > 0) {
          let allDiskLines;
          if (diskLines.length === totalDiskRecords) {
            allDiskLines = diskLines;
          } else {
            try {
              const text = await fs2.readFile(getLogPath(), "utf8");
              allDiskLines = text.trim().split("\n").filter(Boolean);
            } catch (error) {
              log.error("Failed to read outbox file for cleanup", {
                error: error instanceof Error ? error.message : String(error),
                outboxPath: getLogPath()
              });
              return;
            }
          }
          const remaining = allDiskLines.slice(diskRecordsUsed);
          if (remaining.length > 0) {
            await fs2.writeFile(getLogPath(), remaining.join("\n") + "\n");
            log.info("Batch flush successful, updated outbox", {
              flushedRecords: batch.length,
              fromRingBuffer: ringRecords.length,
              fromDisk: diskRecordsUsed,
              remainingOnDisk: remaining.length
            });
            queueGauge.set(totalPending - batch.length);
          } else {
            await fs2.writeFile(getLogPath(), "");
            log.info("Batch flush successful, outbox cleared", {
              flushedRecords: batch.length,
              fromRingBuffer: ringRecords.length,
              fromDisk: diskRecordsUsed
            });
            queueGauge.set(totalPending - batch.length);
          }
        } else {
          log.info("Batch flush successful, used only ring buffer", {
            flushedRecords: batch.length,
            diskRecordsRemaining: totalDiskRecords
          });
          queueGauge.set(totalPending - batch.length);
        }
      } else {
        log.info("Batch flush successful, used only ring buffer", {
          flushedRecords: batch.length
        });
        queueGauge.set(totalPending - batch.length);
      }
    } catch (e) {
      const errorMessage = e instanceof Error ? e.message : String(e);
      if (ringRecords.length > 0) {
        // The ring was already drained; put its records at the front of the
        // outbox so they are not lost.
        log.warn("Flush failed, restoring ring buffer records to disk", {
          ringRecordsToRestore: ringRecords.length
        });
        const ringRecordsAsLines = ringRecords.map(
          (record) => JSON.stringify({ id: randomUUID(), ...record })
        );
        try {
          let existingContent = "";
          try {
            existingContent = await fs2.readFile(getLogPath(), "utf8");
          } catch {
            // No readable outbox yet; restore into a fresh file.
          }
          const allLines = [...ringRecordsAsLines];
          if (existingContent.trim()) {
            allLines.push(
              ...existingContent.trim().split("\n").filter(Boolean)
            );
          }
          await fs2.writeFile(getLogPath(), allLines.join("\n") + "\n");
        } catch (restoreError) {
          log.error("Failed to restore ring buffer records to disk", {
            error: restoreError instanceof Error ? restoreError.message : String(restoreError),
            lostRecords: ringRecords.length
          });
        }
      }
      // Classify by the "HTTP <status>" text that the transport puts in its error message.
      if (errorMessage.includes("HTTP 5")) {
        log.warn("Server error during batch flush, will retry", {
          error: errorMessage,
          batchSize: batch.length,
          totalPending
        });
      } else if (errorMessage.includes("HTTP 429")) {
        log.warn("Rate limited during batch flush, will retry", {
          error: errorMessage,
          batchSize: batch.length,
          totalPending
        });
      } else if (errorMessage.includes("HTTP 4")) {
        log.error("Client error during batch flush", {
          error: errorMessage,
          batchSize: batch.length,
          totalPending,
          hint: "Check API configuration and request format"
        });
      } else {
        log.error("Network error during batch flush", {
          error: errorMessage,
          batchSize: batch.length,
          totalPending
        });
      }
      flushFailures.inc();
      throw e;
    }
  });
}
// Periodic flush timer state.
var timerInitialized = false;
var flushTimer = null;
/**
 * Start the periodic flush timer once. The interval comes from
 * `flushIntervalMs` in the config, and the timer is unref'd so it does not keep
 * the process alive. Failures of a periodic flush are logged, not thrown.
 */
function initializeTimer() {
  if (timerInitialized) return;
  timerInitialized = true;
  const cfg = getConfig();
  log.info("Initializing periodic flush timer", {
    flushIntervalMs: cfg.flushIntervalMs
  });
  const onTick = async () => {
    try {
      await flushOnce();
    } catch (error) {
      log.verbose("Periodic flush failed, will retry on next interval", {
        error: error instanceof Error ? error.message : String(error),
        nextRetryIn: cfg.flushIntervalMs
      });
    }
  };
  flushTimer = setInterval(onTick, cfg.flushIntervalMs);
  flushTimer.unref();
}
/**
 * Run flushOnce(), retrying up to `opts.maxRetries` attempts in total.
 * Waits 500 ms, 1 s, 2 s, ... (capped at 4 s) between attempts and rethrows the
 * error from the final attempt.
 */
async function flushWithRetry(opts) {
  let attempt = 1;
  while (attempt <= opts.maxRetries) {
    try {
      await flushOnce();
      return;
    } catch (error) {
      if (attempt === opts.maxRetries) throw error;
      const delayMs = Math.min(500 * 2 ** (attempt - 1), 4e3);
      log.debug("Flush attempt failed, retrying", {
        attempt,
        maxRetries: opts.maxRetries,
        delayMs,
        error: error instanceof Error ? error.message : String(error)
      });
      await new Promise((wake) => setTimeout(wake, delayMs));
    }
    attempt += 1;
  }
}
/**
 * Keep flushing until the outbox file is empty (or unreadable), or until
 * `opts.maxTimeoutMs` has elapsed. Failed rounds are logged and followed by a
 * backoff of 500 ms doubling up to 4 s.
 *
 * @throws {Error} "Outbox drain timed out after <n>ms" when time runs out.
 */
async function drainOutboxWithRetry(opts) {
  const startedAt = Date.now();
  for (let attempt = 1; Date.now() - startedAt < opts.maxTimeoutMs; attempt++) {
    try {
      const pending = await fs2.readFile(getLogPath(), "utf8").catch(() => "");
      if (pending.trim() === "") {
        log.info("Outbox is empty, drain complete");
        return;
      }
      await flushWithRetry({ maxRetries: opts.maxRetries });
    } catch (error) {
      log.warn("Outbox drain attempt failed", {
        attempt,
        error: error instanceof Error ? error.message : String(error)
      });
      const delayMs = Math.min(500 * 2 ** (attempt - 1), 4e3);
      await new Promise((wake) => setTimeout(wake, delayMs));
    }
  }
  throw new Error(`Outbox drain timed out after ${opts.maxTimeoutMs}ms`);
}
/**
 * Stop accepting records, cancel the periodic flush, then flush the in-memory
 * ring and drain the on-disk outbox. Rejects if either phase fails.
 */
async function gracefulShutdown() {
  log.info("Starting graceful shutdown");
  closing = true;
  if (flushTimer) {
    clearInterval(flushTimer);
    log.debug("Cleared periodic flush timer");
  }
  const phases = [
    ["Flushing in-memory ring buffer", () => flushWithRetry({ maxRetries: 3 })],
    ["Draining persistent outbox", () => drainOutboxWithRetry({ maxRetries: 5, maxTimeoutMs: 3e4 })]
  ];
  for (const [announcement, run] of phases) {
    log.info(announcement);
    await run();
  }
  log.info("Graceful shutdown completed successfully");
}
// On SIGTERM and SIGINT: attempt a graceful shutdown, then exit with 0 on
// success, or log the failure, count it and exit with 1.
for (const signal of ["SIGTERM", "SIGINT"]) {
  process.on(signal, async () => {
    try {
      await gracefulShutdown();
      process.exit(0);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log.error("Graceful shutdown failed", { error: reason });
      flushFailures.inc();
      process.exit(1);
    }
  });
}
// Facade over the persistent queue: enqueue a record, flush one batch, or shut
// down gracefully.
var PersistentBatcher = {
enqueue: append,
flush: flushOnce,
gracefulShutdown
};
// src/piiDetector/preprocessor.ts
/**
 * Normalise text for PII matching and keep a map back to the source offsets.
 *
 * The input is NFC-normalised and every run of whitespace is collapsed to a
 * single space. `map.origPos(n)` translates an offset in the cleaned text to
 * the offset of the same character in the NFC-normalised input. An offset at
 * or beyond the end of the cleaned text (the exclusive end of a match that
 * reaches the last character) maps to the length of the normalised input.
 *
 * @param {string} raw - Text to clean.
 * @returns {{text: string, map: {origPos: (n: number) => number}}}
 */
function preprocess(raw) {
  const norm = raw.normalize("NFC");
  const whitespace = /\s/;
  let cleaned = "";
  const idx = [];
  for (let i = 0; i < norm.length; i++) {
    const ch = norm[i];
    if (whitespace.test(ch)) {
      // Keep only the first character of a whitespace run, as a plain space.
      if (cleaned[cleaned.length - 1] !== " ") {
        cleaned += " ";
        idx.push(i);
      }
    } else {
      cleaned += ch;
      idx.push(i);
    }
  }
  const map = {
    origPos(n) {
      const mapped = idx[n];
      if (mapped !== undefined) return mapped;
      // Past the last cleaned character there is no entry; returning `n`
      // itself would be an offset in the cleaned text, not in the original.
      return n >= idx.length ? norm.length : n;
    }
  };
  return { text: cleaned, map };
}
// src/piiDetector/utils/luhn.ts
/**
 * Luhn checksum validation. Non-digit characters are ignored.
 *
 * @param {string} num - Candidate number, optionally with separators.
 * @returns {boolean} True when the digits pass the Luhn check. Input with no
 *   digits at all is never valid.
 */
function luhnValid(num) {
  const digits = num.replace(/\D+/g, "");
  // Without this guard an empty digit string sums to 0 and would "pass".
  if (digits.length === 0) return false;
  let sum = 0;
  for (let i = 0; i < digits.length; i++) {
    // Walk from the rightmost digit; every second digit is doubled.
    let n = Number(digits[digits.length - 1 - i]);
    if (i % 2 === 1) {
      n *= 2;
      if (n > 9) n -= 9;
    }
    sum += n;
  }
  return sum % 10 === 0;
}
// src/piiDetector/utils/aba.ts
/**
 * Checksum test for a routing number: the input must be exactly nine digits
 * and the 3-7-1 weighted digit sum must be a multiple of 10.
 */
function abaValid(routing) {
  if (!/^\d{9}$/.test(routing)) {
    return false;
  }
  const weights = [3, 7, 1];
  const products = Array.from({ length: 9 }, (_, i) => +routing[i] * weights[i % 3]);
  const checksum = products.reduce((total, value) => total + value, 0);
  return checksum % 10 === 0;
}
// src/piiDetector/recognizers/regexRecognizer.ts
// Patterns used by regexRecognizer.detect(). All are global so they can be used
// with String.prototype.matchAll. The repeated \u002D\u2010-\u2015 class matches
// the ASCII hyphen plus the Unicode hyphen/dash variants.

// --- Contact and network identifiers ---
var EMAIL_RE = /\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/gu;
// The lookbehinds skip numbers introduced by "account " or "sort code ".
var PHONE_RE = /(?<!account\s)(?<!sort\s?code\s)(?:\+?\d{1,3}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?)?(?:\(\d{2,4}\)|\d{2,4})[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?\d{3,4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?\d{3,4}(?!\s+\d{4})\b/gu;
var SSN_RE = /\b\d{3}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015-]\d{2}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015-]\d{4}\b/gu;
var IPV4_RE = /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d?\d)\b/gu;
// --- Payment card numbers (candidates are Luhn-checked by the recognizer, except the masked form) ---
var VISA_RE = /\b4\d{12}(\d{3})?\b/gu;
var AMEX_RE = /\b3[47]\d{13}\b/gu;
var CC_GROUP_RE = /\b(?:\d{4}[-\s]?){3}\d{4}\b/gu;
var PAN_GENERIC_RE = /\b\d{4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]\d{4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]\d{4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]\d{3,7}\b/gu;
// Partially masked card number: 4 digits, 2 digits + "**", "****", 4 digits.
var PAN_MASKED_RE = /\b\d{4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?\d{2}\*{2}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?\*{4}[\u002D\u2010\u2011\u2012\u2013\u2014\u2015\s-]?\d{4}\b/gu;
// --- Banking identifiers ---
var IBAN_RE = /\b[A-Z]{2}\d{2}(?:[ \dA-Z]){11,30}\b/gu;
var SWIFT_RE = /\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}(?:[A-Z0-9]{3})?\b/gu;
// Nine digits either followed by "routing"/"aba" or preceded by one of those
// words; in the second form the match text includes the keyword.
var ROUTING_RE = /\b\d{3}[\s-]?\d{6}(?=\s*(?:routing|aba))|(?:routing|aba)\s+\d{3}[\s-]?\d{6}\b/giu;
// Sort code (nn-nn-nn), up to four words, then an 8-digit account number.
var SORT_ACC_RE = /\b\d{2}-\d{2}-\d{2}(?:\s+\w+){0,4}?\s*\d{8}\b/gi;
var ACCT_RE = /(?:acct|account)\s+\d{8,12}\b/gi;
// --- Government and postal identifiers ---
var POSTCODE_RE = /\b[A-Z]{1,2}\d{1,2}[A-Z]?\s?\d[A-Z]{2}\b/gi;
var NINO_RE = /\b[ABCEGHJ-PRSTW-ZQ]{2}\d{6}[A-D]\b/gi;
// The next three are deliberately broad; the recognizer only reports them when
// nearby text contains a related keyword.
var PASSPORT_RE = /\b(?:[A-Z]{1,2}\d{7}|[A-Z]\d{8}|[A-Z0-9]{9}|\d{9})\b/gu;
var DL_RE = /\b\d{8,15}[A-Z]{0,2}\b/gu;
var DL_CA_RE = /\bD\d{7}\b/gu;
// --- Device identifiers (also keyword-gated by the recognizer) ---
var MAC_RE = /\b(?:[0-9A-F]{2}[:-]){5}[0-9A-F]{2}\b/gi;
var IMEI_RE = /\b\d{15}\b/g;
// --- Health and insurance identifiers (the keyword is part of the pattern) ---
var MRN_RE = /\b(?:MRN|Patient\s*(?:ID|No\.?))\s*#?\s*\d{6,10}\b/gi;
var DOB_RE = /\b(?:DOB|Date\s*of\s*birth|D\.O\.B\.?)\s*[:\-]?\s*(\d{1,2}[\/\-]\d{1,2}[\/\-]\d{2,4})\b/gi;
var INS_RE = /\b(?:policy|member|insurance)\s*(?:id|no|number|#)\s*[:\-]?\s*[A-Z0-9]{3,}(?:[-–][A-Z0-9]{2,})?(?=\b|[,.;])(?!\s*card)\b/gi;
/**
 * Append a regex finding to `out`. Start and end offsets are translated to
 * source positions through `map.origPos`; `conf` defaults to 1.
 */
function push(out, type, m, map, conf = 1) {
  const matched = m[0];
  const from = m.index;
  const finding = {
    type,
    start: map.origPos(from),
    end: map.origPos(from + matched.length),
    text: matched,
    confidence: conf,
    source: "regex",
    risk: "general"
  };
  out.push(finding);
}
/** True when the half-open span [s, e) overlaps any finding already in `list`. */
function overlapsExisting(list, s, e) {
  for (const { start, end } of list) {
    const disjoint = e <= start || s >= end;
    if (!disjoint) return true;
  }
  return false;
}
/**
 * Pattern-based PII recognizer.
 *
 * `detect(text, map)` scans `text` with each pattern in a fixed order and
 * returns the findings; offsets are translated through `map.origPos`. The order
 * matters: the card and phone passes skip spans that overlap findings already
 * collected. Broad numeric patterns (MAC, IMEI, passport, driver licence) are
 * only reported when a related keyword appears close to the match.
 *
 * Each routing-number match is reported exactly once.
 * NOTE(review): routing matches are not filtered by ABA checksum here, because
 * the match text can include the "routing"/"aba" keyword and a dropped match
 * could be picked up by the phone pass instead. Confirm whether checksum
 * filtering is wanted.
 */
var regexRecognizer = {
  id: "regex",
  detect(text, map) {
    const out = [];
    // Text from `lead` chars before the match to `trail` chars after it.
    const contextOf = (m, lead, trail) => text.slice(Math.max(0, m.index - lead), m.index + m[0].length + trail);
    // Only the `lead` chars before the match.
    const leadingContext = (m, lead) => text.slice(Math.max(0, m.index - lead), m.index);
    const overlaps = (m) => overlapsExisting(
      out,
      map.origPos(m.index),
      map.origPos(m.index + m[0].length)
    );

    for (const m of text.matchAll(EMAIL_RE)) push(out, "EMAIL", m, map);
    for (const m of text.matchAll(SSN_RE)) push(out, "SSN", m, map, 0.98);
    for (const m of text.matchAll(IPV4_RE)) push(out, "IP", m, map, 0.95);

    // Payment cards: every full number must pass the Luhn check.
    for (const m of text.matchAll(VISA_RE)) {
      if (luhnValid(m[0])) push(out, "CREDIT_CARD", m, map, 0.95);
    }
    for (const m of text.matchAll(AMEX_RE)) {
      if (luhnValid(m[0])) push(out, "CREDIT_CARD", m, map, 0.95);
    }
    for (const m of text.matchAll(CC_GROUP_RE)) {
      const digits = m[0].replace(/\D+/g, "");
      if (digits.length === 16 && luhnValid(digits)) {
        push(out, "CREDIT_CARD", m, map, 0.95);
      }
    }
    for (const m of text.matchAll(PAN_GENERIC_RE)) {
      const digits = m[0].replace(/\D+/g, "");
      if (digits.length < 13 || digits.length > 19) continue;
      if (/[\*x]/i.test(m[0])) continue;
      if (luhnValid(digits) && !overlaps(m)) {
        push(out, "CREDIT_CARD", m, map, 0.95);
      }
    }
    for (const m of text.matchAll(PAN_MASKED_RE)) {
      if (!overlaps(m)) push(out, "CREDIT_CARD_PARTIAL", m, map, 0.7);
    }

    // Banking identifiers.
    for (const m of text.matchAll(IBAN_RE)) push(out, "IBAN", m, map, 0.9);
    for (const m of text.matchAll(SWIFT_RE)) push(out, "SWIFT_BIC", m, map, 0.9);
    for (const m of text.matchAll(SORT_ACC_RE)) push(out, "UK_BANK_ACCT", m, map, 0.9);
    for (const m of text.matchAll(NINO_RE)) push(out, "NINO", m, map, 0.9);
    // Single pass: scanning ROUTING_RE twice reported the same span twice.
    for (const m of text.matchAll(ROUTING_RE)) push(out, "US_ROUTING", m, map, 0.9);
    for (const m of text.matchAll(ACCT_RE)) push(out, "BANK_ACCOUNT", m, map, 0.85);

    // Device identifiers, gated on nearby keywords.
    for (const m of text.matchAll(MAC_RE)) {
      if (/mac|address|ethernet|wifi|device/i.test(contextOf(m, 10, 10))) {
        push(out, "MAC_ADDRESS", m, map, 0.85);
      }
    }
    for (const m of text.matchAll(IMEI_RE)) {
      if (/imei|device|phone|mobile/i.test(contextOf(m, 10, 10))) {
        push(out, "IMEI", m, map, 0.9);
      }
    }

    // Phone numbers: skip spans already claimed and numbers introduced by a
    // keyword that marks them as some other kind of identifier.
    for (const m of text.matchAll(PHONE_RE)) {
      const digits = m[0].replace(/\D+/g, "");
      if (digits.length < 9 || digits.length > 12) continue;
      if (overlaps(m)) continue;
      const pre = leadingContext(m, 15).toLowerCase();
      if (/aba\s|acct\s|routing\s|checking\s|sin\s|ein\s|insee\s|dni\s|nhs\s|mbi\s|npi\s|svnr\s|ohip\s|medicare\s|mac\s|imei\s|member\s|plan\s|policy\s|insurance\s/.test(pre)) {
        continue;
      }
      push(out, "PHONE", m, map, 0.9);
    }

    // Travel and driving documents, gated on nearby keywords.
    for (const m of text.matchAll(PASSPORT_RE)) {
      if (/passport|passeport|travel|document|P<|pièce/i.test(contextOf(m, 15, 5))) {
        push(out, "PASSPORT", m, map, 0.9);
      }
    }
    for (const m of text.matchAll(DL_RE)) {
      if (/\bDL\b|driver|licen[cs]e/i.test(leadingContext(m, 12))) {
        push(out, "DRIVER_LICENSE", m, map, 0.8);
      }
    }
    for (const m of text.matchAll(DL_CA_RE)) {
      if (/\bDL\b|driver/i.test(leadingContext(m, 10))) {
        push(out, "DRIVER_LICENSE", m, map, 0.8);
      }
    }

    // Health, insurance and postal identifiers.
    for (const m of text.matchAll(MRN_RE)) push(out, "MEDICAL_ID", m, map, 0.9);
    for (const m of text.matchAll(DOB_RE)) push(out, "DOB", m, map, 0.85);
    for (const m of text.matchAll(INS_RE)) push(out, "INSURANCE_ID", m, map, 0.8);
    for (const m of text.matchAll(POSTCODE_RE)) push(out, "POSTCODE", m, map, 0.7);
    return out;
  }
};
// Shared wink-nlp instance, built once at module load from the imported English model.
var nlp = winkNLP(model);
/**
 * Named-entity recognizer backed by wink-nlp.
 *
 * Keeps entities typed PERSON (reported as FULL_NAME) or LOCATION (reported as
 * ADDRESS) at a fixed confidence of 0.7, and translates their offsets through
 * `map.origPos`.
 * NOTE(review): this reads `start` and `end` from each entity detail object;
 * confirm that the `its.detail` output actually carries those fields.
 */
var nerRecognizer = {
  id: "wink-ner",
  detect(text, map) {
    const details = nlp.readDoc(text).entities().out(its.detail);
    const findings = [];
    for (const entity of details) {
      const isPerson = entity.type === "PERSON";
      if (!isPerson && entity.type !== "LOCATION") continue;
      findings.push({
        type: isPerson ? "FULL_NAME" : "ADDRESS",
        start: map.origPos(entity.start),
        end: map.origPos(entity.end),
        text: text.slice(entity.start, entity.end),
        confidence: 0.7,
        source: this.id,
        risk: "sensitive"
      });
    }
    return findings;
  }
};
// src/piiDetector/idPatterns.json
// Declarative rule table for national / medical identifier detection.
// Each rule carries:
//   type     - entity type reported for a match
//   regex    - pattern source, compiled elsewhere with the "gu" flags
//   context  - keywords, at least one of which must appear near the match
//   validate - name of a checksum function in the validator registry, or null
// The long bracketed class in several patterns lists ASCII hyphen, the
// Unicode dashes U+2010..U+2015 and whitespace as accepted separators.
var idPatterns_default = [
  {
    type: "DNI",
    regex: "\\b\\d{8}[A-Z]\\b",
    context: ["dni", "national"],
    validate: "dniCheck"
  },
  {
    type: "INSEE_SSN",
    // 13-digit body optionally followed by the 2-digit control key. Without
    // the optional tail a compact 15-digit number could not match at all (the
    // trailing \b fails between digits) and a spaced number lost its key,
    // which inseeCheck needs in order to validate.
    regex: "\\b[12]\\s?\\d{2}\\s?\\d{2}\\s?\\d{2}\\s?\\d{3}\\s?\\d{3}(?:\\s?\\d{2})?\\b",
    context: ["social security", "insee", "num\xE9ro"],
    validate: "inseeCheck"
  },
  {
    type: "EU_NATIONAL_ID",
    regex: "\\b\\d{6}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{3}\\.\\d{2}\\b",
    context: [
      "eid",
      "rijksregisternummer",
      "national",
      "belgian",
      "netherlands"
    ],
    validate: "beEidCheck"
  },
  {
    type: "EIN",
    regex: "\\b\\d{2}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{7}\\b",
    context: ["ein", "tax id", "tin", "employer", "federal"],
    validate: null
  },
  {
    type: "UK_DL",
    regex: "\\b[A-Z]{5}\\d{6}[A-Z0-9]{5}\\b",
    context: ["driving licence", "driver licence", "dvla"],
    validate: null
  },
  {
    type: "ON_DL",
    regex: "\\b[A-Z]\\d{4}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015-]\\d{5}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015-]\\d{5}\\b",
    context: ["driver licence", "licence", "ontario"],
    validate: null
  },
  {
    type: "CA_SIN",
    regex: "\\b\\d{3}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{3}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{3}\\b",
    context: ["sin", "social insurance"],
    validate: null
  },
  {
    type: "PERSONNUMMER",
    regex: "\\b\\d{6}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{4}\\b",
    context: ["personnummer"],
    validate: "luhn10"
  },
  {
    type: "NHS_NUMBER",
    regex: "\\b\\d{3}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{3}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{4}\\b",
    context: ["nhs number", "chi"],
    validate: "nhsMod11"
  },
  {
    type: "MBI",
    regex: "\\b[0-9][A-HJ-NP-TV-Z][0-9A-HJ-NP-TV-Z][0-9][A-HJ-NP-TV-Z]{2}[0-9][A-HJ-NP-TV-Z]{2}[0-9]{2}\\b",
    context: ["medicare", "mbi"],
    validate: null
  },
  {
    type: "NPI",
    regex: "\\b\\d{10}\\b",
    context: ["npi", "provider"],
    // Routed to the NPI-specific validator registered alongside luhn10.
    validate: "npiLuhn"
  },
  {
    type: "ON_HEALTH",
    regex: "\\b\\d{10}\\b",
    context: ["health card", "ohip"],
    validate: "luhn10"
  },
  {
    type: "SVNR",
    regex: "\\b\\d{2}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{6}[\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?[A-Z][\\u002D\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\s-]?\\d{3}\\b",
    context: ["svnr", "versicherungsnummer"],
    validate: "svnrMod11"
  }
];
// src/piiDetector/utils/checksums.ts
/**
 * Validates the control letter of a Spanish DNI (8 digits + 1 letter).
 *
 * The expected letter is looked up in a fixed 23-letter table at index
 * `number % 23`.
 *
 * @param {string} num - Candidate DNI with separators already removed.
 * @returns {boolean} True only for exactly 8 digits followed by the matching
 *   upper-case control letter.
 */
function dniCheck(num) {
  // Reject malformed input up front: without this guard a short string such
  // as "" compared `undefined === undefined` and was reported as valid.
  if (typeof num !== "string" || !/^\d{8}[A-Z]$/.test(num)) return false;
  const letters = "TRWAGMYFPDXBNJZSQVHLCKE";
  const n = parseInt(num.slice(0, 8), 10);
  return num[8] === letters[n % 23];
}
/**
 * Checks the two-digit control key of a French INSEE number.
 *
 * Whitespace is removed, the last two characters are read as the key and
 * everything before them as the body; the key must equal `97 - (body % 97)`.
 *
 * @param {string} num - Candidate number, optionally containing whitespace.
 * @returns {boolean} True when the trailing key matches the body.
 */
function inseeCheck(num) {
  const compact = num.replace(/\s/g, "");
  const keyPart = compact.slice(-2);
  const bodyPart = compact.slice(0, -2);
  const expectedKey = 97 - (parseInt(bodyPart, 10) % 97);
  return expectedKey === parseInt(keyPart, 10);
}
/**
 * Validates the check digits of a Belgian national register number
 * (11 digits: 6-digit birth date, 3-digit serial, 2-digit check).
 *
 * The check value is `97 - (base % 97)`, where `base` is the first nine
 * digits. For people born in 2000 or later the official rule prefixes the
 * nine digits with "2" before taking the modulus; since the century is not
 * encoded in the number, both readings are accepted.
 *
 * @param {string} num - Candidate number; any non-digit separators are ignored.
 * @returns {boolean} True when the last two digits match either reading.
 */
function beEidCheck(num) {
  // Strip every non-digit so spaces and Unicode dashes cannot corrupt the
  // fixed-position slices below.
  const digits = num.replace(/\D/g, "");
  if (digits.length !== 11) return false;
  const serial = digits.slice(0, 9);
  const chk = parseInt(digits.slice(9), 10);
  const expectedFor = (base) => 97 - (parseInt(base, 10) % 97);
  return expectedFor(serial) === chk || expectedFor(`2${serial}`) === chk;
}
/**
 * Luhn (mod 10) checksum over the digits contained in `num`.
 *
 * Non-digit characters are ignored. Counting from the rightmost digit, every
 * second digit is doubled (subtracting 9 when the result exceeds 9) and the
 * total must be a multiple of 10.
 *
 * @param {string} num - Candidate number, separators allowed.
 * @returns {boolean} True when at least one digit is present and the Luhn
 *   sum is divisible by 10.
 */
function luhn10(num) {
  const clean = num.replace(/\D+/g, "");
  // An input without digits sums to 0 and would otherwise pass as "valid".
  if (clean.length === 0) return false;
  let sum = 0;
  for (let i = 0; i < clean.length; i++) {
    // Walk right-to-left; i is the distance from the last digit.
    let n = Number(clean[clean.length - 1 - i]);
    if (i % 2 === 1) {
      n *= 2;
      if (n > 9) n -= 9;
    }
    sum += n;
  }
  return sum % 10 === 0;
}
/**
 * Modulus-11 check for a 10-digit NHS number.
 *
 * The first nine digits are weighted 10 down to 2 and summed; the expected
 * check digit is `11 - (sum % 11)`, with 11 mapped to 0. An expected value of
 * 10 can never equal a single digit, so such numbers are rejected.
 *
 * @param {string} num - Candidate number; non-digits are ignored.
 * @returns {boolean} True when the tenth digit equals the expected check digit.
 */
function nhsMod11(num) {
  const allDigits = num.replace(/\D/g, "");
  const payload = allDigits.slice(0, 9);
  if (payload.length !== 9) return false;
  let weighted = 0;
  for (let pos = 0; pos < 9; pos++) {
    weighted += Number(payload[pos]) * (10 - pos);
  }
  let expected = 11 - (weighted % 11);
  if (expected === 11) expected = 0;
  // A missing tenth digit becomes NaN here and therefore never matches.
  return expected === Number(allDigits[9]);
}
/**
 * Weighted modulus-11 check used for the SVNR rule.
 *
 * All non-digits (including the embedded letter) are dropped. The first ten
 * remaining digits are weighted 2..11 and summed; `sum % 11` must equal the
 * last remaining digit.
 *
 * NOTE(review): this weighting does not obviously correspond to a published
 * insurance-number check-digit scheme — confirm against the intended spec.
 *
 * @param {string} num - Candidate number.
 * @returns {boolean} True when at least ten digits are present and the
 *   checksum matches the final digit.
 */
function svnrMod11(num) {
  const allDigits = num.replace(/\D/g, "");
  if (allDigits.length < 10) return false;
  const weighted = [...allDigits.slice(0, 10)].reduce(
    (acc, digit, pos) => acc + Number(digit) * (pos + 2),
    0
  );
  return weighted % 11 === Number(allDigits.slice(-1));
}
/**
 * IMEI validator: delegates unchanged to the generic Luhn check.
 *
 * @param {string} num - Candidate IMEI.
 * @returns {boolean} Whatever `luhn10` reports for `num`.
 */
function imeiLuhn(num) {
  const passesLuhn = luhn10(num);
  return passesLuhn;
}
/**
 * Validates a US National Provider Identifier check digit.
 *
 * An NPI's check digit is a Luhn digit computed over the card-issuer prefix
 * "80840" followed by the NPI itself, so running plain Luhn over the bare
 * 10 digits rejects valid NPIs. A bare 10-digit value is therefore prefixed
 * before the Luhn check; any other length (e.g. the 15-digit form that
 * already carries the prefix) is checked as given.
 *
 * @param {string} num - Candidate NPI; non-digits are ignored.
 * @returns {boolean} True when the Luhn check over the prefixed number passes.
 */
function npiLuhn(num) {
  const digits = num.replace(/\D+/g, "");
  return luhn10(digits.length === 10 ? `80840${digits}` : digits);
}
// src/piiDetector/recognizers/idRecognizer.ts
// Rule table with each pattern source pre-compiled: every entry is a copy of
// the declarative rule plus an `re` property built with the global + unicode
// flags.
var compiled = idPatterns_default.map(function compileRule(rule) {
  const re = new RegExp(rule.regex, "gu");
  return { ...rule, re };
});
// Registry that resolves a rule's `validate` string to its checksum function.
var validators = {
  dniCheck: dniCheck,
  inseeCheck: inseeCheck,
  beEidCheck: beEidCheck,
  luhn10: luhn10,
  nhsMod11: nhsMod11,
  svnrMod11: svnrMod11,
  imeiLuhn: imeiLuhn,
  npiLuhn: npiLuhn
};
/**
 * Recognizer for national / medical identifiers driven by the compiled rule
 * table.
 */
var idRecognizer = {
  id: "nat-id",
  /**
   * Scans `text` with every compiled rule.
   *
   * A match is kept only if one of the rule's context keywords appears within
   * 25 characters on either side (the window includes the match itself).
   * Matches start at confidence 0.9 and drop to 0.7 when the rule names a
   * registered validator that rejects the separator-stripped value.
   *
   * @param {string} text - Text to scan.
   * @param {{origPos: function(number): number}} map - Offset translator.
   * @returns {Array<object>} Entity records, all with risk "critical".
   */
  detect(text, map) {
    const out = [];
    // Characters removed before checksum validation: whitespace, ASCII hyphen,
    // the dashes U+2010..U+2015, "." and "/". Spelled out explicitly; the set
    // is the same one the former ".-/" range produced.
    const separators = /[\s\u002D\u2010\u2011\u2012\u2013\u2014\u2015./]/g;
    for (const rule of compiled) {
      // Built once per rule rather than once per match (loop-invariant).
      const contextPattern = rule.context ? new RegExp(rule.context.join("|"), "i") : null;
      const validator = rule.validate ? validators[rule.validate] : void 0;
      for (const m of text.matchAll(rule.re)) {
        if (contextPattern) {
          const contextWindow = text.slice(
            Math.max(0, m.index - 25),
            m.index + m[0].length + 25
          );
          if (!contextPattern.test(contextWindow)) continue;
          // Nine-digit values next to banking vocabulary are skipped rather
          // than reported as EINs.
          if (rule.type === "EIN" && /routing|aba|acct|checking|bank/i.test(contextWindow))
            continue;
        }
        let confidence = 0.9;
        // An unknown validator name is ignored and confidence stays at 0.9.
        if (validator && !validator(m[0].replace(separators, ""))) {
          confidence = 0.7;
        }
        out.push({
          type: rule.type,
          start: map.origPos(m.index),
          end: map.origPos(m.index + m[0].length),
          text: m[0],
          confidence,
          source: this.id,
          risk: "critical"
        });
      }
    }
    return out;
  }
};
/**
 * Finds every occurrence of `searchText` inside `text`.
 *
 * The search resumes one character after each hit, so overlapping
 * occurrences are all reported.
 *
 * @param {string} text - Haystack.
 * @param {string} searchText - Needle.
 * @returns {Array<{start: number, end: number}>} Half-open offset ranges in
 *   order of appearance; empty when the needle is empty or absent.
 */
function findTextPositions(text, searchText) {
  const positions = [];
  // An empty needle "matches" at every index and would produce one useless
  // zero-width position per character.
  if (!searchText) return positions;
  for (let at = text.indexOf(searchText); at !== -1; at = text.indexOf(searchText, at + 1)) {
    positions.push({
      start: at,
      end: at + searchText.length
    });
  }
  return positions;
}
/**
 * Recognizer backed by compromise's people / organization extraction.
 */
var compromiseRecognizer = {
  id: "compromise",
  /**
   * Reports every occurrence of each person and organization that compromise
   * finds in `text`.
   *
   * People are emitted as FULL_NAME (text contains a space) or FIRST_NAME;
   * organizations are emitted as FULL_NAME with source "compromise-org".
   * Each distinct text is expanded to its positions only once: compromise
   * returns one entry per mention, and expanding every entry to all
   * positions would emit the same span repeatedly.
   *
   * @param {string} text - Text to scan.
   * @param {{origPos: function(number): number}} map - Offset translator.
   * @returns {Array<object>} Entity records with risk "sensitive".
   */
  detect(text, map) {
    const entities = [];
    const doc = nlp2(text);
    const seenPeople = new Set();
    for (const person of doc.people().json()) {
      // Skip empty text and texts already expanded to all their positions.
      if (!person.text || seenPeople.has(person.text)) continue;
      seenPeople.add(person.text);
      const personDoc = nlp2(person.text);
      if (personDoc.has("#Verb") || personDoc.has("#Adjective") || personDoc.has("#Adverb")) {
        continue;
      }
      const isInstructionalTerm = personDoc.has("#Imperative") || /^(emergency|health|monitor|symptoms|seek|call|avoid|inform|positioning|stay|sit)$/i.test(
        person.text.trim()
      );
      if (isInstructionalTerm) continue;
      for (const pos of findTextPositions(text, person.text)) {
        entities.push({
          type: person.text.includes(" ") ? "FULL_NAME" : "FIRST_NAME",
          start: map.origPos(pos.start),
          end: map.origPos(pos.end),
          text: person.text,
          confidence: 0.85,
          source: this.id,
          risk: "sensitive"
        });
      }
    }
    const seenOrgs = new Set();
    for (const org of doc.organizations().json()) {
      if (!org.text || seenOrgs.has(org.text)) continue;
      seenOrgs.add(org.text);
      // Skip fully parenthesized fragments such as "(e.g., 911 in the U.S.)"
      // and bare country / region abbreviations.
      if (/^\([^)]*\)$/.test(org.text) || /^(U\.S\.|UK|USA|Canada|Europe)[\)\.]?$/i.test(org.text.trim())) {
        continue;
      }
      for (const pos of findTextPositions(text, org.text)) {
        entities.push({
          // Same entity type as people; the distinct source marks it as a
          // business name.
          type: "FULL_NAME",
          start: map.origPos(pos.start),
          end: map.origPos(pos.end),
          text: org.text,
          confidence: 0.75,
          source: "compromise-org",
          risk: "sensitive"
        });
      }
    }
    return entities;
  }
};
/**
 * Heuristic filter: decides whether `text`, as it appears in `fullContext`,
 * is probably an ordinary word rather than a personal name.
 *
 * Checks, in order: part-of-speech tags compromise assigns to the word in
 * context, fixed medical and instructional vocabularies, list / heading /
 * bold markup within 20 characters of the first occurrence, and finally the
 * raw tag list of the first matched term.
 *
 * @param {string} text - Candidate word or phrase.
 * @param {string} fullContext - Surrounding text that contains the candidate.
 * @returns {boolean} True when the candidate looks like a non-name. False
 *   when it is not found in context, nothing disqualifies it, or an error
 *   occurs (the error is logged with console.warn).
 */
function isLikelyNotAName(text, fullContext) {
  try {
    const wordInContext = nlp2(fullContext).match(text);
    if (!wordInContext.found) return false;

    const disqualifyingTags = [
      "#Verb",
      "#Adjective",
      "#Adverb",
      "#Preposition",
      "#Conjunction",
      "#Determiner",
      "#Modal",
      "#Auxiliary",
      "#Imperative",
      "#CommonNoun",
      "#Gerund"
    ];
    if (disqualifyingTags.some((tag) => wordInContext.has(tag))) return true;

    const candidate = text.trim();
    const medicalTerms = /^(emergency|health|medical|symptoms|monitor|treatment|therapy|diagnosis|prescription|medication|hospital|clinic|doctor|patient|procedure|surgery|examination|consultation|ambulance|paramedic|nurse|vital|condition|disease|illness|infection|virus|bacteria|fever|pain|breathing|respiratory|cardiac|blood|pressure|heart|lung|brain|liver|kidney|diabetes|cancer|stroke|seizure|allergy|injection|vaccine|test|scan|xray|mri|ultrasound|laboratory|specimen|sample|result|report|chart|record)$/i;
    const instructionalTerms = /^(seek|call|avoid|inform|position|stay|sit|take|give|provide|contact|reach|report|listen|watch|observe|check|verify|confirm|ensure|prevent|reduce|increase|decrease|improve|maintain|continue|stop|start|begin|end|finish|complete|follow|perform|execute|implement|apply|use|utilize|operate|handle|manage|control|direct|guide|assist|help|support|advise|recommend|suggest|indicate|show|demonstrate|explain|describe|discuss|review|examine|evaluate|assess|analyze|consider|determine|decide|choose|select|prefer|require|need|want|wish|hope|expect|anticipate|prepare|plan|organize|arrange|schedule|coordinate|communicate|inform|notify|alert|warn|remind|update|progress|develop|create|establish|build|construct)$/i;
    if (medicalTerms.test(candidate) || instructionalTerms.test(candidate)) return true;

    // Inspect 20 characters on either side of the first occurrence for
    // numbered-list, heading or bold markup.
    const anchor = fullContext.indexOf(text);
    const contextAroundWord = fullContext.substring(
      Math.max(0, anchor - 20),
      anchor + text.length + 20
    );
    const markupPatterns = [/[0-9]+\.\s*\*?\*?/, /#{1,6}\s/, /\*\*.*\*\*/];
    if (markupPatterns.some((pattern) => pattern.test(contextAroundWord))) {
      return true;
    }

    const docData = wordInContext.json();
    if (!docData || !(docData.length > 0) || !docData[0].terms) return false;

    // Last resort: look at the tags of the first term of the first match.
    const tags = docData[0].terms[0].tags || [];
    const nonNameTags = [
      "Verb",
      "Adjective",
      "Adverb",
      "Preposition",
      "Conjunction",
      "Determiner",
      "Modal",
      "Auxiliary",
      "Imperative",
      "CommonNoun",
      "Gerund",
      "Infinitive",
      "PastTense",
      "PresentTense",
      "FutureTense",
      "Comparative",
      "Superlative",
      "Possessive",
      "Plural"
    ];
    return tags.some((tag) => nonNameTags.includes(tag));
  } catch (error) {
    console.warn("Error in isLikelyNotAName:", error);
    return false;
  }
}
// src/piiDetector/recognizers/nameRecognizer.ts
// Another wink-nlp instance built from the same imported model.
// NOTE(review): its uses are outside this part of the file — presumably the
// name recognizer; confirm whether a separate instance is actually needed.
var nlp3 = winkNLP(model);
// Lower-case titles and honorifics (no trailing period) that may precede a
// personal name.
var NAME_PREFIXES = /* @__PURE__ */ new Set([
  // Social, academic and nobility titles
  "mr", "mrs", "ms", "miss", "dr", "prof", "professor", "sir", "lady", "lord",
  // Religious titles
  "rev", "father", "sister", "brother",
  // Military ranks
  "captain", "major", "colonel", "general",
  // Political, judicial and police titles ("st" stands for Saint)
  "sen", "rep", "judge", "officer", "sgt", "st", "detective", "deputy", "chief"
]);
// Lower-case generational and credential suffixes (no periods) that may
// follow a personal name.
var NAME_SUFFIXES = /* @__PURE__ */ new Set([
  // Generational
  "jr", "sr", "ii", "iii", "iv",
  // Degrees and professional credentials
  "phd", "md", "esq", "cpa", "rn"
]);
/**
 * Builds a lookup set from a word list, lower-casing every entry.
 *
 * @param {string[]} words - Words to include.
 * @returns {Set<string>} Lower-cased, de-duplicated words.
 */
var buildStopSet = (words) => {
  const lowered = words.map((word) => word.toLowerCase());
  return new Set(lowered);
};
var COMMON_WORDS_RAW = [
"about",
"above",
"after",
"again",
"against",
"all",
"am",
"an",
"and",
"any",
"are",
"as",
"at",
"be",
"because",
"been",
"before",
"being",
"below",
"between",
"both",
"but",
"by",
"could",
"did",
"do",
"does",
"doing",
"down",
"during",
"each",
"few",
"for",
"from",
"further",
"had",
"has",
"have",
"having",
"he",
"her",
"here",
"hers",
"herself",
"him",
"himself",
"his",
"how",
"if",
"in",
"into",
"is",
"it",
"its",
"itself",
"let",
"me",
"more",
"most",
"my",
"myself",
"no",
"nor",
"not",
"of",
"off",
"on",
"once",
"only",
"or",
"other",
"ought",
"our",
"ours",
"ourselves",
"out",
"over",
"own",
"same",
"she",
"should",
"so",
"some",
"such",
"than",
"that",
"the",
"their",
"theirs",
"them",
"themselves",
"then",
"there",
"these",
"they",
"this",
"those",
"through",
"to",
"too",
"under",
"until",
"up",
"very",
"was",
"we",
"were",
"what",
"when",
"where",
"which",
"while",
"who",
"whom",
"why",
"with",
"would",
"you",
"your",
"yours",
"yourself",
"yourselves",
// Days and months
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday",
"january",
"february",
"march",
"april",
"may",
"june",
"july",
"august",
"september",
"october",
"november",
"december",
// Common false positives
"email",
"phone",
"address",
"contact",
"company",
"team",
"group",
"department",
"office",
"building",
"hello",
"thanks",
"please",
"regards",
"best",
"dear",
"sincerely",
"yours",
"welcome",
"goodbye",
"meeting",
"call",
"conference",
"session",
"appointment",
"interview",
"discussion",
// Additional words commonly flagged as false positives
"close",
"update",
"delete",
"create",
"remove",
"add",
"set",
"get",
"help",