traceprompt-node

Client-side encrypted, audit-ready logging for LLM applications
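
The file exports two functions: initTracePrompt, which loads configuration, and wrapLLM, which wraps an LLM call so that each invocation is timed, scanned for PII, encrypted client-side, hashed, and queued for upload. The following is a minimal usage sketch only; the OpenAI client, the model name, and the empty init call are illustrative assumptions. TracePromptInit and WrapOpts are defined in ./types, and only the modelVendor, modelName, and userId fields of WrapOpts appear in this file.

import OpenAI from "openai";
import { initTracePrompt, wrapLLM } from "traceprompt-node";

const openai = new OpenAI();

async function main(): Promise<void> {
  // Load TracePrompt configuration; the accepted fields are defined by
  // TracePromptInit in ./types and are not shown in this file.
  await initTracePrompt();

  // Wrap a (prompt, params?) => Promise<R> call. The meta object supplies the
  // fields that wrapLLM copies into each audit payload.
  const chat = wrapLLM(
    (prompt: string) =>
      openai.chat.completions.create({
        model: "gpt-4o-mini", // assumed model name, purely illustrative
        messages: [{ role: "user", content: prompt }],
      }),
    { modelVendor: "openai", modelName: "gpt-4o-mini", userId: "user-123" }
  );

  const completion = await chat("Summarise yesterday's deployment incident.");
  console.log(completion.choices[0].message.content);
}

main().catch(console.error);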

import { performance } from "node:perf_hooks";
import { initTracePrompt as initCfgAsync, ConfigManager } from "./config";
import { encryptBuffer } from "./crypto/encryptor";
import { computeLeaf } from "./crypto/hasher";
import { countTokens } from "./utils/tokenCounter";
import { PersistentBatcher as Batcher } from "./queue/persistentBatcher";
const stringify = require("json-stable-stringify") as (v: any) => string;
import type { TracePromptInit, WrapOpts } from "./types";
import { registry } from "./metrics";
import { Histogram } from "prom-client";
import { analyzePiiInPromptResponse } from "./utils/piiDetector";

// Helper function to extract actual content from LLM responses
function extractResponseContent(result: any): string {
  if (typeof result === "string") {
    return result;
  }

  // OpenAI chat completion format
  if (result?.choices?.[0]?.message?.content) {
    return result.choices[0].message.content;
  }

  // Anthropic format
  if (result?.content?.[0]?.text) {
    return result.content[0].text;
  }

  // Generic content field
  if (result?.content && typeof result.content === "string") {
    return result.content;
  }

  // Fallback to stringification only if no recognizable content structure
  return JSON.stringify(result);
}

const wrapperLatencyHist = new Histogram({
  name: "traceprompt_llm_wrapper_latency_ms",
  help: "End‑to‑end latency from prompt send to response receive in the SDK wrapper (ms)",
  buckets: [50, 100, 250, 500, 1000, 2000, 5000],
  registers: [registry],
});

// Map PII detector risk levels to API expected values
function mapRiskLevelToApiEnum(
  riskLevel: "general" | "sensitive" | "critical"
): "low" | "medium" | "high" | "critical" {
  switch (riskLevel) {
    case "general":
      return "low";
    case "sensitive":
      return "medium";
    case "critical":
      return "critical";
    default:
      return "low";
  }
}

export async function initTracePrompt(
  cfg?: Partial<TracePromptInit>
): Promise<void> {
  await initCfgAsync(cfg);
}

export function wrapLLM<P extends Record<string, any>, R>(
  originalFn: (prompt: string, params?: P) => Promise<R>,
  meta: WrapOpts
): (prompt: string, params?: P) => Promise<R> {
  const staticMeta = ConfigManager.cfg.staticMeta;

  return async function wrapped(prompt: string, params?: P): Promise<R> {
    const t0 = performance.now();
    const result = await originalFn(prompt, params);
    const t1 = performance.now();
    wrapperLatencyHist.observe(t1 - t0);

    // Extract actual content from LLM response instead of stringifying entire object
    const responseText = extractResponseContent(result);

    const piiAnalysis = await analyzePiiInPromptResponse(prompt, responseText);

    const plaintextJson = JSON.stringify({
      prompt,
      response: result,
    });

    const enc = await encryptBuffer(Buffer.from(plaintextJson, "utf8"));

    const payload = {
      ...staticMeta,
      orgId: ConfigManager.cfg.orgId,
      modelVendor: meta.modelVendor,
      modelName: meta.modelName,
      userId: meta.userId,
      ts_client: new Date().toISOString(),
      latency_ms: +(t1 - t0).toFixed(2),
      prompt_tokens: countTokens(prompt),
      response_tokens: countTokens(responseText),
      pii_detected: piiAnalysis.overallPiiDetected,
      pii_types: piiAnalysis.allPiiTypes,
      pii_risk_level:
        piiAnalysis.prompt.piiDetected || piiAnalysis.response.piiDetected
          ? piiAnalysis.prompt.riskLevel === "critical" ||
            piiAnalysis.response.riskLevel === "critical"
            ? "critical"
            : piiAnalysis.prompt.riskLevel === "sensitive" ||
              piiAnalysis.response.riskLevel === "sensitive"
            ? "medium"
            : "low"
          : "low",
      prompt_pii_detected: piiAnalysis.prompt.piiDetected,
      prompt_pii_types: piiAnalysis.prompt.piiTypes,
      prompt_pii_risk_level: mapRiskLevelToApiEnum(
        piiAnalysis.prompt.riskLevel
      ),
      response_pii_detected: piiAnalysis.response.piiDetected,
      response_pii_types: piiAnalysis.response.piiTypes,
      response_pii_risk_level: mapRiskLevelToApiEnum(
        piiAnalysis.response.riskLevel
      ),
      enc,
    };

    const leafHash = computeLeaf(stringify(payload));
    Batcher.enqueue({ payload, leafHash });

    return result;
  };
}