@dooor-ai/toolkit
Version:
Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
634 lines (633 loc) • 29.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DOOORCallbackHandler = void 0;
const base_1 = require("@langchain/core/callbacks/base");
const types_1 = require("./types");
const uuid_1 = require("uuid");
/**
* Callback handler that implements guards, evals, and observability
*/
class DOOORCallbackHandler extends base_1.BaseCallbackHandler {
constructor(config) {
super();
this.name = "DOOORCallbackHandler";
this.guardsResults = [];
this.toolCalls = [];
this.pendingToolCalls = [];
this.toolCallsByTrace = new Map();
this.traceSequence = 0;
console.log("[DOOORCallbackHandler] Constructor called with config:", {
guardsCount: config.guards?.length || 0,
evalsCount: config.evals?.length || 0,
outputGuardsCount: config.outputGuards?.length || 0,
hasObservability: !!config.observability,
evalMode: config.evalMode,
evalSampleRate: config.evalSampleRate,
guardFailureMode: config.guardFailureMode,
});
this.guards = config.guards ?? [];
this.evals = config.evals ?? [];
this.outputGuards = config.outputGuards ?? [];
this.observability = config.observability;
this.evalMode = config.evalMode ?? "async";
this.evalSampleRate = config.evalSampleRate ?? 1.0;
this.guardFailureMode = config.guardFailureMode ?? "throw";
this.defaultModelName = config.modelName ?? "unknown";
console.log("[DOOORCallbackHandler] Constructor completed, handler ready");
}
/**
* Called before LLM starts (LangChain lifecycle hook)
*/
async handleLLMStart(llm, prompts, runId, _parentRunId, _extraParams, _tags, _metadata, _runName) {
await this.processLLMStart(llm, prompts, runId);
}
/**
* Backwards compatibility with LangChain < 0.3 which calls onLLMStart
*/
async onLLMStart(llm, prompts, runId) {
await this.processLLMStart(llm, prompts, runId);
}
/**
 * Shared pre-LLM pipeline: starts a fresh trace, detects conversation
 * boundaries for session tracking, back-fills pending tool outputs from the
 * prompt text, and runs every enabled input guard.
 *
 * NOTE(review): logic is order-sensitive — tool-output extraction must run
 * after the session/trace state is set, and guards run last so their errors
 * carry the new trace id. Only comments are added here.
 */
async processLLMStart(llm, prompts, runId) {
const llmName = llm?.name || this.defaultModelName;
console.log("[DOOORCallbackHandler] handleLLMStart called", {
llmName,
promptsCount: prompts.length,
runId,
});
// Fresh per-invocation trace state. Only the first prompt is traced.
this.currentTraceId = (0, uuid_1.v4)();
this.currentModel = llmName;
this.currentInput = prompts[0] || "";
this.startTime = Date.now();
this.guardsResults = [];
// Session tracking: detect if this is a new conversation or continuation
// Heuristic: prompts containing prior "AI:"/"Tool:" turns are continuations.
// TODO confirm this matches the prompt format of all supported agents.
const isNewConversation = !this.currentInput.includes("AI:") && !this.currentInput.includes("Tool:");
if (isNewConversation || !this.currentSessionId) {
// New conversation - generate new session_id
this.currentSessionId = (0, uuid_1.v4)();
this.traceSequence = 0;
console.log("[DOOORCallbackHandler] đ New session started:", this.currentSessionId);
}
// Note: traceSequence will be incremented when trace is actually saved (in processLLMEnd)
console.log("[DOOORCallbackHandler] đ Session:", this.currentSessionId, "| Current sequence:", this.traceSequence);
// Extract tool output from input (for LangGraph agents)
await this.extractToolOutputFromInput(this.currentInput);
// Run guards
for (const guard of this.guards) {
if (!guard.isEnabled())
continue;
const guardStartTime = Date.now();
try {
// Promise.resolve() lets validate() be sync or async.
const result = await Promise.resolve(guard.validate(this.currentInput));
const guardLatency = Date.now() - guardStartTime;
this.guardsResults.push({
name: guard.name,
result,
latency: guardLatency,
});
// Handle guard failure
if (!result.passed && guard.shouldBlock()) {
const error = new types_1.GuardBlockedException(guard.name, result.reason ?? "Guard blocked request", result.severity ?? "medium", result.metadata);
if (this.guardFailureMode === "throw") {
// Log and throw
this.observability?.logError(error, {
traceId: this.currentTraceId,
input: this.currentInput,
guardName: guard.name,
});
throw error;
}
else if (this.guardFailureMode === "log_only") {
// Just log, don't block
this.observability?.logError(error, {
traceId: this.currentTraceId,
input: this.currentInput,
guardName: guard.name,
mode: "log_only",
});
}
// return_error mode is handled by the wrapper
}
}
catch (error) {
// Re-throw intentional blocks; anything else is a guard malfunction.
if (error instanceof types_1.GuardBlockedException) {
throw error;
}
// Guard itself failed - log but don't block
console.error(`Guard ${guard.name} failed:`, error);
}
}
}
/**
* Called after LLM completes (LangChain lifecycle hook)
*/
async handleLLMEnd(output, runId, _parentRunId, _tags, _extraParams) {
await this.processLLMEnd(output, runId);
}
/**
* Backwards compatibility with LangChain < 0.3 which calls onLLMEnd
*/
async onLLMEnd(output, runId) {
await this.processLLMEnd(output, runId);
}
/**
 * Shared post-LLM pipeline: extracts output text / tokens / tool calls,
 * runs output guards (log-only), persists the trace, then runs evals and
 * finally resets per-invocation state.
 *
 * NOTE(review): ordering matters — the trace MUST be logged before evals so
 * eval results can reference it (foreign key). Only comments are added here.
 */
async processLLMEnd(output, runId) {
console.log("[DOOORCallbackHandler] handleLLMEnd called", {
hasTraceId: !!this.currentTraceId,
hasInput: !!this.currentInput,
hasStartTime: !!this.startTime,
runId,
});
// Nothing to do if processLLMStart never ran (or state was already reset).
if (!this.currentTraceId || !this.currentInput || !this.startTime) {
console.log("[DOOORCallbackHandler] Skipping - missing trace data");
return;
}
const totalLatency = Date.now() - this.startTime;
console.log("[DOOORCallbackHandler] Processing trace, latency:", totalLatency);
console.log("[DOOORCallbackHandler] Raw output object keys:", Object.keys(output));
console.log("[DOOORCallbackHandler] Output structure:", JSON.stringify(output, null, 2));
const outputText = this.extractOutputText(output);
console.log("[DOOORCallbackHandler] Extracted output text length:", outputText.length);
const tokens = this.extractTokens(output);
console.log("[DOOORCallbackHandler] Extracted tokens:", tokens);
const modelName = this.currentModel || this.defaultModelName;
console.log("[DOOORCallbackHandler] Model name:", modelName);
// Extract tool calls from LLM output (LangGraph doesn't call tool hooks)
this.extractToolCallsFromOutput(output);
// Run output guards
for (const guard of this.outputGuards) {
if (!guard.isEnabled())
continue;
const guardStartTime = Date.now();
try {
const result = await Promise.resolve(guard.validate(outputText));
const guardLatency = Date.now() - guardStartTime;
// "(output)" suffix distinguishes these from input guards in the trace.
this.guardsResults.push({
name: guard.name + " (output)",
result,
latency: guardLatency,
});
if (!result.passed && guard.shouldBlock()) {
console.warn(`Output guard ${guard.name} detected issue:`, result.reason);
// For output guards, we typically just log (can't undo the LLM call)
}
}
catch (error) {
console.error(`Output guard ${guard.name} failed:`, error);
}
}
// Prepare trace data
console.log("[DOOORCallbackHandler] đ Preparing trace, tool calls captured:", this.toolCalls.length);
if (this.toolCalls.length > 0) {
console.log("[DOOORCallbackHandler] Tool calls data:", JSON.stringify(this.toolCalls, null, 2));
}
// Increment trace sequence when actually saving the trace (not just starting)
this.traceSequence++;
console.log("[DOOORCallbackHandler] đ Saving trace #" + this.traceSequence + " for session:", this.currentSessionId);
// Determine trace type based on context
const hasToolCallsInOutput = this.toolCalls.length > 0;
const hasHistoryInInput = this.currentInput?.includes("AI:") || this.currentInput?.includes("Tool:");
// NOTE(review): both branches of the inner ternary yield "llm_response",
// so hasHistoryInInput currently has no effect on traceType.
const traceType = hasToolCallsInOutput
? "llm_decision"
: hasHistoryInInput
? "llm_response"
: "llm_response"; // Default to response if unclear
const trace = {
traceId: this.currentTraceId,
input: this.currentInput,
output: outputText,
model: modelName,
latency: totalLatency,
tokens: tokens,
// cost removed - will be calculated server-side based on tokens and pricing table
timestamp: new Date(),
guards: this.guardsResults,
toolCalls: this.toolCalls.length > 0 ? this.toolCalls : undefined,
sessionId: this.currentSessionId,
traceSequence: this.traceSequence,
traceType: traceType,
};
// NOTE(review): trace has no `cost` field (see comment above), so hasCost
// is always false and cost always undefined — leftover diagnostic output.
console.log("[DOOORCallbackHandler] Trace object created:", {
traceId: trace.traceId,
model: trace.model,
hasTokens: !!trace.tokens,
tokens: trace.tokens,
hasCost: trace.cost !== undefined,
cost: trace.cost,
});
// Log trace FIRST (must complete before evals can reference it via foreign key)
console.log("[DOOORCallbackHandler] Logging trace to observability...");
await this.observability?.logTrace(trace);
console.log("[DOOORCallbackHandler] Trace logged");
this.observability?.logMetric("dooor.llm.latency", totalLatency, {
model: trace.model,
});
// Run evals AFTER trace is saved (to avoid foreign key violations)
const shouldRunEvals = this.shouldRunEvals();
if (shouldRunEvals && this.evals.length > 0) {
if (this.evalMode === "async") {
// Run async without blocking (trace already saved, so foreign key is satisfied)
this.runEvalsAsync(this.currentInput, outputText, { latency: totalLatency }, trace.traceId);
}
else {
// Run sync (blocking)
const evalResults = await this.runEvalsSync(this.currentInput, outputText, { latency: totalLatency });
if (evalResults.length > 0) {
if (this.observability) {
await this.observability.updateTrace(trace.traceId, { evals: evalResults });
}
}
}
}
// Reset state
this.currentTraceId = undefined;
this.currentInput = undefined;
this.startTime = undefined;
this.guardsResults = [];
this.toolCalls = [];
this.currentToolStart = undefined;
}
/**
* Extract tool calls from LLM output (for LangGraph agents where tool hooks aren't called)
*/
extractToolCallsFromOutput(output) {
try {
// Navigate through the output structure to find tool_calls
const generations = output?.generations?.[0];
if (!generations || !Array.isArray(generations)) {
return;
}
for (const generation of generations) {
const message = generation?.message;
const toolCalls = message?.kwargs?.tool_calls || message?.tool_calls;
if (toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) {
console.log("[DOOORCallbackHandler] đ Found tool_calls in LLM output:", toolCalls.length);
for (const toolCall of toolCalls) {
const toolName = toolCall.name || "unknown_tool";
const toolArgs = toolCall.args || {};
const toolId = toolCall.id || "unknown_id";
console.log("[DOOORCallbackHandler] âī¸ Tool call detected:", toolName, "args:", toolArgs);
// Add to tool calls array (we don't have output yet, will be in next LLM call)
const entry = {
trace_id: this.currentTraceId,
tool_call_id: toolId,
tool_name: toolName,
input: toolArgs,
output: null, // Will be populated if we can extract it
latency_ms: 0, // Unknown from output
started_at: new Date().toISOString(),
ended_at: new Date().toISOString(),
};
this.toolCalls.push(entry);
this.pendingToolCalls.push(entry);
if (entry.trace_id) {
const existing = this.toolCallsByTrace.get(entry.trace_id) ?? [];
existing.push(entry);
this.toolCallsByTrace.set(entry.trace_id, existing);
}
}
}
}
}
catch (error) {
console.error("[DOOORCallbackHandler] Error extracting tool calls from output:", error);
}
}
/**
* Extract tool output from input (for LangGraph agents where tool result is in next LLM input)
*/
async extractToolOutputFromInput(inputText) {
try {
console.log("[DOOORCallbackHandler] đ Attempting to extract tool output from input");
console.log("[DOOORCallbackHandler] Input text sample:", inputText.substring(0, 500));
console.log("[DOOORCallbackHandler] Pending tool calls count:", this.pendingToolCalls.length);
// Look for "Tool: <name>, <output>" pattern in input
// Made more flexible to handle newlines and various JSON formats
const toolPattern = /Tool:\s*(\w+)\s*,\s*(\{[^}]*\}|\{[\s\S]*?\})/g;
const matches = Array.from(inputText.matchAll(toolPattern));
console.log("[DOOORCallbackHandler] Regex matches found:", matches.length);
if (matches.length > 0 && this.pendingToolCalls.length > 0) {
console.log("[DOOORCallbackHandler] đ Found tool outputs in input:", matches.length);
for (const match of matches) {
const toolName = match[1];
const toolOutputStr = match[2];
console.log("[DOOORCallbackHandler] Processing match - Tool:", toolName, "Output:", toolOutputStr.substring(0, 100));
// Find the corresponding pending tool call (last one with matching name and no output)
let toolCallIndex = -1;
for (let i = this.pendingToolCalls.length - 1; i >= 0; i--) {
const pending = this.pendingToolCalls[i];
if (pending.tool_name === toolName && pending.output === null) {
toolCallIndex = i;
break;
}
}
if (toolCallIndex !== -1) {
const toolCall = this.pendingToolCalls[toolCallIndex];
try {
toolCall.output = JSON.parse(toolOutputStr);
toolCall.ended_at = new Date().toISOString();
// Calculate latency if we can
const started = new Date(toolCall.started_at).getTime();
const ended = new Date(toolCall.ended_at).getTime();
toolCall.latency_ms = ended - started;
console.log("[DOOORCallbackHandler] â
Populated output for tool:", toolName, "latency:", toolCall.latency_ms, "ms");
}
catch (e) {
console.log("[DOOORCallbackHandler] Failed to parse JSON, using raw string:", e);
toolCall.output = toolOutputStr;
toolCall.ended_at = new Date().toISOString();
}
// Try to update the original trace
await this.persistToolCallUpdate(toolCall);
// Also include in current trace as fallback (if we're processing a new trace)
if (this.currentTraceId && this.currentTraceId !== toolCall.trace_id) {
// This is a new trace, include the updated tool call
const updatedToolCall = { ...toolCall };
updatedToolCall.trace_id = this.currentTraceId;
this.toolCalls.push(updatedToolCall);
console.log("[DOOORCallbackHandler] đ Added updated tool call to current trace as fallback");
}
this.pendingToolCalls.splice(toolCallIndex, 1);
}
else {
console.log("[DOOORCallbackHandler] â ī¸ No matching tool call found for:", toolName);
}
}
}
else {
if (this.pendingToolCalls.length === 0) {
console.log("[DOOORCallbackHandler] âšī¸ No tool calls to populate");
}
else {
console.log("[DOOORCallbackHandler] â ī¸ Tool pattern not found in input");
}
}
}
catch (error) {
console.error("[DOOORCallbackHandler] Error extracting tool output from input:", error);
}
}
/**
* Called when a tool starts execution (LangChain lifecycle hook)
*/
async handleToolStart(tool, input, runId) {
const toolName = tool?.name || "unknown_tool";
console.log("[DOOORCallbackHandler] âī¸ Tool started:", toolName, "with input:", input);
console.log("[DOOORCallbackHandler] Tool object:", JSON.stringify(tool));
console.log("[DOOORCallbackHandler] RunId:", runId);
this.currentToolStart = {
name: toolName,
input: input,
startTime: Date.now(),
traceId: this.currentTraceId,
};
}
/**
* Backwards compatibility - some versions call onToolStart
*/
async onToolStart(tool, input, runId) {
await this.handleToolStart(tool, input, runId);
}
/**
* Called when a tool finishes execution (LangChain lifecycle hook)
*/
async handleToolEnd(output, runId) {
console.log("[DOOORCallbackHandler] âī¸ Tool ended, output:", output);
console.log("[DOOORCallbackHandler] Current tool start state:", this.currentToolStart);
if (!this.currentToolStart) {
console.log("[DOOORCallbackHandler] â ī¸ Tool ended but no start recorded - this is a bug!");
return;
}
const latency = Date.now() - this.currentToolStart.startTime;
const endTime = new Date().toISOString();
const startTime = new Date(this.currentToolStart.startTime).toISOString();
console.log("[DOOORCallbackHandler] â
Tool completed:", this.currentToolStart.name, "latency:", latency);
const entry = {
trace_id: this.currentToolStart.traceId,
tool_name: this.currentToolStart.name,
input: this.currentToolStart.input,
output: output,
latency_ms: latency,
started_at: startTime,
ended_at: endTime,
};
this.toolCalls.push(entry);
if (entry.trace_id) {
const existing = this.toolCallsByTrace.get(entry.trace_id) ?? [];
existing.push(entry);
this.toolCallsByTrace.set(entry.trace_id, existing);
}
console.log("[DOOORCallbackHandler] đ Total tool calls captured:", this.toolCalls.length);
this.currentToolStart = undefined;
}
/**
* Backwards compatibility - some versions call onToolEnd
*/
async onToolEnd(output, runId) {
await this.handleToolEnd(output, runId);
}
/**
 * Called when a tool errors (LangChain lifecycle hook).
 * Records a failed tool-call entry (output null, error message attached)
 * using the state captured by handleToolStart.
 */
async handleToolError(error, runId) {
    const started = this.currentToolStart;
    if (!started) {
        console.log("[DOOORCallbackHandler] Tool errored but no start recorded");
        return;
    }
    const elapsedMs = Date.now() - started.startTime;
    console.log("[DOOORCallbackHandler] Tool errored:", started.name, error.message);
    const record = {
        trace_id: started.traceId,
        tool_name: started.name,
        input: started.input,
        output: null,
        latency_ms: elapsedMs,
        started_at: new Date(started.startTime).toISOString(),
        ended_at: new Date().toISOString(),
        error: error.message,
    };
    this.toolCalls.push(record);
    if (record.trace_id) {
        const bucket = this.toolCallsByTrace.get(record.trace_id) ?? [];
        bucket.push(record);
        this.toolCallsByTrace.set(record.trace_id, bucket);
    }
    this.currentToolStart = undefined;
}
/**
 * Backwards compatibility - some versions call onToolError
 */
async onToolError(error, runId) {
    await this.handleToolError(error, runId);
}
/**
* Persist tool call updates to observability backend
*/
async persistToolCallUpdate(toolCall) {
if (!toolCall.trace_id || !this.observability) {
return;
}
try {
const toolCallsForTrace = this.toolCallsByTrace.get(toolCall.trace_id);
await this.observability.updateTrace(toolCall.trace_id, {
toolCalls: toolCallsForTrace ?? [toolCall],
});
const stillPending = this.pendingToolCalls.some((pending) => pending.trace_id === toolCall.trace_id);
if (!stillPending && toolCall.trace_id) {
this.toolCallsByTrace.delete(toolCall.trace_id);
}
}
catch (error) {
console.error("[DOOORCallbackHandler] Failed to persist tool call update:", error);
}
}
/**
 * Called on LLM error (LangChain lifecycle hook).
 * Forwards the error with the current trace context to observability.
 */
async handleLLMError(error, runId, _parentRunId, _tags, _extraParams) {
    const context = {
        traceId: this.currentTraceId,
        input: this.currentInput,
    };
    this.observability?.logError(error, context);
}
/**
 * Legacy alias: LangChain < 0.3 invokes onLLMError instead.
 */
async onLLMError(error, runId) {
    await this.handleLLMError(error, runId);
}
/**
* Run evals asynchronously (non-blocking)
*/
async runEvalsAsync(input, output, metadata, traceId) {
// Run in background without blocking
setImmediate(async () => {
try {
console.log("[DOOORCallbackHandler] Starting async evals for trace:", traceId);
const evalResults = await this.runEvalsSync(input, output, metadata);
if (evalResults.length > 0) {
console.log("[DOOORCallbackHandler] Evals completed, updating trace with results:", evalResults.length);
if (this.observability) {
await this.observability.updateTrace(traceId, { evals: evalResults });
}
}
}
catch (error) {
console.error("Error running async evals:", error);
}
});
}
/**
* Run evals synchronously (blocking)
*/
async runEvalsSync(input, output, metadata) {
const evalResults = [];
for (const evalInstance of this.evals) {
if (!evalInstance.isEnabled())
continue;
const evalStartTime = Date.now();
try {
const result = await Promise.resolve(evalInstance.evaluate(input, output, metadata));
const evalLatency = Date.now() - evalStartTime;
evalResults.push({
name: evalInstance.name,
result,
latency: evalLatency,
});
// Log metric
this.observability?.logMetric(`dooor.eval.${evalInstance.name}.score`, result.score, {
passed: result.passed ? "true" : "false",
});
}
catch (error) {
console.error(`Eval ${evalInstance.name} failed:`, error);
}
}
return evalResults;
}
/**
* Determine if evals should run (sampling logic)
*/
shouldRunEvals() {
if (this.evalMode === "sample") {
return Math.random() < this.evalSampleRate;
}
return true;
}
/**
* Extract text output from LangChain response
*/
extractOutputText(output) {
if (typeof output === "string") {
return output;
}
if (output?.generations?.[0]?.[0]?.text) {
return output.generations[0][0].text;
}
if (output?.text) {
return output.text;
}
if (output?.content) {
return output.content;
}
return JSON.stringify(output);
}
/**
* Extract token usage from LangChain response
*/
extractTokens(output) {
console.log("[DOOORCallbackHandler] extractTokens - checking multiple locations...");
const normalize = (usage, source) => {
console.log(`[DOOORCallbackHandler] Trying to normalize tokens from: ${source}`, usage);
if (!usage)
return undefined;
const prompt = usage.prompt_tokens ?? usage.promptTokens ?? usage.input_tokens ?? usage.inputTokens;
const completion = usage.completion_tokens ?? usage.completionTokens ?? usage.output_tokens ?? usage.outputTokens;
const total = usage.total_tokens ?? usage.totalTokens ?? (prompt ?? 0) + (completion ?? 0);
if (prompt == null && completion == null && total == null) {
console.log(`[DOOORCallbackHandler] ${source} - no tokens found`);
return undefined;
}
const result = {
prompt: prompt ?? (total ?? 0) - (completion ?? 0),
completion: completion ?? (total ?? 0) - (prompt ?? 0),
total: total ?? ((prompt ?? 0) + (completion ?? 0)),
};
console.log(`[DOOORCallbackHandler] ${source} - normalized tokens:`, result);
return result;
};
const llmUsage = output?.llmOutput?.tokenUsage;
const normalizedLLMUsage = normalize(llmUsage, "llmOutput.tokenUsage");
if (normalizedLLMUsage) {
console.log("[DOOORCallbackHandler] â
Tokens extracted from llmOutput.tokenUsage");
return normalizedLLMUsage;
}
const directUsage = normalize(output?.usage_metadata || output?.usageMetadata, "usage_metadata");
if (directUsage) {
console.log("[DOOORCallbackHandler] â
Tokens extracted from usage_metadata");
return directUsage;
}
const generationMessage = output?.generations?.[0]?.[0]?.message;
const generationUsage = normalize(generationMessage?.usage_metadata || generationMessage?.usageMetadata, "generations[0][0].message.usage_metadata");
if (generationUsage) {
console.log("[DOOORCallbackHandler] â
Tokens extracted from generation message usage_metadata");
return generationUsage;
}
const responseUsage = normalize(output?.response_metadata?.tokenUsage ||
output?.response_metadata?.usageMetadata ||
generationMessage?.response_metadata?.tokenUsage ||
generationMessage?.response_metadata?.usageMetadata, "response_metadata.tokenUsage");
if (responseUsage) {
console.log("[DOOORCallbackHandler] â
Tokens extracted from response_metadata");
return responseUsage;
}
const genericUsage = normalize(output?.usage, "usage");
if (genericUsage) {
console.log("[DOOORCallbackHandler] â
Tokens extracted from generic usage field");
return genericUsage;
}
console.log("[DOOORCallbackHandler] â No tokens found in any location!");
return undefined;
}
}
exports.DOOORCallbackHandler = DOOORCallbackHandler;