// judgeval — Judgment SDK for TypeScript/JavaScript
// Version: (not recorded)
// Listing metadata: JavaScript, 444 lines, 25.2 kB
/**
 * Downlevel async/await helper: drives a generator function to completion,
 * treating every yielded value as something to await.
 *
 * Reuses an `__awaiter` already present on `this` when there is one.
 *
 * @param {*} thisArg `this` value the generator function is invoked with.
 * @param {Array} [_arguments] Arguments forwarded to the generator function.
 * @param {Function} [P] Promise constructor to use; defaults to the global `Promise`.
 * @param {Function} generator Generator function whose `yield`s act as `await`s.
 * @returns {Promise} Settles with the generator's return value, or rejects with
 *     whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so that `.then` is always available on what was yielded.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new P((resolve, reject) => {
        // Either finish with the generator's result or wait on the yielded value.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                adopt(result.value).then(onFulfilled, onRejected);
            }
        };
        // Resume the generator with the awaited value.
        const onFulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        // Re-throw the rejection inside the generator so its try/catch can see it.
        const onRejected = (reason) => {
            try {
                step(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
import { BaseCallbackHandler, } from "@langchain/core/callbacks/base";
import { Tracer, TraceClient } from "../tracer.js";
import * as uuid from "uuid";
// Removed ActiveNodeSpan interface
// Match Python structure more closely
/**
 * LangChain callback handler that mirrors a LangGraph run into a Judgeval
 * trace: graph nodes, tools, LLM/chat-model calls and retrievers each become a
 * span on a `TraceClient`.
 *
 * The handler keeps its own reference to the active `TraceClient`
 * (`this.traceClient`). The client is either taken from the tracer's current
 * context or created by the handler when the root graph run starts.
 *
 * Attributes exposed for callers:
 *  - `executedNodes`: node names in the order their spans were started.
 *  - `executedTools`: tool names in the order their spans were started.
 *  - `executedNodeTools`: `"<node>:<tool>"` entries (or just `"<tool>"` when no
 *    node has been seen yet).
 */
export class JudgevalLanggraphCallbackHandler extends BaseCallbackHandler {
    /**
     * @param {Tracer} [tracer] Tracer to record into; defaults to `Tracer.getInstance()`.
     */
    constructor(tracer) {
        super();
        this.name = "judgeval_langgraph_callback_handler";
        this._startTime = 0;
        this.finished = false;
        this.rootSpanStarted = false;
        this.executedNodeTools = [];
        this.executedNodes = [];
        this.executedTools = [];
        this.tracer = tracer != null ? tracer : Tracer.getInstance();
        console.log(`[Judgeval Handler] Initialized. Monitoring Enabled: ${this.tracer.enableMonitoring}`);
        // traceClient is assigned lazily by the first callback that finds or creates one.
    }

    /**
     * Binds the handler to `client` and resets all per-run bookkeeping.
     *
     * The tracking arrays are emptied in place so references held by callers
     * stay valid.
     *
     * @param {TraceClient} client Client that subsequent spans are recorded on.
     */
    initializeRunState(client) {
        this.traceClient = client;
        this.previousNode = undefined;
        this._startTime = 0;
        this.finished = false;
        this.rootSpanStarted = true;
        this.executedNodeTools.length = 0;
        this.executedNodes.length = 0;
        this.executedTools.length = 0;
        console.log(`[Judgeval Handler] Run state initialized for TraceClient ID: ${client.traceId}`);
    }

    /**
     * Opens a span on the active client.
     *
     * @param {string} name Span name.
     * @param {string} [spanType="span"] Span type forwarded to the client.
     * @returns {string|undefined} The client's current span id after opening,
     *     or `undefined` when there is no active client.
     */
    startSpan(name, spanType = "span") {
        if (!this.traceClient) {
            console.warn(`[Judgeval Handler] startSpan(${name}, ${spanType}) called but traceClient is undefined.`);
            return undefined;
        }
        const parentSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Before startSpan(${name}): Current Span ID = ${parentSpanId}`);
        this.traceClient.startSpan(name, { spanType });
        const newSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Started span: ${name} (ID: ${newSpanId}), Parent reported: ${parentSpanId}, Type: ${spanType}`);
        return newSpanId;
    }

    /**
     * Closes the client's current span. When the graph has been marked
     * finished and no span remains open afterwards, the trace is saved (with
     * overwrite) and the handler releases its client.
     *
     * @param {string} [context] Free-form label used only in log output.
     */
    endSpan(context) {
        const label = context != null ? context : '';
        if (!this.traceClient) {
            console.warn(`[Judgeval Handler] endSpan(${label}) called but traceClient is undefined.`);
            return;
        }
        const currentSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Before endSpan(${label}): Current Span ID = ${currentSpanId}`);
        this.traceClient.endSpan();
        const spanIdAfterEnd = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Ended span: ${currentSpanId} (Context: ${context != null ? context : 'N/A'}). Current Span ID after end: ${spanIdAfterEnd}`);
        const runFullyClosed = this.rootSpanStarted && this.finished && spanIdAfterEnd === undefined;
        if (!runFullyClosed) {
            return;
        }
        console.log("[Judgeval Handler] Root context likely ended and graph finished. Saving trace.");
        // Detach the client before the asynchronous save so callbacks that
        // arrive while it is in flight do not record onto a finished trace.
        const clientToSave = this.traceClient;
        this.traceClient = undefined;
        this.rootSpanStarted = false;
        clientToSave.save(true)
            .then(() => console.log(`[Judgeval Handler] Trace ${clientToSave.traceId} saved.`))
            .catch((err) => console.error(`[Judgeval Handler] Error saving trace ${clientToSave.traceId}:`, err));
    }

    /**
     * Returns the client for the current run, creating one when the root graph
     * run starts and none is available from the tracer's context.
     *
     * @param {string} [name] Run name reported by the chain-start callback.
     * @param {string} [runType] Run type reported by the chain-start callback.
     * @returns {TraceClient|undefined} `undefined` when monitoring is disabled,
     *     or when a non-root chain starts without any client in context.
     */
    getOrCreateRootTraceClient(name, runType) {
        if (!this.tracer.enableMonitoring) {
            return undefined;
        }
        if (this.traceClient) {
            return this.traceClient;
        }
        const isLangGraphRootStart = name === 'LangGraph' || runType === 'Graph';
        if (!isLangGraphRootStart) {
            // Not the root: only adopt a client that an outer wrapper put in context.
            const contextClient = this.tracer.getCurrentTrace();
            if (!contextClient) {
                console.warn(`[Judgeval Handler] handleChainStart called for non-root chain ('${name}') but no TraceClient found in context.`);
                return undefined;
            }
            console.log(`[Judgeval Handler] Found TraceClient in context mid-run (ID: ${contextClient.traceId}). Initializing run state.`);
            this.initializeRunState(contextClient);
            return this.traceClient;
        }
        console.log("[Judgeval Handler] LangGraph root run detected. Getting/Creating TraceClient.");
        let client = this.tracer.getCurrentTrace();
        if (client) {
            console.log(`[Judgeval Handler] Found existing TraceClient in context (ID: ${client.traceId}). Initializing run state.`);
            this.initializeRunState(client);
        }
        else {
            console.log("[Judgeval Handler] No TraceClient in context, creating new one.");
            client = new TraceClient({
                tracer: this.tracer,
                traceId: uuid.v4(),
                name: "LangGraphRun",
                projectName: this.tracer.projectName,
                overwrite: false,
                rules: this.tracer.defaultRules,
                enableMonitoring: this.tracer.enableMonitoring,
                enableEvaluations: this.tracer.enableEvaluations,
                apiKey: this.tracer.apiKey,
                organizationId: this.tracer.organizationId
            });
            // Fire-and-forget initial save; a failure is logged but does not abort the run.
            client.save(false).catch((err) => console.error("[Judgeval Handler] Error saving initial empty trace:", err));
            // The new client is stored on the handler only; it is not pushed
            // into the tracer's context here.
            this.initializeRunState(client);
            console.log(`[Judgeval Handler] Created new TraceClient (ID: ${client.traceId}). Run state initialized.`);
        }
        // The root span is opened only after run state points at the client.
        this.startSpan("LangGraphRoot", "Main Function");
        return this.traceClient;
    }

    /**
     * Picks the client a `*Start` callback should record on: the tracer's
     * context client when there is one, otherwise the client this handler
     * already holds. Logs a warning when neither exists.
     *
     * Keeping the handler's own client as a fallback matters because a client
     * created by `getOrCreateRootTraceClient` lives only on the handler; it
     * would otherwise be discarded by the first tool/LLM/retriever start that
     * runs without a context trace.
     *
     * @param {string} callbackName Name of the calling callback, used in the warning.
     * @returns {TraceClient|undefined} The resolved client, also stored on `this.traceClient`.
     */
    resolveTraceClient(callbackName) {
        const contextClient = this.tracer.getCurrentTrace();
        if (contextClient) {
            this.traceClient = contextClient;
        }
        if (!this.traceClient) {
            console.warn(`[Judgeval Handler] ${callbackName}: No TraceClient found.`);
        }
        return this.traceClient;
    }

    /**
     * Opens a `retriever` span named after the retriever and records the query.
     * Does nothing when no client can be resolved.
     */
    handleRetrieverStart(serialized, query, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const client = this.resolveTraceClient("handleRetrieverStart");
            if (!client) {
                return;
            }
            let spanName = "RETRIEVER_CALL";
            if (name) {
                spanName = `RETRIEVER_${name.toUpperCase()}`;
            }
            else if (serialized != null && serialized.name) {
                spanName = `RETRIEVER_${serialized.name.toUpperCase()}`;
            }
            this.startSpan(spanName, "retriever");
            client.recordInput({
                query,
                tags,
                metadata,
                options,
            });
        });
    }

    /**
     * Records a summary of the retrieved documents (content truncated to 100
     * characters) and closes the current span.
     *
     * Tolerates a non-array `documents` and documents without a string
     * `pageContent`, so the span is always closed.
     */
    handleRetrieverEnd(documents, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            const maxContentLength = 100;
            const ellipsis = "...";
            const docs = Array.isArray(documents) ? documents : [];
            const docSummary = docs.map((doc, index) => {
                const content = doc == null ? undefined : doc.pageContent;
                const needsTruncation = typeof content === 'string' && content.length > maxContentLength;
                return {
                    index,
                    page_content: needsTruncation
                        ? `${content.substring(0, maxContentLength - ellipsis.length)}${ellipsis}`
                        : content,
                    metadata: doc == null ? undefined : doc.metadata,
                };
            });
            this.traceClient.recordOutput({ document_count: docs.length, documents: docSummary });
            this.endSpan("RetrieverEnd");
        });
    }

    /** Records the error on the current span and closes it. */
    handleRetrieverError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Retriever error: ${error}`);
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("RetrieverError");
        });
    }

    /**
     * Ensures a client exists (creating it on the root graph start) and opens a
     * `node` span whenever `metadata.langgraph_node` changes from the
     * previously seen node.
     */
    handleChainStart(serialized, inputs, runId, parentRunId, tags, metadata, runType, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const nodeName = metadata == null ? undefined : metadata.langgraph_node;
            console.log(`[Judgeval Handler] handleChainStart: Name=${name}, Type=${runType}, Node=${nodeName}, RunID=${runId}`);
            const client = this.getOrCreateRootTraceClient(name, runType);
            if (!client) {
                // Monitoring is off, or a non-root chain started without a client in context.
                return;
            }
            const isLangGraphRootStartEvent = (name === 'LangGraph' || runType === 'Graph') && this.rootSpanStarted;
            // The root start event itself is not a node.
            if (isLangGraphRootStartEvent && nodeName === undefined) {
                console.log(`[Judgeval Handler] Skipping node processing for root start event.`);
                return;
            }
            if (nodeName && nodeName !== this.previousNode) {
                console.log(`[Judgeval Handler] New node detected: ${nodeName}`);
                this.startSpan(nodeName, "node");
                this.executedNodes.push(nodeName);
                // Remembered so later tool spans can be attributed to this node.
                this.previousNode = nodeName;
                client.recordInput({
                    args: inputs,
                    kwargs: { tags, metadata, runType, name: nodeName, options }
                });
            }
            else if (nodeName) {
                // Nested chain inside the node that is already open: no new span.
                console.log(`[Judgeval Handler] Chain start ('${name}') within existing node context ('${nodeName}').`);
            }
            else {
                console.log(`[Judgeval Handler] Generic chain start ('${name}') detected. Node context: '${this.previousNode}'.`);
            }
        });
    }

    /**
     * Records the chain output on the current span, closes the span when the
     * tags mark a graph step, and flags the run as finished on an end signal.
     *
     * NOTE(review): the fifth parameter is read here as run metadata. The
     * LangChain.js base handler appears to pass a `kwargs` object in that
     * position for chain-end callbacks — confirm against the installed
     * `@langchain/core` version. It is only used for logging.
     */
    handleChainEnd(outputs, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const currentSpanId = this.traceClient ? this.traceClient.getCurrentSpanId() : undefined;
            const nodeNameFromMetadata = metadata == null ? undefined : metadata.langgraph_node;
            console.log(`[Judgeval Handler] handleChainEnd: RunID=${runId}, Node=${nodeNameFromMetadata}, CurrentSpanID=${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                console.warn(`[Judgeval Handler] handleChainEnd called but no active span. RunID=${runId}`);
                return;
            }
            this.traceClient.recordOutput(outputs);
            const isGraphStepEnd = tags != null && tags.some((tag) => tag.includes("graph:step"));
            if (isGraphStepEnd) {
                console.log(`[Judgeval Handler] Graph step end detected for span: ${currentSpanId}. Ending span as 'node'.`);
                // The current span is assumed to be the node span opened in handleChainStart.
                this.endSpan(`NodeEnd: ${this.previousNode != null ? this.previousNode : 'unknown'}`);
            }
            const isEndSignal = outputs === "__end__";
            const isGraphFinishTag = tags != null
                && tags.some((tag) => tag.includes(":graph:finish") || tag.includes(":__end__"));
            if (!isEndSignal && !isGraphFinishTag) {
                return;
            }
            console.log(`[Judgeval Handler] Graph finished signal detected.`);
            this.finished = true;
            // endSpan above may have released the client, hence the re-check.
            const spanIdAfterNodeEnd = this.traceClient ? this.traceClient.getCurrentSpanId() : undefined;
            if (this.rootSpanStarted && !spanIdAfterNodeEnd) {
                console.log(`[Judgeval Handler] Graph finished signal and span stack is empty. Save should occur.`);
            }
        });
    }

    /**
     * Records the error on the current span and closes it only when the tags
     * mark a graph step; other chain errors leave the span open so the root
     * span is not popped early.
     */
    handleChainError(error, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Chain error: ${error}`);
            const currentSpanId = this.traceClient ? this.traceClient.getCurrentSpanId() : undefined;
            console.log(`[Judgeval Handler] ChainError occurred during span: ${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordError(error);
            const isGraphStepError = tags != null && tags.some((tag) => tag.includes("graph:step"));
            if (isGraphStepError) {
                console.log(`[Judgeval Handler] Ending current span ${currentSpanId} as NodeError due to graph:step tag.`);
                this.endSpan(`NodeError: ${this.previousNode != null ? this.previousNode : 'unknown'}`);
            }
        });
    }

    /**
     * Opens a `tool` span, records the tool input, and appends the tool to
     * `executedTools` / `executedNodeTools`.
     */
    handleToolStart(serialized, inputStr, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            let toolName = "unknown_tool";
            if (name != null) {
                toolName = name;
            }
            else if (serialized != null && serialized.name != null) {
                toolName = serialized.name;
            }
            console.log(`[Judgeval Handler] handleToolStart: Name=${toolName}, RunID=${runId}`);
            const client = this.resolveTraceClient("handleToolStart");
            if (!client) {
                return;
            }
            // previousNode is whatever node the last handleChainStart opened.
            console.log(`[Judgeval Handler] Starting tool span: ${toolName} (Parent Node Hint: ${this.previousNode})`);
            this.startSpan(toolName, "tool");
            this.executedTools.push(toolName);
            this.executedNodeTools.push(this.previousNode ? `${this.previousNode}:${toolName}` : toolName);
            client.recordInput({
                args: inputStr,
                kwargs: { tags, metadata, options }
            });
        });
    }

    /** Records the tool output on the current span and closes it. */
    handleToolEnd(output, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const currentSpanId = this.traceClient ? this.traceClient.getCurrentSpanId() : undefined;
            console.log(`[Judgeval Handler] handleToolEnd: RunID=${runId}, CurrentSpanID=${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordOutput(output);
            this.endSpan("ToolEnd");
        });
    }

    /** Records the tool error on the current span and closes it. */
    handleToolError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Tool error: ${error}`);
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("ToolError");
        });
    }

    /** Intentionally a no-op: agent actions do not open or close spans. */
    handleAgentAction() {
        return __awaiter(this, void 0, void 0, function* () { });
    }

    /** Intentionally a no-op: agent finishes do not open or close spans. */
    handleAgentFinish() {
        return __awaiter(this, void 0, void 0, function* () { });
    }

    /** Opens an `llm` span and records the prompts plus call parameters. */
    handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const llmName = name != null ? name : "LLM call";
            const client = this.resolveTraceClient("handleLLMStart");
            if (!client) {
                return;
            }
            console.log(`[Judgeval Handler] Starting LLM span: ${llmName}`);
            this.startSpan(llmName, "llm");
            client.recordInput({
                args: prompts,
                kwargs: Object.assign({
                    extra_params: extraParams,
                    tags: tags,
                    metadata: metadata,
                    serialized_id: serialized == null ? undefined : serialized.id,
                }, options),
            });
        });
    }

    /**
     * Reduces an LLM result to a single string for recording.
     *
     * Uses the first generation's `text`, else its `message.content`
     * (JSON-encoded when not a string). When there are no generations it
     * falls back to the JSON-encoded `llmOutput`. Never throws: extraction
     * failures are logged and returned as an error description.
     *
     * @param {object} output Result object passed to `handleLLMEnd`.
     * @returns {string} Text to record as the span output.
     */
    extractLLMOutputText(output) {
        const fallback = "No text output found";
        try {
            const generations = output.generations;
            if (generations && generations.length > 0 && generations[0].length > 0) {
                const firstGen = generations[0][0];
                if (firstGen.text) {
                    return firstGen.text;
                }
                const message = firstGen.message;
                if (message && typeof message === 'object' && 'content' in message) {
                    return typeof message.content === 'string'
                        ? message.content
                        : JSON.stringify(message.content);
                }
                return fallback;
            }
            if (output.llmOutput) {
                return JSON.stringify(output.llmOutput);
            }
            return fallback;
        }
        catch (e) {
            console.error("[Judgeval Handler] Error extracting LLM output text:", e);
            return `Error extracting output: ${e instanceof Error ? e.message : String(e)}`;
        }
    }

    /**
     * Records the extracted LLM text on the current span and closes it. Also
     * closes spans opened by `handleChatModelStart`.
     */
    handleLLMEnd(output, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            this.traceClient.recordOutput(this.extractLLMOutputText(output));
            console.log(`[Judgeval Handler] Ending LLM span: ${this.traceClient.getCurrentSpanId()}`);
            this.endSpan("LLMEnd");
        });
    }

    /** Records the LLM error on the current span and closes it. */
    handleLLMError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] LLM error: ${error}`);
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("LLMError");
        });
    }

    /**
     * JSON-encodes the nested message lists passed to `handleChatModelStart`.
     *
     * Messages exposing `toDict()` are converted through it; anything else is
     * encoded as-is. Never throws, so a span that is already open still gets
     * an input recorded.
     *
     * @param {Array<Array<object>>} messages Message batches from the callback.
     * @returns {string} JSON text, or an error description when encoding fails.
     */
    serializeChatMessages(messages) {
        try {
            const batches = Array.isArray(messages) ? messages : [];
            const plain = batches.map((batch) => (Array.isArray(batch) ? batch : []).map((msg) => (msg != null && typeof msg.toDict === 'function' ? msg.toDict() : msg)));
            return JSON.stringify(plain);
        }
        catch (e) {
            return `Error serializing messages: ${e instanceof Error ? e.message : String(e)}`;
        }
    }

    /**
     * Opens an `llm` span for a chat-model call and records the serialized
     * messages. The span name is the run name when given, otherwise a
     * provider-specific name derived from the serialized id. The span is
     * closed later by `handleLLMEnd` / `handleLLMError`.
     */
    handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const client = this.resolveTraceClient("handleChatModelStart");
            if (!client) {
                return;
            }
            const idParts = serialized == null ? undefined : serialized.id;
            const serializedId = Array.isArray(idParts) ? idParts.join("::") : "";
            // Checked in order; the first provider keyword found in the id wins.
            const providerSpanNames = [
                ["openai", "OPENAI_API_CALL"],
                ["anthropic", "ANTHROPIC_API_CALL"],
                ["together", "TOGETHER_API_CALL"],
            ];
            let modelName = "LLM call";
            if (name) {
                modelName = name;
            }
            else {
                const match = providerSpanNames.find((entry) => serializedId.includes(entry[0]));
                if (match) {
                    modelName = match[1];
                }
            }
            console.log(`[Judgeval Handler] Starting ChatModel span: ${modelName}`);
            // The span is opened before serialization so that the matching
            // end/error callback always has a span of its own to close.
            this.startSpan(modelName, "llm");
            client.recordInput({
                args: this.serializeChatMessages(messages),
                kwargs: Object.assign({
                    extra_params: extraParams,
                    tags: tags,
                    metadata: metadata,
                    serialized_id: serializedId,
                }, options),
            });
        });
    }
}
//# sourceMappingURL=langgraph.js.map