UNPKG

judgeval

Version:

Judgment SDK for TypeScript/JavaScript

judgmentlabs.ai

JudgmentLabs/judgeval-js

444 lines • 25.2 kB

JavaScript

var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { BaseCallbackHandler, } from "@langchain/core/callbacks/base";
import { Tracer, TraceClient } from "../tracer.js";
import * as uuid from "uuid";

/**
 * LangChain callback handler that records LangGraph activity on a Judgment TraceClient.
 *
 * Chain, tool, LLM, chat-model and retriever callbacks are translated into
 * startSpan / recordInput / recordOutput / recordError / endSpan calls on the
 * TraceClient. Span nesting is delegated entirely to the TraceClient: this
 * handler always operates on whatever span the client reports as current.
 *
 * Publicly readable bookkeeping (reset by initializeRunState):
 *  - executedNodes:     node names, in the order a node span was started
 *  - executedTools:     tool names, in the order a tool span was started
 *  - executedNodeTools: "<node>:<tool>" (or just "<tool>" when no node is known)
 */
export class JudgevalLanggraphCallbackHandler extends BaseCallbackHandler {
    /**
     * @param {Tracer} [tracer] Tracer to record into; falls back to Tracer.getInstance().
     */
    constructor(tracer) {
        super();
        this.name = "judgeval_langgraph_callback_handler";
        this._startTime = 0;
        this.finished = false;
        this.rootSpanStarted = false;
        this.executedNodeTools = [];
        this.executedNodes = [];
        this.executedTools = [];
        this.tracer = tracer != null ? tracer : Tracer.getInstance();
        console.log(`[Judgeval Handler] Initialized. Monitoring Enabled: ${this.tracer.enableMonitoring}`);
        // traceClient is assigned later, by getOrCreateRootTraceClient.
    }

    /**
     * Binds the handler to `client` and resets all per-run state.
     *
     * The tracking arrays are emptied in place, so references a caller already
     * holds to them stay valid.
     *
     * @param {TraceClient} client Trace client that subsequent callbacks will record into.
     */
    initializeRunState(client) {
        this.traceClient = client;
        this.previousNode = undefined;
        this._startTime = 0;
        this.finished = false;
        // A client exists now, so the run counts as started.
        this.rootSpanStarted = true;
        this.executedNodeTools.length = 0;
        this.executedNodes.length = 0;
        this.executedTools.length = 0;
        console.log(`[Judgeval Handler] Run state initialized for TraceClient ID: ${client.traceId}`);
    }

    /**
     * Returns the trace client the callbacks should record into.
     *
     * A client found in the tracer's current context takes precedence and is
     * adopted. When the context has none, the client already held by the handler
     * is kept: a client created by getOrCreateRootTraceClient is stored only on
     * the handler, so overwriting it with the (empty) context lookup would
     * silently drop every subsequent span.
     *
     * @returns {TraceClient|undefined} The active client, or undefined when none is available.
     */
    resolveActiveTraceClient() {
        const contextClient = this.tracer.getCurrentTrace();
        if (contextClient) {
            this.traceClient = contextClient;
        }
        return this.traceClient;
    }

    /**
     * Opens a span on the current trace client.
     *
     * @param {string} name Span name.
     * @param {string} [spanType="span"] Span type forwarded to TraceClient.startSpan.
     * @returns {string|undefined} The client's current span id after the call, or
     *   undefined when the handler has no trace client.
     */
    startSpan(name, spanType = "span") {
        if (!this.traceClient) {
            console.warn(`[Judgeval Handler] startSpan(${name}, ${spanType}) called but traceClient is undefined.`);
            return undefined;
        }
        const parentSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Before startSpan(${name}): Current Span ID = ${parentSpanId}`);
        this.traceClient.startSpan(name, { spanType });
        const newSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Started span: ${name} (ID: ${newSpanId}), Parent reported: ${parentSpanId}, Type: ${spanType}`);
        return newSpanId;
    }

    /**
     * Closes the trace client's current span and saves the trace once the run is
     * marked finished and the client reports no remaining span.
     *
     * @param {string} [context] Free-form label used only in log output.
     */
    endSpan(context) {
        const label = context != null ? context : '';
        if (!this.traceClient) {
            console.warn(`[Judgeval Handler] endSpan(${label}) called but traceClient is undefined.`);
            return;
        }
        const currentSpanId = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Before endSpan(${label}): Current Span ID = ${currentSpanId}`);
        this.traceClient.endSpan();
        const spanIdAfterEnd = this.traceClient.getCurrentSpanId();
        console.log(`[Judgeval Handler] Ended span: ${currentSpanId} (Context: ${context != null ? context : 'N/A'}). Current Span ID after end: ${spanIdAfterEnd}`);
        if (spanIdAfterEnd === undefined) {
            this.saveTraceIfRunComplete();
        }
    }

    /**
     * Saves the trace (overwrite = true) and detaches the client from the handler,
     * provided a run was started and the finish signal has been seen.
     *
     * Callers are responsible for checking that no span is still open. The client
     * reference is cleared before the asynchronous save starts, so a second call
     * cannot trigger a duplicate save.
     *
     * @returns {boolean} true when a save was started.
     */
    saveTraceIfRunComplete() {
        if (!this.traceClient || !this.rootSpanStarted || !this.finished) {
            return false;
        }
        console.log("[Judgeval Handler] Root context likely ended and graph finished. Saving trace.");
        const clientToSave = this.traceClient;
        this.traceClient = undefined;
        this.rootSpanStarted = false;
        clientToSave.save(true)
            .then(() => console.log(`[Judgeval Handler] Trace ${clientToSave.traceId} saved.`))
            .catch(err => console.error(`[Judgeval Handler] Error saving trace ${clientToSave.traceId}:`, err));
        return true;
    }

    /**
     * Returns the trace client for a chain-start event, creating one when the
     * event is the LangGraph root run and none exists yet.
     *
     * Resolution order:
     *  1. monitoring disabled on the tracer -> undefined
     *  2. the handler already holds a client -> that client
     *  3. root run (name "LangGraph" or runType "Graph") -> context client if any,
     *     otherwise a new TraceClient that is saved empty right away; in both cases
     *     a "LangGraphRoot" span is started
     *  4. any other chain -> context client if any, otherwise undefined
     *
     * @param {string} [name] Run name reported by LangChain.
     * @param {string} [runType] Run type reported by LangChain.
     * @returns {TraceClient|undefined}
     */
    getOrCreateRootTraceClient(name, runType) {
        if (!this.tracer.enableMonitoring) {
            return undefined;
        }
        if (this.traceClient) {
            return this.traceClient;
        }
        const isLangGraphRootStart = (name === 'LangGraph' || runType === 'Graph');
        if (!isLangGraphRootStart) {
            // Not the root run: a client can only come from the surrounding context.
            const contextClient = this.tracer.getCurrentTrace();
            if (!contextClient) {
                console.warn(`[Judgeval Handler] handleChainStart called for non-root chain ('${name}') but no TraceClient found in context.`);
                return undefined;
            }
            // this.traceClient is known to be unset here (see the early return above).
            console.log(`[Judgeval Handler] Found TraceClient in context mid-run (ID: ${contextClient.traceId}). Initializing run state.`);
            this.initializeRunState(contextClient);
            return this.traceClient;
        }
        console.log("[Judgeval Handler] LangGraph root run detected. Getting/Creating TraceClient.");
        // Prefer a client supplied by an external wrapper through the context.
        let client = this.tracer.getCurrentTrace();
        if (client) {
            console.log(`[Judgeval Handler] Found existing TraceClient in context (ID: ${client.traceId}). Initializing run state.`);
            this.initializeRunState(client);
        }
        else {
            console.log("[Judgeval Handler] No TraceClient in context, creating new one.");
            const traceId = uuid.v4();
            client = new TraceClient({
                tracer: this.tracer,
                traceId: traceId,
                name: "LangGraphRun",
                projectName: this.tracer.projectName,
                overwrite: false,
                rules: this.tracer.defaultRules,
                enableMonitoring: this.tracer.enableMonitoring,
                enableEvaluations: this.tracer.enableEvaluations,
                apiKey: this.tracer.apiKey,
                organizationId: this.tracer.organizationId
            });
            // Persist an empty trace up front; a failure here is logged, not thrown.
            client.save(false).catch(err => console.error("[Judgeval Handler] Error saving initial empty trace:", err));
            // The new client is held only by this handler; it is not placed into
            // the tracer's context.
            this.initializeRunState(client);
            console.log(`[Judgeval Handler] Created new TraceClient (ID: ${client.traceId}). Run state initialized.`);
        }
        // The root span must be opened after the run state has been (re)initialised.
        this.startSpan("LangGraphRoot", "Main Function");
        return this.traceClient;
    }

    /**
     * Opens a "retriever" span named RETRIEVER_<NAME> (run name first, then the
     * serialized name, else RETRIEVER_CALL) and records the query as its input.
     */
    handleRetrieverStart(serialized, query, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const client = this.resolveActiveTraceClient();
            if (!client) {
                console.warn(`[Judgeval Handler] handleRetrieverStart: No TraceClient found.`);
                return;
            }
            let name_ = "RETRIEVER_CALL";
            if (name) {
                name_ = `RETRIEVER_${name.toUpperCase()}`;
            }
            else if (serialized != null && serialized.name) {
                name_ = `RETRIEVER_${serialized.name.toUpperCase()}`;
            }
            this.startSpan(name_, "retriever");
            client.recordInput({
                query,
                tags,
                metadata,
                options,
            });
        });
    }

    /**
     * Records a summary of the retrieved documents (page content truncated to
     * 100 characters) and closes the current span.
     */
    handleRetrieverEnd(documents, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            const docSummary = documents.map((doc, i) => ({
                index: i,
                page_content: doc.pageContent.length > 100
                    ? doc.pageContent.substring(0, 97) + "..."
                    : doc.pageContent,
                metadata: doc.metadata,
            }));
            this.traceClient.recordOutput({
                document_count: documents.length,
                documents: docSummary
            });
            this.endSpan("RetrieverEnd");
        });
    }

    /** Records the retriever error on the current span and closes it. */
    handleRetrieverError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Retriever error: ${error}`);
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("RetrieverError");
        });
    }

    /**
     * Ensures a trace client exists and opens a "node" span whenever
     * `metadata.langgraph_node` names a node different from the previous one.
     *
     * The root start event itself (no node name) only creates the client and the
     * root span. Chain starts carrying the same node name as the previous one, or
     * no node name at all, are logged but open no span.
     */
    handleChainStart(serialized, inputs, runId, parentRunId, tags, metadata, runType, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const nodeName = metadata != null ? metadata.langgraph_node : undefined;
            console.log(`[Judgeval Handler] handleChainStart: Name=${name}, Type=${runType}, Node=${nodeName}, RunID=${runId}`);
            const client = this.getOrCreateRootTraceClient(name, runType);
            if (!client) {
                // Monitoring is off or no client could be resolved.
                return;
            }
            const isLangGraphRootStartEvent = (name === 'LangGraph' || runType === 'Graph') && this.rootSpanStarted;
            if (isLangGraphRootStartEvent && nodeName === undefined) {
                console.log(`[Judgeval Handler] Skipping node processing for root start event.`);
                return;
            }
            if (nodeName && nodeName !== this.previousNode) {
                console.log(`[Judgeval Handler] New node detected: ${nodeName}`);
                this.startSpan(nodeName, "node");
                this.executedNodes.push(nodeName);
                // Remembered so tool spans can be attributed to this node.
                this.previousNode = nodeName;
                if (this.traceClient != null) {
                    this.traceClient.recordInput({
                        args: inputs,
                        kwargs: { tags, metadata, runType, name: nodeName, options }
                    });
                }
            }
            else if (nodeName && nodeName === this.previousNode) {
                console.log(`[Judgeval Handler] Chain start ('${name}') within existing node context ('${nodeName}').`);
            }
            else {
                console.log(`[Judgeval Handler] Generic chain start ('${name}') detected. Node context: '${this.previousNode}'.`);
            }
        });
    }

    /**
     * Records the chain output on the current span, closes that span when the
     * tags mark a graph step, and flags the run as finished on an end signal
     * (output "__end__", or a tag containing ":graph:finish" / ":__end__").
     *
     * NOTE(review): LangChain's BaseCallbackHandler appears to pass a kwargs object
     * (not run metadata) as the fifth handleChainEnd argument, so
     * `metadata.langgraph_node` may never be populated here - confirm. It is only
     * used for logging.
     * NOTE(review): nothing in this handler closes the "LangGraphRoot" span it
     * opens; presumably an external wrapper calls endSpan() - confirm.
     */
    handleChainEnd(outputs, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const currentSpanId = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
            const nodeNameFromMetadata = metadata != null ? metadata.langgraph_node : undefined;
            console.log(`[Judgeval Handler] handleChainEnd: RunID=${runId}, Node=${nodeNameFromMetadata}, CurrentSpanID=${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                console.warn(`[Judgeval Handler] handleChainEnd called but no active span. RunID=${runId}`);
                return;
            }
            this.traceClient.recordOutput(outputs);
            const isGraphStepEnd = tags != null ? tags.some(tag => tag.includes("graph:step")) : undefined;
            if (isGraphStepEnd) {
                console.log(`[Judgeval Handler] Graph step end detected for span: ${currentSpanId}. Ending span as 'node'.`);
                // The current span is assumed to be the node span.
                this.endSpan(`NodeEnd: ${this.previousNode != null ? this.previousNode : 'unknown'}`);
            }
            // The finish check runs after the node span has (possibly) been closed.
            const isEndSignal = outputs === "__end__";
            const isGraphFinishTag = tags != null
                ? tags.some(tag => tag.includes(":graph:finish") || tag.includes(":__end__"))
                : undefined;
            if (isEndSignal || isGraphFinishTag) {
                console.log(`[Judgeval Handler] Graph finished signal detected.`);
                this.finished = true;
                // endSpan above may already have detached the client.
                const currentSpanIdAfterNodeEnd = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
                if (this.rootSpanStarted && !currentSpanIdAfterNodeEnd) {
                    console.log(`[Judgeval Handler] Graph finished signal and span stack is empty. Save should occur.`);
                    // `finished` was still false when endSpan ran its own save check,
                    // and with no span left no later endSpan will run it again.
                    this.saveTraceIfRunComplete();
                }
            }
        });
    }

    /**
     * Records the chain error on the current span; the span is closed only when
     * the tags mark a graph step, so that a generic chain error cannot pop the
     * root span prematurely.
     */
    handleChainError(error, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Chain error: ${error}`);
            const currentSpanId = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
            console.log(`[Judgeval Handler] ChainError occurred during span: ${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordError(error);
            const isGraphStepError = tags != null ? tags.some(tag => tag.includes("graph:step")) : undefined;
            if (isGraphStepError) {
                console.log(`[Judgeval Handler] Ending current span ${currentSpanId} as NodeError due to graph:step tag.`);
                this.endSpan(`NodeError: ${this.previousNode != null ? this.previousNode : 'unknown'}`);
            }
        });
    }

    /**
     * Opens a "tool" span, records the tool input and appends the tool to
     * executedTools / executedNodeTools.
     */
    handleToolStart(serialized, inputStr, runId, parentRunId, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const serializedName = serialized != null ? serialized.name : undefined;
            const preferredName = name != null ? name : serializedName;
            const toolName = preferredName != null ? preferredName : "unknown_tool";
            console.log(`[Judgeval Handler] handleToolStart: Name=${toolName}, RunID=${runId}`);
            const client = this.resolveActiveTraceClient();
            if (!client) {
                console.warn(`[Judgeval Handler] handleToolStart: No TraceClient found.`);
                return;
            }
            // previousNode is whatever node the last handleChainStart registered.
            console.log(`[Judgeval Handler] Starting tool span: ${toolName} (Parent Node Hint: ${this.previousNode})`);
            this.startSpan(toolName, "tool");
            this.executedTools.push(toolName);
            const nodeTool = this.previousNode
                ? `${this.previousNode}:${toolName}`
                : toolName;
            this.executedNodeTools.push(nodeTool);
            client.recordInput({
                args: inputStr,
                kwargs: { tags, metadata, options }
            });
        });
    }

    /** Records the tool output on the current span and closes it. */
    handleToolEnd(output, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const currentSpanId = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
            console.log(`[Judgeval Handler] handleToolEnd: RunID=${runId}, CurrentSpanID=${currentSpanId}`);
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordOutput(output);
            this.endSpan("ToolEnd");
        });
    }

    /** Records the tool error on the current span and closes it. */
    handleToolError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] Tool error: ${error}`);
            const currentSpanId = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("ToolError");
        });
    }

    /** Intentionally a no-op: agent actions do not affect span management. */
    handleAgentAction( /* ... */) {
        return __awaiter(this, void 0, void 0, function* () { });
    }

    /** Intentionally a no-op: agent finish does not affect span management. */
    handleAgentFinish( /* ... */) {
        return __awaiter(this, void 0, void 0, function* () { });
    }

    /** Opens an "llm" span (named after the run, else "LLM call") and records the prompts. */
    handleLLMStart(serialized, prompts, runId, parentRunId, extraParams, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const llmName = name != null ? name : "LLM call";
            const client = this.resolveActiveTraceClient();
            if (!client) {
                console.warn(`[Judgeval Handler] handleLLMStart: No TraceClient found.`);
                return;
            }
            console.log(`[Judgeval Handler] Starting LLM span: ${llmName}`);
            this.startSpan(llmName, "llm");
            client.recordInput({
                args: prompts,
                kwargs: Object.assign({
                    extra_params: extraParams,
                    tags: tags,
                    metadata: metadata,
                    serialized_id: serialized != null ? serialized.id : undefined
                }, options),
            });
        });
    }

    /**
     * Records the text of the first generation and closes the current span.
     *
     * The text is taken from `generations[0][0].text`, else from that generation's
     * `message.content` (JSON-encoded when not a string). When there are no
     * generations, the JSON-encoded `llmOutput` is used if present. Extraction
     * failures are recorded as the output text instead of being thrown.
     */
    handleLLMEnd(output, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.traceClient || !this.traceClient.getCurrentSpanId()) {
                return;
            }
            let textOutput = "No text output found";
            try {
                const hasGeneration = output.generations
                    && output.generations.length > 0
                    && output.generations[0].length > 0;
                if (hasGeneration) {
                    const firstGen = output.generations[0][0];
                    if (firstGen.text) {
                        textOutput = firstGen.text;
                    }
                    else if ('message' in firstGen
                        && firstGen.message
                        && typeof firstGen.message === 'object'
                        && 'content' in firstGen.message) {
                        const messageContent = firstGen.message.content;
                        textOutput = typeof messageContent === 'string'
                            ? messageContent
                            : JSON.stringify(messageContent);
                    }
                }
                else if (output.llmOutput) {
                    textOutput = JSON.stringify(output.llmOutput);
                }
            }
            catch (e) {
                console.error("[Judgeval Handler] Error extracting LLM output text:", e);
                textOutput = `Error extracting output: ${e instanceof Error ? e.message : String(e)}`;
            }
            this.traceClient.recordOutput(textOutput);
            console.log(`[Judgeval Handler] Ending LLM span: ${this.traceClient.getCurrentSpanId()}`);
            this.endSpan("LLMEnd");
        });
    }

    /** Records the LLM error on the current span and closes it. */
    handleLLMError(error, runId, parentRunId, tags, options) {
        return __awaiter(this, void 0, void 0, function* () {
            console.error(`[Judgeval Handler] LLM error: ${error}`);
            const currentSpanId = this.traceClient != null ? this.traceClient.getCurrentSpanId() : undefined;
            if (!this.traceClient || !currentSpanId) {
                return;
            }
            this.traceClient.recordError(error);
            this.endSpan("LLMError");
        });
    }

    /**
     * Opens an "llm" span for a chat model and records the serialized messages.
     *
     * The span is named after the run when a name is given; otherwise the
     * "::"-joined serialized id is matched against known providers
     * (openai / anthropic / together), falling back to "LLM call".
     * The span is closed by handleLLMEnd / handleLLMError.
     */
    handleChatModelStart(serialized, messages, runId, parentRunId, extraParams, tags, metadata, name, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const client = this.resolveActiveTraceClient();
            if (!client) {
                console.warn(`[Judgeval Handler] handleChatModelStart: No TraceClient found.`);
                return;
            }
            const idParts = serialized != null ? serialized.id : undefined;
            const serializedId = idParts != null ? idParts.join("::") : "";
            let modelName = "LLM call";
            if (name) {
                modelName = name;
            }
            else if (serializedId.includes("openai")) {
                modelName = "OPENAI_API_CALL";
            }
            else if (serializedId.includes("anthropic")) {
                modelName = "ANTHROPIC_API_CALL";
            }
            else if (serializedId.includes("together")) {
                modelName = "TOGETHER_API_CALL";
            }
            console.log(`[Judgeval Handler] Starting ChatModel span: ${modelName}`);
            this.startSpan(modelName, "llm");
            client.recordInput({
                args: JSON.stringify(messages.map(msgList => msgList.map(msg => msg.toDict()))),
                kwargs: Object.assign({
                    extra_params: extraParams,
                    tags: tags,
                    metadata: metadata,
                    serialized_id: serializedId
                }, options),
            });
        });
    }
}
//# sourceMappingURL=langgraph.js.map