UNPKG

judgeval

Version:

Judgment SDK for TypeScript/JavaScript

judgmentlabs.ai

JudgmentLabs/judgeval-js

969 lines • 58.3 kB

JavaScript

var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; // Core Node.js imports import { v4 as uuidv4 } from 'uuid'; import { AsyncLocalStorage } from 'async_hooks'; // Installed SDKs import OpenAI from 'openai'; import Anthropic from '@anthropic-ai/sdk'; // Local Imports import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, // Add other necessary constants if needed } from '../constants.js'; import { APIJudgmentScorer } from '../scorers/base-scorer.js'; import logger from './logger-instance.js'; // Use the shared winston logger instance // On their own, span() and trace() are fully synchronous // The issue is with asynchronous functions // To address this, I made a wrapper for async functions that'll handle all the context stuff class TraceClientContext { constructor() { this.entries = []; this.entryStack = []; } } const traceClientContextAsyncLocalStorage = new AsyncLocalStorage(); // We could use .enterWith(), but the documentation advises against it let rootTraceClientContext = new TraceClientContext(); function getTraceClientContext() { var _a; return (_a = traceClientContextAsyncLocalStorage.getStore()) !== null && _a !== void 0 ? _a : rootTraceClientContext; } function asyncFunctionWrapper(func) { return (...args) => __awaiter(this, void 0, void 0, function* () { const parentTraceClientContext = getTraceClientContext(); const traceClientContext = new TraceClientContext(); const lastEntry = parentTraceClientContext.entryStack.at(-1); if (lastEntry) { traceClientContext.entryStack.push(lastEntry); } const result = yield traceClientContextAsyncLocalStorage .run(traceClientContext, () => func(...args)); if (traceClientContext.entries.length > parentTraceClientContext.entries.length) { [traceClientContext.entries, parentTraceClientContext.entries] = [parentTraceClientContext.entries, traceClientContext.entries]; } parentTraceClientContext.entries.push(...traceClientContext.entries); return result; }); } // --- API Interaction Client --- /** * Client for interacting with Judgment trace API endpoints. */ class TraceManagerClient { constructor(apiKey, organizationId) { if (!apiKey) { throw new Error("TraceManagerClient requires a Judgment API key."); } if (!organizationId) { throw new Error("TraceManagerClient requires a Judgment Organization ID."); } this.apiKey = apiKey; this.organizationId = organizationId; } _fetch(url_1) { return __awaiter(this, arguments, void 0, function* (url, options = {}) { const headers = Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}`, 'X-Organization-Id': this.organizationId }, (options.headers || {})); try { // Use isomorphic fetch (available globally in modern Node.js and browsers) const response = yield fetch(url, Object.assign(Object.assign({}, options), { headers: headers })); // We will return the response object even if !response.ok // The caller (e.g., saveTrace) is responsible for checking response.ok or response.status // Handle cases where the response might be empty (e.g., 204 No Content on DELETE) if (response.status === 204) { return null; // Indicate success with no content } return response; } catch (error) { console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error); // Re-throw or handle as appropriate for the application context throw error; } }); } fetchTrace(traceId) { return __awaiter(this, void 0, void 0, function* () { return this._fetch(JUDGMENT_TRACES_FETCH_API_URL, { method: 'POST', body: JSON.stringify({ trace_id: traceId }), }); }); } saveTrace(traceData) { return __awaiter(this, void 0, void 0, function* () { // _fetch now returns the raw response object or throws on network error const response = yield this._fetch(JUDGMENT_TRACES_SAVE_API_URL, { method: 'POST', body: JSON.stringify(traceData), // Stringify directly here again }); // Check if _fetch threw a network error (caught below) or returned an invalid object if (!response) { // This case should ideally be caught by _fetch's catch block, but double-check throw new Error('Failed to save trace data: No response received from API.'); } // Now, check the status code on the received response object if (response.status === 400) { // Attempt to get error body for more info const errorBody = yield response.text(); throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`); } else if (!response.ok) { // Handles other errors (5xx, 4xx except 400) const errorBody = yield response.text(); throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`); } // --- Success Path --- // Optionally log the UI URL (needs JSON parsing) let responseData = null; try { // Handle 204 No Content specifically if (response.status === 204) { responseData = null; // Or maybe { success: true }? } else { responseData = yield response.json(); // Parse JSON only on success } } catch (parseError) { logger.warn("Failed to parse successful API response JSON.", { error: parseError }); // Depending on requirements, maybe throw, maybe return a default success object throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`); } if (responseData === null || responseData === void 0 ? void 0 : responseData.ui_results_url) { console.info(` 🔍 View trace: ${responseData.ui_results_url} `); } // Return the parsed data (or null for 204) return responseData; }); } deleteTrace(traceId) { return __awaiter(this, void 0, void 0, function* () { // Assuming DELETE method is correct based on REST principles for the delete endpoint return this._fetch(JUDGMENT_TRACES_DELETE_API_URL, { method: 'DELETE', body: JSON.stringify({ trace_ids: [traceId] }), }); }); } deleteTraces(traceIds) { return __awaiter(this, void 0, void 0, function* () { return this._fetch(JUDGMENT_TRACES_DELETE_API_URL, { method: 'DELETE', body: JSON.stringify({ trace_ids: traceIds }), }); }); } /** * Calculate token costs directly using the API endpoint. * This is more accurate than client-side calculation as it uses the most up-to-date pricing. * * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229') * @param promptTokens Number of tokens in the prompt/input * @param completionTokens Number of tokens in the completion/output * @returns Object containing token counts and calculated costs in USD */ calculateTokenCosts(model, promptTokens, completionTokens) { return __awaiter(this, void 0, void 0, function* () { try { // Use the new calculation endpoint const response = yield this._fetch(JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, { method: 'POST', body: JSON.stringify({ model, prompt_tokens: promptTokens, completion_tokens: completionTokens }) }); // Check if the response is okay and parse JSON if (response && response.ok) { const data = yield response.json(); return data; } else if (response) { // Log error if response was not ok const errorBody = yield response.text(); logger.warn(`API error calculating token costs for model ${model}: ${response.status} ${response.statusText}`, { errorBody }); return null; } else { // Handle cases where _fetch might return null or undefined (though it shouldn't with current implementation) logger.warn(`No response received when calculating token costs for model ${model}.`); return null; } } catch (error) { logger.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) }); return null; } }); } } // --- Helper Functions --- // Helper function to sanitize names (e.g., replace spaces with underscores) function sanitizeName(name) { // Replace spaces with underscores and remove potentially problematic characters // You can adjust the regex further if other characters cause issues. return name.replace(/\s+/g, '_').replace(/[^a-zA-Z0-9_.-]/g, ''); } // --- Core Trace Classes --- /** * Represents an ongoing trace context. */ class TraceClient { constructor(config) { var _a, _b, _c, _d, _e; this.traceManager = null; // Made public for wrap access this._spanDepths = {}; // Track depth of active spans this.pendingEvaluationRuns = []; // <-- ADDED: Store pending evaluations this.traceId = config.traceId || uuidv4(); this.originalName = config.name || 'default_trace'; // Store original this.name = sanitizeName(this.originalName); // Use sanitized name internally // If the sanitized name is empty, fallback to a default if (!this.name) { console.warn(`Original trace name "${this.originalName}" sanitized to empty string. Using default_trace_${this.traceId.substring(0, 8)}.`); this.name = `default_trace_${this.traceId.substring(0, 8)}`; } this.projectName = (_a = config.projectName) !== null && _a !== void 0 ? _a : config.tracer.projectName; this.overwrite = (_b = config.overwrite) !== null && _b !== void 0 ? _b : false; this.rules = (_c = config.rules) !== null && _c !== void 0 ? _c : []; // Determine effective monitoring status based on tracer and API keys let effectiveMonitoring = (_d = config.enableMonitoring) !== null && _d !== void 0 ? _d : config.tracer.enableMonitoring; if (effectiveMonitoring && (!config.apiKey || !config.organizationId)) { console.warn(`TraceClient ${this.traceId}: Monitoring requires JUDGMENT_API_KEY and JUDGMENT_ORG_ID. Disabling monitoring for this trace.`); effectiveMonitoring = false; } this.enableMonitoring = effectiveMonitoring; // Evaluations depend on monitoring this.enableEvaluations = effectiveMonitoring && ((_e = config.enableEvaluations) !== null && _e !== void 0 ? _e : config.tracer.enableEvaluations); this.parentTraceId = config.parentTraceId; this.parentName = config.parentName; this.apiKey = config.apiKey; this.organizationId = config.organizationId; this.startTime = Date.now() / 1000; if (this.enableMonitoring) { this.traceManager = new TraceManagerClient(this.apiKey, this.organizationId); } } addEntry(entry) { const traceClientContext = getTraceClientContext(); if (this.enableMonitoring) { traceClientContext.entries.push(entry); } } recordInput(inputs) { const traceClientContext = getTraceClientContext(); const currentEntry = traceClientContext.entryStack.at(-1); if (!currentEntry || !currentEntry.span_id) { console.warn(`No current entry to record input to\nStack trace: ${new Error().stack}`); return; } this.addEntry({ type: 'input', span_id: currentEntry.span_id, inputs, function: currentEntry.function, depth: this._spanDepths[currentEntry.span_id], created_at: Date.now() / 1000, span_type: currentEntry.span_type, message: `Inputs to ${currentEntry.function}` }); } recordOutput(output) { const traceClientContext = getTraceClientContext(); const currentEntry = traceClientContext.entryStack.at(-1); if (!currentEntry || !currentEntry.span_id) { console.warn(`No current entry to record output to\nStack trace: ${new Error().stack}`); return; } this.addEntry({ type: 'output', span_id: currentEntry.span_id, output, function: currentEntry.function, depth: this._spanDepths[currentEntry.span_id], created_at: Date.now() / 1000, span_type: currentEntry.span_type, message: `Output from ${currentEntry.function}` }); } recordError(error) { const traceClientContext = getTraceClientContext(); const currentEntry = traceClientContext.entryStack.at(-1); if (!currentEntry || !currentEntry.span_id) { console.warn(`No current entry to record error to\nStack trace: ${new Error().stack}`); return; } this.addEntry({ type: 'error', span_id: currentEntry.span_id, output: error, function: currentEntry.function, depth: this._spanDepths[currentEntry.span_id], created_at: Date.now() / 1000, span_type: currentEntry.span_type, message: `Error from ${currentEntry.function}` }); } startSpan(name, options = {}) { var _a; const traceClientContext = getTraceClientContext(); const parentEntry = traceClientContext.entryStack.at(-1); const spanId = uuidv4(); const spanType = (_a = options.spanType) !== null && _a !== void 0 ? _a : 'span'; const startTime = Date.now() / 1000; let depth = 0, parentSpanId = undefined; if (parentEntry && parentEntry.span_id) { depth = this._spanDepths[parentEntry.span_id] + 1; parentSpanId = parentEntry.span_id; } this._spanDepths[spanId] = depth; const entry = { type: 'enter', function: name, span_id: spanId, depth: depth, created_at: startTime, span_type: spanType, parent_span_id: parentSpanId, message: name }; this.addEntry(entry); traceClientContext.entryStack.push(entry); } endSpan() { const traceClientContext = getTraceClientContext(); const enterEntry = traceClientContext.entryStack.pop(); if (!enterEntry || !enterEntry.span_id) { console.warn("No enter entry to end"); return; } const endTime = Date.now() / 1000; const duration = endTime - enterEntry.created_at; this.addEntry({ type: 'exit', function: enterEntry.function, span_id: enterEntry.span_id, depth: this._spanDepths[enterEntry.span_id], created_at: endTime, duration: duration, span_type: enterEntry.span_type, message: `← ${enterEntry.function}` }); // Clean up depth tracking delete this._spanDepths[enterEntry.span_id]; } *span(name, options = {}) { if (!this.enableMonitoring) { yield this; } else { this.startSpan(name, options); yield this; this.endSpan(); } } /** * Retrieves the ID of the currently active span in this trace context. * Relies on AsyncLocalStorage context established by observe/span. * @returns {string | undefined} The ID of the current span, or undefined if none is active. */ getCurrentSpanId() { const traceClientContext = getTraceClientContext(); // Internal function using AsyncLocalStorage if (!traceClientContext) { // Should ideally not happen if called within an observe/trace context console.warn("[Judgeval] getCurrentSpanId called outside of an active trace context."); return undefined; } const currentEntry = traceClientContext.entryStack.at(-1); // Get the latest 'enter' entry if (!currentEntry) { // This might happen if called right at the start of a trace before the first span // console.warn("[Judgeval] getCurrentSpanId called but span stack is empty."); return undefined; } return currentEntry.span_id; } getDuration() { return (Date.now() / 1000) - this.startTime; } condenseTrace(rawEntries) { var _a, _b, _c, _d, _e; const spansById = {}; for (const entry of rawEntries) { const spanId = entry.span_id; if (!spanId) continue; if (!spansById[spanId]) { spansById[spanId] = { span_id: spanId, function: entry.function || 'unknown', depth: (_a = entry.depth) !== null && _a !== void 0 ? _a : 0, created_at: new Date(((_b = entry.created_at) !== null && _b !== void 0 ? _b : 0) * 1000).toISOString(), // Convert number to ISO string trace_id: this.traceId, // Add trace_id parent_span_id: entry.parent_span_id, span_type: entry.span_type || 'span', inputs: null, output: null, duration: null, children: [] }; } const currentSpanData = spansById[spanId]; switch (entry.type) { case 'enter': currentSpanData.function = entry.function || currentSpanData.function; currentSpanData.depth = (_c = entry.depth) !== null && _c !== void 0 ? _c : currentSpanData.depth; currentSpanData.created_at = new Date(((_d = entry.created_at) !== null && _d !== void 0 ? _d : 0) * 1000).toISOString(); // Ensure created_at is string on update currentSpanData.parent_span_id = entry.parent_span_id; currentSpanData.span_type = entry.span_type || currentSpanData.span_type; currentSpanData.start_time = entry.created_at; // Keep original number for duration calc break; case 'exit': currentSpanData.duration = (_e = entry.duration) !== null && _e !== void 0 ? _e : currentSpanData.duration; currentSpanData.end_time = entry.created_at; // Keep original number for duration calc if (currentSpanData.duration === null && currentSpanData.start_time && currentSpanData.end_time) { currentSpanData.duration = currentSpanData.end_time - currentSpanData.start_time; } break; case 'input': if (currentSpanData.inputs === null && entry.inputs) { currentSpanData.inputs = entry.inputs; } else if (typeof currentSpanData.inputs === 'object' && typeof entry.inputs === 'object') { currentSpanData.inputs = Object.assign(Object.assign({}, currentSpanData.inputs), entry.inputs); } break; case 'output': case 'error': currentSpanData.output = entry.output; break; } } const spansList = Object.values(spansById).map(span => { if (span.duration === null && span.start_time && span.end_time) { span.duration = span.end_time - span.start_time; } delete span.start_time; delete span.end_time; return span; }); const childrenMap = {}; const roots = []; const spanMap = {}; const sortedCondensedList = []; const visited = new Set(); for (const span of spansList) { spanMap[span.span_id] = span; const parentId = span.parent_span_id; if (parentId === undefined || parentId === null) { roots.push(span); } else { if (!childrenMap[parentId]) { childrenMap[parentId] = []; } childrenMap[parentId].push(span); } } // Sort using parsed dates roots.sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at)); for (const parentId in childrenMap) { // Sort using parsed dates childrenMap[parentId].sort((a, b) => Date.parse(a.created_at) - Date.parse(b.created_at)); } function buildFlatListDfs(span) { if (visited.has(span.span_id)) return; visited.add(span.span_id); sortedCondensedList.push(span); const children = childrenMap[span.span_id] || []; for (const child of children) { buildFlatListDfs(child); } } for (const rootSpan of roots) { buildFlatListDfs(rootSpan); } for (const span of spansList) { if (!visited.has(span.span_id)) { console.warn(`Orphaned span detected: ${span.span_id}, adding to end of list.`); buildFlatListDfs(span); } } return sortedCondensedList; } save() { return __awaiter(this, arguments, void 0, function* (emptySave = false) { var _a; // If monitoring is disabled or trace hasn't started, don't save if (!this.enableMonitoring || this.startTime === -1 || !this.traceManager) { logger.info(`[TraceClient ${this.traceId}] Monitoring disabled or trace not started. Not saving.`); return null; } const endTime = Date.now() / 1000; // Current time in seconds const duration = endTime - this.startTime; const condensedEntries = this.condenseTrace(getTraceClientContext().entries); // Calculate token counts and costs const tokenCounts = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0, prompt_tokens_cost_usd: 0.0, completion_tokens_cost_usd: 0.0, total_cost_usd: 0.0 }; // Use a Set to avoid double-counting tokens from nested calls if structure is complex // Note: Assuming span_ids are unique across the trace const processedSpanIds = new Set(); for (const entry of condensedEntries) { if (processedSpanIds.has(entry.span_id)) continue; processedSpanIds.add(entry.span_id); if (entry.span_type === 'llm' && ((_a = entry.output) === null || _a === void 0 ? void 0 : _a.usage)) { const usage = entry.output.usage; // Sum tokens tokenCounts.prompt_tokens += usage.prompt_tokens || usage.input_tokens || 0; tokenCounts.completion_tokens += usage.completion_tokens || usage.output_tokens || 0; tokenCounts.total_tokens += usage.total_tokens || ((usage.prompt_tokens || usage.input_tokens || 0) + (usage.completion_tokens || usage.output_tokens || 0)); // Sum costs embedded in the usage object (if they exist) if (usage.prompt_tokens_cost_usd !== undefined) { tokenCounts.prompt_tokens_cost_usd += usage.prompt_tokens_cost_usd; } if (usage.completion_tokens_cost_usd !== undefined) { tokenCounts.completion_tokens_cost_usd += usage.completion_tokens_cost_usd; } if (usage.total_cost_usd !== undefined) { tokenCounts.total_cost_usd += usage.total_cost_usd; } } } // --- Retrieve and include pending evaluation runs --- const evaluationRunsToSave = [...this.pendingEvaluationRuns]; // ---------------------------------------------------- const traceData = { trace_id: this.traceId, name: this.name, // Use the sanitized name project_name: this.projectName, created_at: new Date(this.startTime * 1000).toISOString(), // Convert start time to ISO string duration: duration < 0 ? 0 : duration, token_counts: tokenCounts, entries: condensedEntries, // Send the potentially nested structure from condenseTrace evaluation_runs: evaluationRunsToSave, // <-- ADDED: Include collected evaluations overwrite: this.overwrite, parent_trace_id: this.parentTraceId, parent_name: this.parentName, }; // <<< ADD LOGGING HERE >>> logger.info(`[TraceClient ${this.traceId}] Payload to be saved:`, JSON.stringify(traceData, null, 2)); // <<< ADD SPECIFIC LOGGING FOR EVALUATION RUNS (using INFO level) >>> logger.info(`[TraceClient ${this.traceId}] Evaluation runs included in payload:`, JSON.stringify(traceData.evaluation_runs, null, 2)); // <<< END SPECIFIC LOGGING >>> if (emptySave) { // Skip actual saving if emptySave is true (used for context management in generators) logger.info(`[TraceClient ${this.traceId}] emptySave=true, skipping actual save call.`); return { traceId: this.traceId, traceData }; } try { logger.info(`[TraceClient ${this.traceId}] Calling traceManager.saveTrace...`); const response = yield this.traceManager.saveTrace(traceData); logger.info(`[TraceClient ${this.traceId}] Trace saved successfully. Response:`, response); // Reset trace context after successful save getTraceClientContext().entries = []; getTraceClientContext().entryStack = []; this.pendingEvaluationRuns = []; // <-- ADDED: Clear pending evaluations this.startTime = -1; // Reset start time return { traceId: this.traceId, traceData: traceData }; // Return payload on success } catch (error) { logger.error(`[TraceClient ${this.traceId}] Error saving trace:`, error); // Optionally reset context even on error? // getTraceClientContext().entries = []; // getTraceClientContext().entryStack = []; // this.startTime = -1; return null; // Indicate save failure } }); } print() { if (!this.enableMonitoring) { // Keep console.log for direct user output when print() is called console.log("Monitoring was disabled. No trace entries recorded."); return; } const traceClientContext = getTraceClientContext(); if (traceClientContext.entries.length === 0) { // Keep console.log for direct user output when print() is called console.log("No trace entries recorded."); return; } // Keep console.log for direct user output when print() is called console.log(`\n--- Trace Details: ${this.name} (ID: ${this.traceId}) ---`); traceClientContext.entries.forEach(entry => { var _a; const indent = " ".repeat((_a = entry.depth) !== null && _a !== void 0 ? _a : 0); const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : ''; const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : ''; const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : ''; try { switch (entry.type) { case 'enter': console.log(`${indent}→ ${entry.function || 'unknown'} ${shortSpanId} ${shortParentId} [${entry.span_type || 'span'}] ${timeStr}`); break; case 'exit': const durationStr = entry.duration !== undefined ? `(${entry.duration.toFixed(3)}s)` : ''; // Keep console.log console.log(`${indent}← ${entry.function || 'unknown'} ${shortSpanId} ${durationStr} ${timeStr}`); break; case 'input': let inputStr = JSON.stringify(entry.inputs); if (inputStr && inputStr.length > 200) { inputStr = inputStr.substring(0, 197) + '...'; } // Keep console.log console.log(`${indent} Input (for ${shortSpanId}): ${inputStr || '{}'}`); break; case 'output': case 'error': let outputStr = JSON.stringify(entry.output); if (outputStr && outputStr.length > 200) { outputStr = outputStr.substring(0, 197) + '...'; } const prefix = entry.type === 'error' ? 'Error' : 'Output'; // Keep console.log console.log(`${indent} ${prefix} (for ${shortSpanId}): ${outputStr || 'null'}`); break; } } catch (stringifyError) { const errorMessage = stringifyError instanceof Error ? stringifyError.message : String(stringifyError); // Keep console.log console.log(`${indent}! Error formatting entry: ${errorMessage}`); console.log(`${indent} Raw entry:`, entry); } }); // Keep console.log console.log(`--- End Trace: ${this.name} ---`); } delete() { return __awaiter(this, void 0, void 0, function* () { if (!this.enableMonitoring || !this.traceManager) { logger.warn(`Cannot delete trace ${this.traceId}, monitoring disabled or manager missing.`); return null; } try { const result = yield this.traceManager.deleteTrace(this.traceId); logger.info(`Trace ${this.traceId} deleted successfully.`); return result; } catch (error) { logger.error(`Failed to delete trace ${this.traceId}.`, { error: error instanceof Error ? error.message : String(error) }); throw error; // Re-throw after logging } }); } /** * Asynchronously evaluate an example using the provided scorers, * embedding the evaluation request into the trace data. * Ported from the Python SDK's async_evaluate method. * * @param scorers Array of scorers to use for evaluation * @param options Evaluation options including input, outputs, and metadata * @returns Promise that resolves when the evaluation entry has been added to the trace */ asyncEvaluate(scorers_1) { return __awaiter(this, arguments, void 0, function* (scorers, options = {}) { if (!this.enableEvaluations) { logger.warn("Evaluations are disabled. Skipping async evaluation."); return; } if (!scorers || scorers.length === 0) { logger.warn("No scorers provided. Skipping async evaluation."); return; } // Filter for APIJudgmentScorers as the backend evaluation needs them const apiScorers = scorers.filter((s) => s instanceof APIJudgmentScorer); if (apiScorers.length === 0) { logger.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers."); return; } // Process rules (currently just using this.rules directly) const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS // Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules if (loadedRules && loadedRules.length > 0 && scorers.some(s => !(s instanceof APIJudgmentScorer))) { throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types."); } // Create example structure matching Python/backend expectations const example = { input: options.input || "", actual_output: options.actualOutput || "", expected_output: options.expectedOutput || "", context: options.context || [], retrieval_context: options.retrievalContext || [], tools_called: options.toolsCalled || [], expected_tools: options.expectedTools || [], additional_metadata: options.additionalMetadata || {}, trace_id: this.traceId }; try { const traceClientContext = getTraceClientContext(); const currentEntry = traceClientContext.entryStack.at(-1); if (!currentEntry) { logger.warn(`No current entry to associate evaluation with\\nStack trace: ${new Error().stack}`); return; } const currentSpanId = currentEntry.span_id; // Get the span ID // --- Create evaluation run name (similar to Python) --- // Capitalize scorer names const scorerNames = apiScorers.map(scorer => { var _a; // Attempt to get score_type, fallback to class name or Unknown const name = (scorer === null || scorer === void 0 ? void 0 : scorer.scoreType) || ((_a = scorer === null || scorer === void 0 ? void 0 : scorer.constructor) === null || _a === void 0 ? void 0 : _a.name) || "Unknown"; return name.charAt(0).toUpperCase() + name.slice(1); }).join(','); // Use trace name and shortened span ID (or trace ID if no span) const idPart = currentSpanId ? currentSpanId.substring(0, 8) : this.traceId.substring(0, 8); const evalName = `${this.name.charAt(0).toUpperCase() + this.name.slice(1)}-${idPart}-[${scorerNames}]`; // --- End eval name creation --- // Construct the evaluation payload const evalRunPayload = { organization_id: this.organizationId, log_results: options.logResults !== false, // Default to true project_name: this.projectName, eval_name: evalName, examples: [example], scorers: apiScorers, // Use the filtered list of API scorers model: options.model || "", metadata: {}, // Matches Python tracer judgment_api_key: this.apiKey, override: this.overwrite, // Use trace's overwrite setting rules: loadedRules, // Pass the processed rules trace_span_id: currentSpanId // <<< RENAMED: Assign the current span ID (matching backend) }; // --- Log the payload before storing (using INFO level) --- logger.info(`[TraceClient ${this.traceId}] Storing EvaluationRunPayload for span ${currentSpanId}:`, JSON.stringify(evalRunPayload, null, 2)); // --- End log before store --- // --- Store the payload instead of sending --- this.pendingEvaluationRuns.push(evalRunPayload); logger.info(`[TraceClient ${this.traceId}] Evaluation payload stored for span ${currentSpanId}`); // --- End store payload --- } catch (error) { logger.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`); throw error; // Re-throw after logging } }); } // OPTIONAL: Add a method to get the original name if needed elsewhere getOriginalName() { return this.originalName; } } /** * Singleton Tracer class. Manages overall tracing configuration and trace creation. */ class Tracer { constructor(config) { var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p; this.initialized = false; const isNode = typeof process !== 'undefined' && process.versions != null && process.versions.node != null; const envApiKey = isNode ? (_a = process.env) === null || _a === void 0 ? void 0 : _a.JUDGMENT_API_KEY : undefined; const envOrgId = isNode ? (_b = process.env) === null || _b === void 0 ? void 0 : _b.JUDGMENT_ORG_ID : undefined; const envProjectName = isNode ? (_c = process.env) === null || _c === void 0 ? void 0 : _c.JUDGMENT_PROJECT_NAME : undefined; const envMonitoring = isNode ? (_d = process.env) === null || _d === void 0 ? void 0 : _d.JUDGMENT_MONITORING : 'true'; const envEvaluations = isNode ? (_e = process.env) === null || _e === void 0 ? void 0 : _e.JUDGMENT_EVALUATIONS : 'true'; this.apiKey = (_g = (_f = config === null || config === void 0 ? void 0 : config.apiKey) !== null && _f !== void 0 ? _f : envApiKey) !== null && _g !== void 0 ? _g : ''; this.organizationId = (_j = (_h = config === null || config === void 0 ? void 0 : config.organizationId) !== null && _h !== void 0 ? _h : envOrgId) !== null && _j !== void 0 ? _j : ''; this.projectName = (_l = (_k = config === null || config === void 0 ? void 0 : config.projectName) !== null && _k !== void 0 ? _k : envProjectName) !== null && _l !== void 0 ? _l : 'default_project'; this.defaultRules = (_m = config === null || config === void 0 ? void 0 : config.rules) !== null && _m !== void 0 ? _m : []; let effectiveMonitoring = (_o = config === null || config === void 0 ? void 0 : config.enableMonitoring) !== null && _o !== void 0 ? _o : ((envMonitoring === null || envMonitoring === void 0 ? void 0 : envMonitoring.toLowerCase()) !== 'false'); if (effectiveMonitoring && (!this.apiKey || !this.organizationId)) { console.warn("JUDGMENT_API_KEY or JUDGMENT_ORG_ID missing. Monitoring disabled."); effectiveMonitoring = false; } this.enableMonitoring = effectiveMonitoring; this.enableEvaluations = effectiveMonitoring && ((_p = config === null || config === void 0 ? void 0 : config.enableEvaluations) !== null && _p !== void 0 ? _p : ((envEvaluations === null || envEvaluations === void 0 ? void 0 : envEvaluations.toLowerCase()) !== 'false')); this.initialized = true; } static getInstance(config) { if (!Tracer.instance) { Tracer.instance = new Tracer(config); } else if (config && !Tracer.instance.initialized) { console.warn("Tracer getInstance called with config after implicit initialization. Re-initializing."); Tracer.instance = new Tracer(config); } else if (config && Tracer.instance.initialized) { if (config.projectName && config.projectName !== Tracer.instance.projectName) { console.warn(`Attempting to re-initialize Tracer with different project_name. Original will be used: '${Tracer.instance.projectName}'.`); } if (config.rules && config.rules.length > 0 && Tracer.instance.defaultRules.length === 0) { try { console.warn("Setting default rules on Tracer instance after initial creation."); Tracer.instance.defaultRules = config.rules; } catch (e) { console.error("Failed to set default rules after tracer initialization:", e); } } else if (config.rules && JSON.stringify(config.rules) !== JSON.stringify(Tracer.instance.defaultRules)) { console.warn("Attempting to change default rules on Tracer after initialization. Original rules will be used."); } } return Tracer.instance; } getCurrentTrace() { return this.currentTrace; } startTrace(name, config) { var _a, _b; const parentTrace = this.getCurrentTrace(); const projectName = (_a = config.projectName) !== null && _a !== void 0 ? _a : this.projectName; const effectiveRules = Object.values(Object.fromEntries([...this.defaultRules, ...((_b = config.rules) !== null && _b !== void 0 ? _b : [])] .map(rule => { var _a; return [(_a = rule.rule_id) !== null && _a !== void 0 ? _a : rule.name, rule]; }))); const traceClient = new TraceClient({ tracer: this, name: name, projectName: projectName, overwrite: config.overwrite, rules: effectiveRules, enableMonitoring: this.enableMonitoring, enableEvaluations: this.enableEvaluations, parentTraceId: parentTrace === null || parentTrace === void 0 ? void 0 : parentTrace.traceId, parentName: parentTrace === null || parentTrace === void 0 ? void 0 : parentTrace.name, apiKey: this.apiKey, organizationId: this.organizationId, }); return traceClient; } *trace(name, options = {}) { var _a; const trace = this.startTrace(name, Object.assign({}, options)); const shouldCreateRootSpan = (_a = options.createRootSpan) !== null && _a !== void 0 ? _a : true; const prevTrace = this.currentTrace; if (shouldCreateRootSpan) { for (const span of trace.span(name, { spanType: 'chain' })) { this.currentTrace = trace; yield trace; this.currentTrace = prevTrace; } } else { this.currentTrace = trace; yield trace; this.currentTrace = prevTrace; } if (trace.enableMonitoring) { trace.save(false).catch(saveErr => { console.error(`Failed to save completed trace '${name}' (${trace.traceId}):`, saveErr); }); } } observe(options) { if (!this.enableMonitoring) { return (func) => (...args) => Promise.resolve(func(...args)); } return (func) => { var _a, _b, _c; const spanName = (_b = (_a = options === null || options === void 0 ? void 0 : options.name) !== null && _a !== void 0 ? _a : func.name) !== null && _b !== void 0 ? _b : 'anonymous_function'; const spanType = (_c = options === null || options === void 0 ? void 0 : options.spanType) !== null && _c !== void 0 ? _c : 'span'; return asyncFunctionWrapper(((...args) => __awaiter(this, void 0, void 0, function* () { const currentTrace = this.getCurrentTrace(); let output; let error; if (!currentTrace) { for (const trace of this.trace(spanName, { createRootSpan: false })) { for (const span of trace.span(spanName, { spanType })) { span.recordInput({ args: args }); try { span.recordOutput(output = yield func(...args)); } catch (e) { span.recordError(error = e); } } } } else { for (const span of currentTrace.span(spanName, { spanType })) { span.recordInput({ args: args }); try { span.recordOutput(output = yield func(...args)); } catch (e) { span.recordError(error = e); } } } if (error) { throw error; } // @ts-expect-error output is assigned return output; })).bind(func)); }; } } Tracer.instance = null; // --- Helper Functions for Wrapping LLM Clients --- // // Return owner and method name for explicit patching function _getClientConfig(client) { var _a, _b, _c, _d, _e, _f; // Check OpenAI structure first if (client instanceof OpenAI && typeof ((_b = (_a = client === null || client === void 0 ? void 0 : client.chat) === null || _a === void 0 ? void 0 : _a.completions) === null || _b === void 0 ? void 0 : _b.create) === 'function') { return { spanName: "OPENAI_API_CALL", originalMethod: client.chat.completions.create, methodOwner: client.chat.completions, methodName: 'create' }; } // Check Anthropic structure next else if (client instanceof Anthropic && typeof ((_c = client === null || client === void 0 ? void 0 : client.messages) === null || _c === void 0 ? void 0 : _c.create) === 'function') { return { spanName: "ANTHROPIC_API_CALL", originalMethod: client.messages.create, methodOwner: client.messages, methodName: 'create' }; } // Check for Together structure (duck typing - looking for .chat.completions.create for v0.7.0) else if (typeof ((_e = (_d = client === null || client === void 0 ? void 0 : client.chat) === null || _d === void 0 ? void 0 : _d.completions) === null || _e === void 0 ? void 0 : _e.create) === 'function') { const chatCompletionsObj = client.chat.completions; return { spanName: "TOGETHER_API_CALL", originalMethod: chatCompletionsObj.create, methodOwner: chatCompletionsObj, // Owner is chat.completions methodName: 'create' }; } // Fallback/Warning if none match logger.warn("Cannot wrap client: Unsupported type or incompatible SDK structure.", { clientType: (_f = client === null || client === void 0 ? void 0 : client.constructor) === null || _f === void 0 ? void 0 : _f.name }); return null; } function _formatInputData(client, args) { var _a, _b; const params = args[0] || {}; try { // Check for OpenAI or Together (assuming chat structure) if (client instanceof OpenAI || (((_a = client === null || client === void 0 ? void 0 : client.constructor) === null || _a === void 0 ? void 0 : _a.name) === 'Together' && ((_b = client === null || client === void 0 ? void 0 : client.chat) === null || _b === void 0 ? void 0 : _b.completions))) { return { model: params.model, messages: params.messages, /* other potential params */ }; } else if (client instanceof Anthropic) { return { model: params.model, messages: params.messages, max_tokens: params.max_tokens, }; } } catch (e) { logger.error("Error formatting LLM input:", { error: e instanceof Error ? e.message : String(e), params }); return { raw_params: params }; } return { raw_params: params