judgeval
Version:
Judgment SDK for TypeScript/JavaScript
969 lines • 58.3 kB
JavaScript
// Compiler-emitted helper that runs a generator function as an async function:
// each value the generator yields is awaited, and the returned promise settles
// with the generator's return value (or rejects with an error it throws).
// An already-defined `this.__awaiter` is reused when present.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - arguments forwarded to the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function holding the async body
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap anything that is not already a P instance so `.then` is always available.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a resolved value; an uncaught throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve once the generator is done, otherwise wait on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// `generator` is rebound from the generator function to its iterator before the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Core Node.js imports
import { v4 as uuidv4 } from 'uuid';
import { AsyncLocalStorage } from 'async_hooks';
// Installed SDKs
import OpenAI from 'openai';
import Anthropic from '@anthropic-ai/sdk';
// Local Imports
import { JUDGMENT_TRACES_SAVE_API_URL, JUDGMENT_TRACES_FETCH_API_URL, JUDGMENT_TRACES_DELETE_API_URL, JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL,
// Add other necessary constants if needed
} from '../constants.js';
import { APIJudgmentScorer } from '../scorers/base-scorer.js';
import logger from './logger-instance.js'; // Use the shared winston logger instance
// On their own, span() and trace() are fully synchronous
// The issue is with asynchronous functions
// To address this, I made a wrapper for async functions that'll handle all the context stuff
/**
 * Per-context tracing state: the entries recorded so far and the stack of
 * currently open 'enter' entries. Every instance owns fresh arrays.
 */
class TraceClientContext {
    // Recorded trace entries, in insertion order.
    entries = [];
    // Stack of open span entries; the last element is the current span.
    entryStack = [];
}
// Async-local store that carries the TraceClientContext of the current async call chain.
const traceClientContextAsyncLocalStorage = new AsyncLocalStorage();
// We could use .enterWith(), but the documentation advises against it
// Fallback context, returned by getTraceClientContext() when no store is active.
let rootTraceClientContext = new TraceClientContext();
/**
 * Resolves the trace context for the current execution.
 *
 * @returns The context stored in the async-local storage when one is set,
 *          otherwise the module-level root context.
 */
function getTraceClientContext() {
    const activeContext = traceClientContextAsyncLocalStorage.getStore();
    if (activeContext === null || activeContext === undefined) {
        return rootTraceClientContext;
    }
    return activeContext;
}
/**
 * Wraps an async function so that each call runs inside its own
 * TraceClientContext (installed through the async-local storage).
 *
 * The new context starts with the caller's current span (if any) on its entry
 * stack. When the call settles, successfully or with an error, the entries
 * recorded inside it are merged back into the caller's context, so entries
 * of a failing call are not lost.
 *
 * @param func Function to wrap; it is invoked with the arguments given to the wrapper.
 * @returns An async function resolving to (or rejecting like) `func`.
 */
function asyncFunctionWrapper(func) {
    return async (...args) => {
        const parentTraceClientContext = getTraceClientContext();
        const traceClientContext = new TraceClientContext();
        const lastEntry = parentTraceClientContext.entryStack.at(-1);
        if (lastEntry) {
            traceClientContext.entryStack.push(lastEntry);
        }
        try {
            return await traceClientContextAsyncLocalStorage
                .run(traceClientContext, () => func(...args));
        }
        finally {
            // Merge in `finally` so entries survive a throwing or rejecting `func`.
            // The longer array is kept as the parent's array and the shorter one is
            // spread into it; note that this puts the child's entries first when
            // the child recorded more entries than the parent.
            if (traceClientContext.entries.length > parentTraceClientContext.entries.length) {
                [traceClientContext.entries, parentTraceClientContext.entries] =
                    [parentTraceClientContext.entries, traceClientContext.entries];
            }
            parentTraceClientContext.entries.push(...traceClientContext.entries);
        }
    };
}
// --- API Interaction Client ---
/**
 * Client for interacting with Judgment trace API endpoints.
 *
 * Every request carries JSON content-type, bearer-token and organization-id
 * headers built from the credentials passed to the constructor.
 */
class TraceManagerClient {
    /**
     * @param apiKey Judgment API key (required).
     * @param organizationId Judgment organization ID (required).
     * @throws {Error} When either credential is missing.
     */
    constructor(apiKey, organizationId) {
        if (!apiKey) {
            throw new Error("TraceManagerClient requires a Judgment API key.");
        }
        if (!organizationId) {
            throw new Error("TraceManagerClient requires a Judgment Organization ID.");
        }
        this.apiKey = apiKey;
        this.organizationId = organizationId;
    }

    /**
     * Performs an authenticated request with the global `fetch`.
     *
     * @param url Endpoint URL.
     * @param options `fetch` options; `options.headers` override the default headers.
     * @returns The raw response (callers must check `ok` / `status` themselves),
     *          or `null` when the server answered 204 No Content.
     * @throws Re-throws any error raised by `fetch` after logging it.
     */
    async _fetch(url, options = {}) {
        const headers = {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${this.apiKey}`,
            'X-Organization-Id': this.organizationId,
            ...(options.headers || {}),
        };
        try {
            const response = await fetch(url, { ...options, headers });
            // 204 has no body to read; signal "success, nothing to return" with null.
            if (response.status === 204) {
                return null;
            }
            return response;
        }
        catch (error) {
            console.error(`Network or fetch error during ${options.method || 'GET'} ${url}:`, error);
            throw error;
        }
    }

    /**
     * Requests a trace by ID.
     *
     * @param traceId ID of the trace to fetch.
     * @returns The raw response, or `null` on 204.
     */
    async fetchTrace(traceId) {
        return this._fetch(JUDGMENT_TRACES_FETCH_API_URL, {
            method: 'POST',
            body: JSON.stringify({ trace_id: traceId }),
        });
    }

    /**
     * Sends trace data to the save endpoint.
     *
     * @param traceData JSON-serializable trace payload.
     * @returns The parsed JSON response body, or `null` when the API answered 204 No Content.
     * @throws {Error} On a non-2xx status or when a success body is not valid JSON.
     */
    async saveTrace(traceData) {
        const response = await this._fetch(JUDGMENT_TRACES_SAVE_API_URL, {
            method: 'POST',
            body: JSON.stringify(traceData),
        });
        // _fetch reports 204 No Content as null: that is a successful save with no body,
        // not a missing response.
        if (response === null) {
            return null;
        }
        if (!response) {
            throw new Error('Failed to save trace data: No response received from API.');
        }
        if (response.status === 400) {
            const errorBody = await response.text();
            throw new Error(`Failed to save trace data: Check your Trace name for conflicts, set overwrite=True to overwrite existing traces: ${response.status} ${response.statusText || ''} - ${errorBody}`);
        }
        if (!response.ok) { // Any other non-2xx status
            const errorBody = await response.text();
            throw new Error(`Failed to save trace data: Status ${response.status} ${response.statusText || '(No status text)'} - ${errorBody}`);
        }
        let responseData;
        try {
            responseData = await response.json();
        }
        catch (parseError) {
            logger.warn("Failed to parse successful API response JSON.", { error: parseError });
            throw new Error(`API request succeeded (${response.status}), but failed to parse JSON response.`);
        }
        if (responseData?.ui_results_url) {
            console.info(`\nš View trace: ${responseData.ui_results_url}\n`);
        }
        return responseData;
    }

    /**
     * Deletes a single trace.
     *
     * @param traceId ID of the trace to delete.
     * @returns The raw response, or `null` on 204.
     */
    async deleteTrace(traceId) {
        return this.deleteTraces([traceId]);
    }

    /**
     * Deletes several traces with one request.
     *
     * @param traceIds IDs of the traces to delete.
     * @returns The raw response, or `null` on 204.
     */
    async deleteTraces(traceIds) {
        return this._fetch(JUDGMENT_TRACES_DELETE_API_URL, {
            method: 'DELETE',
            body: JSON.stringify({ trace_ids: traceIds }),
        });
    }

    /**
     * Calculate token costs directly using the API endpoint.
     * Failures are logged and reported as `null`; this method does not throw.
     *
     * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
     * @param promptTokens Number of tokens in the prompt/input
     * @param completionTokens Number of tokens in the completion/output
     * @returns The parsed JSON response body, or `null` on any failure
     */
    async calculateTokenCosts(model, promptTokens, completionTokens) {
        try {
            const response = await this._fetch(JUDGMENT_CALCULATE_TOKEN_COSTS_API_URL, {
                method: 'POST',
                body: JSON.stringify({
                    model,
                    prompt_tokens: promptTokens,
                    completion_tokens: completionTokens,
                }),
            });
            if (!response) {
                // _fetch returns null for 204 No Content, so there is nothing to parse.
                logger.warn(`No response received when calculating token costs for model ${model}.`);
                return null;
            }
            if (!response.ok) {
                const errorBody = await response.text();
                logger.warn(`API error calculating token costs for model ${model}: ${response.status} ${response.statusText}`, { errorBody });
                return null;
            }
            return await response.json();
        }
        catch (error) {
            logger.warn(`Failed to calculate token costs for model ${model}.`, { error: error instanceof Error ? error.message : String(error) });
            return null;
        }
    }
}
// --- Helper Functions ---
/**
 * Sanitizes a name: every run of whitespace becomes a single underscore, then
 * every character other than ASCII letters, digits, `_`, `.` and `-` is dropped.
 *
 * @param name Name to sanitize.
 * @returns The sanitized name (may be an empty string).
 */
function sanitizeName(name) {
    const withUnderscores = name.replace(/\s+/g, '_');
    const allowedCharsOnly = withUnderscores.replace(/[^a-zA-Z0-9_.-]/g, '');
    return allowedCharsOnly;
}
// --- Core Trace Classes ---
/**
 * Represents an ongoing trace context.
 *
 * Span entries are recorded into the TraceClientContext returned by
 * getTraceClientContext(); `save()` condenses them into one record per span and
 * hands the payload to the TraceManagerClient. Evaluation requests are queued
 * on the instance and shipped together with the trace.
 */
class TraceClient {
    /**
     * @param config Trace configuration. Reads `traceId`, `name`, `projectName`,
     *        `overwrite`, `rules`, `enableMonitoring`, `enableEvaluations`,
     *        `parentTraceId`, `parentName`, `apiKey`, `organizationId`, and
     *        `tracer` (whose `projectName`, `enableMonitoring` and
     *        `enableEvaluations` serve as fallbacks).
     */
    constructor(config) {
        this.traceManager = null; // Made public for wrap access
        this._spanDepths = {}; // Depth of every currently open span, keyed by span ID
        this.pendingEvaluationRuns = []; // Evaluation payloads waiting for the next save()
        this.traceId = config.traceId || uuidv4();
        this.originalName = config.name || 'default_trace';
        this.name = sanitizeName(this.originalName);
        // If the sanitized name is empty, fall back to a default derived from the trace ID
        if (!this.name) {
            console.warn(`Original trace name "${this.originalName}" sanitized to empty string. Using default_trace_${this.traceId.substring(0, 8)}.`);
            this.name = `default_trace_${this.traceId.substring(0, 8)}`;
        }
        this.projectName = config.projectName ?? config.tracer.projectName;
        this.overwrite = config.overwrite ?? false;
        this.rules = config.rules ?? [];
        // Monitoring needs both credentials; without them it is switched off for this trace
        let effectiveMonitoring = config.enableMonitoring ?? config.tracer.enableMonitoring;
        if (effectiveMonitoring && (!config.apiKey || !config.organizationId)) {
            console.warn(`TraceClient ${this.traceId}: Monitoring requires JUDGMENT_API_KEY and JUDGMENT_ORG_ID. Disabling monitoring for this trace.`);
            effectiveMonitoring = false;
        }
        this.enableMonitoring = effectiveMonitoring;
        // Evaluations depend on monitoring
        this.enableEvaluations = effectiveMonitoring && (config.enableEvaluations ?? config.tracer.enableEvaluations);
        this.parentTraceId = config.parentTraceId;
        this.parentName = config.parentName;
        this.apiKey = config.apiKey;
        this.organizationId = config.organizationId;
        this.startTime = Date.now() / 1000; // Seconds since epoch; -1 after a successful save
        if (this.enableMonitoring) {
            this.traceManager = new TraceManagerClient(this.apiKey, this.organizationId);
        }
    }

    /**
     * Appends an entry to the current context; a no-op while monitoring is disabled.
     *
     * @param entry Trace entry to record.
     */
    addEntry(entry) {
        const traceClientContext = getTraceClientContext();
        if (this.enableMonitoring) {
            traceClientContext.entries.push(entry);
        }
    }

    /**
     * Serializes a value for log output with every `judgment_api_key` property
     * redacted. Never throws: unserializable values yield a placeholder string.
     *
     * @param value Value to serialize.
     * @returns Pretty-printed JSON or a placeholder describing the failure.
     */
    _stringifyForLog(value) {
        try {
            return JSON.stringify(value, (key, val) => (key === 'judgment_api_key' ? '[REDACTED]' : val), 2);
        }
        catch (error) {
            return `[unserializable: ${error instanceof Error ? error.message : String(error)}]`;
        }
    }

    /**
     * Shared implementation of recordInput / recordOutput / recordError: adds an
     * entry of the given type to the span on top of the entry stack, or warns
     * and does nothing when no span is open.
     *
     * @param type Entry type ('input', 'output' or 'error').
     * @param payload Type-specific fields (`{ inputs }` or `{ output }`).
     * @param warnLabel Word used in the "no current entry" warning.
     * @param messagePrefix Text placed before the function name in `message`.
     */
    _recordOnCurrentSpan(type, payload, warnLabel, messagePrefix) {
        const currentEntry = getTraceClientContext().entryStack.at(-1);
        if (!currentEntry || !currentEntry.span_id) {
            console.warn(`No current entry to record ${warnLabel} to\nStack trace: ${new Error().stack}`);
            return;
        }
        this.addEntry({
            type,
            span_id: currentEntry.span_id,
            ...payload,
            function: currentEntry.function,
            // The depth map entry is gone once the span has ended; fall back to the
            // depth stored on the enter entry itself.
            depth: this._spanDepths[currentEntry.span_id] ?? currentEntry.depth,
            created_at: Date.now() / 1000,
            span_type: currentEntry.span_type,
            message: `${messagePrefix} ${currentEntry.function}`,
        });
    }

    /** Records the inputs of the current span. */
    recordInput(inputs) {
        this._recordOnCurrentSpan('input', { inputs }, 'input', 'Inputs to');
    }

    /** Records the output of the current span. */
    recordOutput(output) {
        this._recordOnCurrentSpan('output', { output }, 'output', 'Output from');
    }

    /** Records an error on the current span (stored in the entry's `output` field). */
    recordError(error) {
        this._recordOnCurrentSpan('error', { output: error }, 'error', 'Error from');
    }

    /**
     * Opens a new span as a child of the span currently on top of the entry stack.
     *
     * @param name Span / function name.
     * @param options `spanType` selects the span type (default 'span').
     */
    startSpan(name, options = {}) {
        const traceClientContext = getTraceClientContext();
        const parentEntry = traceClientContext.entryStack.at(-1);
        const spanId = uuidv4();
        const spanType = options.spanType ?? 'span';
        const startTime = Date.now() / 1000;
        let depth = 0;
        let parentSpanId = undefined;
        if (parentEntry && parentEntry.span_id) {
            // The parent may have ended already (its depth removed from the map) while
            // still sitting on an inherited stack; use its recorded depth then, so the
            // result is never NaN.
            const parentDepth = this._spanDepths[parentEntry.span_id] ?? parentEntry.depth ?? -1;
            depth = parentDepth + 1;
            parentSpanId = parentEntry.span_id;
        }
        this._spanDepths[spanId] = depth;
        const entry = {
            type: 'enter',
            function: name,
            span_id: spanId,
            depth: depth,
            created_at: startTime,
            span_type: spanType,
            parent_span_id: parentSpanId,
            message: name,
        };
        this.addEntry(entry);
        traceClientContext.entryStack.push(entry);
    }

    /**
     * Closes the span on top of the entry stack and records its 'exit' entry.
     * Warns and does nothing when no span is open.
     */
    endSpan() {
        const traceClientContext = getTraceClientContext();
        const enterEntry = traceClientContext.entryStack.pop();
        if (!enterEntry || !enterEntry.span_id) {
            console.warn("No enter entry to end");
            return;
        }
        const endTime = Date.now() / 1000;
        this.addEntry({
            type: 'exit',
            function: enterEntry.function,
            span_id: enterEntry.span_id,
            depth: this._spanDepths[enterEntry.span_id] ?? enterEntry.depth,
            created_at: endTime,
            duration: endTime - enterEntry.created_at,
            span_type: enterEntry.span_type,
            message: `ā ${enterEntry.function}`,
        });
        // Clean up depth tracking
        delete this._spanDepths[enterEntry.span_id];
    }

    /**
     * Generator form of a span, meant for `for (const trace of client.span(name)) { ... }`.
     * Yields this client exactly once. With monitoring enabled the span is opened
     * before the yield and closed afterwards, including when the consumer throws
     * or breaks out of the loop.
     *
     * @param name Span / function name.
     * @param options Forwarded to startSpan.
     */
    *span(name, options = {}) {
        if (!this.enableMonitoring) {
            yield this;
            return;
        }
        this.startSpan(name, options);
        try {
            yield this;
        }
        finally {
            // Runs on normal completion and when the iterator is closed early.
            this.endSpan();
        }
    }

    /**
     * Retrieves the ID of the currently active span in this trace context.
     * @returns {string | undefined} The ID of the current span, or undefined if none is active.
     */
    getCurrentSpanId() {
        const traceClientContext = getTraceClientContext();
        if (!traceClientContext) {
            console.warn("[Judgeval] getCurrentSpanId called outside of an active trace context.");
            return undefined;
        }
        // Top of the stack is the latest 'enter' entry; the stack is empty before the first span.
        return traceClientContext.entryStack.at(-1)?.span_id;
    }

    /** @returns Seconds elapsed since this trace client was created. */
    getDuration() {
        return (Date.now() / 1000) - this.startTime;
    }

    /**
     * Collapses raw enter/exit/input/output/error entries into one record per span
     * and orders the records depth-first (roots and siblings by creation time).
     * Spans whose parent is not among the entries are appended at the end.
     *
     * @param rawEntries Entries as recorded by the span/record methods.
     * @returns Flat, ordered list of condensed span records.
     */
    condenseTrace(rawEntries) {
        const toIsoString = (epochSeconds) => new Date((epochSeconds ?? 0) * 1000).toISOString();
        const byCreatedAt = (a, b) => Date.parse(a.created_at) - Date.parse(b.created_at);

        // Pass 1: fold all entries of a span into a single record.
        const spansById = {};
        for (const entry of rawEntries) {
            const spanId = entry.span_id;
            if (!spanId) {
                continue;
            }
            if (!spansById[spanId]) {
                spansById[spanId] = {
                    span_id: spanId,
                    function: entry.function || 'unknown',
                    depth: entry.depth ?? 0,
                    created_at: toIsoString(entry.created_at),
                    trace_id: this.traceId,
                    parent_span_id: entry.parent_span_id,
                    span_type: entry.span_type || 'span',
                    inputs: null,
                    output: null,
                    duration: null,
                    children: [],
                };
            }
            const spanData = spansById[spanId];
            switch (entry.type) {
                case 'enter':
                    spanData.function = entry.function || spanData.function;
                    spanData.depth = entry.depth ?? spanData.depth;
                    spanData.created_at = toIsoString(entry.created_at);
                    spanData.parent_span_id = entry.parent_span_id;
                    spanData.span_type = entry.span_type || spanData.span_type;
                    spanData.start_time = entry.created_at; // Numeric, kept only for the duration fallback
                    break;
                case 'exit':
                    spanData.duration = entry.duration ?? spanData.duration;
                    spanData.end_time = entry.created_at; // Numeric, kept only for the duration fallback
                    if (spanData.duration === null && spanData.start_time && spanData.end_time) {
                        spanData.duration = spanData.end_time - spanData.start_time;
                    }
                    break;
                case 'input':
                    if (spanData.inputs === null && entry.inputs) {
                        spanData.inputs = entry.inputs;
                    }
                    else if (typeof spanData.inputs === 'object' && typeof entry.inputs === 'object') {
                        // Later input entries are merged key-by-key into the earlier ones
                        spanData.inputs = { ...spanData.inputs, ...entry.inputs };
                    }
                    break;
                case 'output':
                case 'error':
                    spanData.output = entry.output;
                    break;
            }
        }

        // Pass 2: settle durations and strip the temporary numeric timestamps.
        const spansList = Object.values(spansById);
        for (const span of spansList) {
            if (span.duration === null && span.start_time && span.end_time) {
                span.duration = span.end_time - span.start_time;
            }
            delete span.start_time;
            delete span.end_time;
        }

        // Pass 3: group spans under their parents and sort each group by creation time.
        const childrenMap = {};
        const roots = [];
        for (const span of spansList) {
            const parentId = span.parent_span_id;
            if (parentId === undefined || parentId === null) {
                roots.push(span);
                continue;
            }
            if (!childrenMap[parentId]) {
                childrenMap[parentId] = [];
            }
            childrenMap[parentId].push(span);
        }
        roots.sort(byCreatedAt);
        for (const parentId in childrenMap) {
            childrenMap[parentId].sort(byCreatedAt);
        }

        // Pass 4: depth-first flattening; `visited` guards against revisiting a span.
        const sortedCondensedList = [];
        const visited = new Set();
        const visit = (span) => {
            if (visited.has(span.span_id)) {
                return;
            }
            visited.add(span.span_id);
            sortedCondensedList.push(span);
            for (const child of childrenMap[span.span_id] || []) {
                visit(child);
            }
        };
        for (const rootSpan of roots) {
            visit(rootSpan);
        }
        for (const span of spansList) {
            if (!visited.has(span.span_id)) {
                console.warn(`Orphaned span detected: ${span.span_id}, adding to end of list.`);
                visit(span);
            }
        }
        return sortedCondensedList;
    }

    /**
     * Builds the trace payload from the current context and, unless `emptySave`
     * is set, sends it through the trace manager.
     *
     * On a successful send the context's entries and entry stack, the pending
     * evaluation runs and the start time are reset. Log output of the payload
     * has API keys redacted; the payload itself is unchanged.
     *
     * @param emptySave When true, build and return the payload without sending it.
     * @returns `{ traceId, traceData }`, or `null` when monitoring is disabled,
     *          the trace was already saved, or the send failed.
     */
    async save(emptySave = false) {
        if (!this.enableMonitoring || this.startTime === -1 || !this.traceManager) {
            logger.info(`[TraceClient ${this.traceId}] Monitoring disabled or trace not started. Not saving.`);
            return null;
        }
        const endTime = Date.now() / 1000; // Current time in seconds
        const duration = endTime - this.startTime;
        const condensedEntries = this.condenseTrace(getTraceClientContext().entries);

        // Sum token usage and embedded costs over all LLM spans
        const tokenCounts = {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            prompt_tokens_cost_usd: 0.0,
            completion_tokens_cost_usd: 0.0,
            total_cost_usd: 0.0,
        };
        const processedSpanIds = new Set(); // Count every span at most once
        for (const entry of condensedEntries) {
            if (processedSpanIds.has(entry.span_id)) {
                continue;
            }
            processedSpanIds.add(entry.span_id);
            const usage = entry.span_type === 'llm' ? entry.output?.usage : undefined;
            if (!usage) {
                continue;
            }
            // Both naming schemes are accepted: prompt/completion and input/output
            const promptTokens = usage.prompt_tokens || usage.input_tokens || 0;
            const completionTokens = usage.completion_tokens || usage.output_tokens || 0;
            tokenCounts.prompt_tokens += promptTokens;
            tokenCounts.completion_tokens += completionTokens;
            tokenCounts.total_tokens += usage.total_tokens || (promptTokens + completionTokens);
            if (usage.prompt_tokens_cost_usd !== undefined) {
                tokenCounts.prompt_tokens_cost_usd += usage.prompt_tokens_cost_usd;
            }
            if (usage.completion_tokens_cost_usd !== undefined) {
                tokenCounts.completion_tokens_cost_usd += usage.completion_tokens_cost_usd;
            }
            if (usage.total_cost_usd !== undefined) {
                tokenCounts.total_cost_usd += usage.total_cost_usd;
            }
        }

        const traceData = {
            trace_id: this.traceId,
            name: this.name, // Sanitized name
            project_name: this.projectName,
            created_at: new Date(this.startTime * 1000).toISOString(),
            duration: duration < 0 ? 0 : duration,
            token_counts: tokenCounts,
            entries: condensedEntries,
            evaluation_runs: [...this.pendingEvaluationRuns],
            overwrite: this.overwrite,
            parent_trace_id: this.parentTraceId,
            parent_name: this.parentName,
        };
        // Evaluation runs carry the API key, so only redacted copies are logged.
        logger.info(`[TraceClient ${this.traceId}] Payload to be saved:`, this._stringifyForLog(traceData));
        logger.info(`[TraceClient ${this.traceId}] Evaluation runs included in payload:`, this._stringifyForLog(traceData.evaluation_runs));
        if (emptySave) {
            logger.info(`[TraceClient ${this.traceId}] emptySave=true, skipping actual save call.`);
            return { traceId: this.traceId, traceData };
        }
        try {
            logger.info(`[TraceClient ${this.traceId}] Calling traceManager.saveTrace...`);
            const response = await this.traceManager.saveTrace(traceData);
            logger.info(`[TraceClient ${this.traceId}] Trace saved successfully. Response:`, response);
            // Reset trace state after a successful save
            getTraceClientContext().entries = [];
            getTraceClientContext().entryStack = [];
            this.pendingEvaluationRuns = [];
            this.startTime = -1;
            return { traceId: this.traceId, traceData: traceData };
        }
        catch (error) {
            // State is deliberately kept on failure, so a later save() can retry
            logger.error(`[TraceClient ${this.traceId}] Error saving trace:`, error);
            return null;
        }
    }

    /**
     * Prints the raw entries of the current context to the console, indented by
     * depth. Uses console.log on purpose: this is direct user output.
     */
    print() {
        if (!this.enableMonitoring) {
            console.log("Monitoring was disabled. No trace entries recorded.");
            return;
        }
        const { entries } = getTraceClientContext();
        if (entries.length === 0) {
            console.log("No trace entries recorded.");
            return;
        }
        // Keeps printed values to at most 200 characters
        const truncate = (text) => (text && text.length > 200 ? text.substring(0, 197) + '...' : text);
        console.log(`\n--- Trace Details: ${this.name} (ID: ${this.traceId}) ---`);
        for (const entry of entries) {
            const indent = " ".repeat(entry.depth ?? 0);
            const timeStr = entry.created_at ? `@ ${new Date(entry.created_at * 1000).toISOString()}` : '';
            const shortSpanId = entry.span_id ? `(id: ${entry.span_id.substring(0, 8)}...)` : '';
            const shortParentId = entry.parent_span_id ? `(parent: ${entry.parent_span_id.substring(0, 8)}...)` : '';
            try {
                switch (entry.type) {
                    case 'enter':
                        console.log(`${indent}ā ${entry.function || 'unknown'} ${shortSpanId} ${shortParentId} [${entry.span_type || 'span'}] ${timeStr}`);
                        break;
                    case 'exit': {
                        const durationStr = entry.duration !== undefined ? `(${entry.duration.toFixed(3)}s)` : '';
                        console.log(`${indent}ā ${entry.function || 'unknown'} ${shortSpanId} ${durationStr} ${timeStr}`);
                        break;
                    }
                    case 'input': {
                        const inputStr = truncate(JSON.stringify(entry.inputs));
                        console.log(`${indent} Input (for ${shortSpanId}): ${inputStr || '{}'}`);
                        break;
                    }
                    case 'output':
                    case 'error': {
                        const outputStr = truncate(JSON.stringify(entry.output));
                        const prefix = entry.type === 'error' ? 'Error' : 'Output';
                        console.log(`${indent} ${prefix} (for ${shortSpanId}): ${outputStr || 'null'}`);
                        break;
                    }
                }
            }
            catch (stringifyError) {
                // e.g. circular structures in inputs/outputs
                const errorMessage = stringifyError instanceof Error ? stringifyError.message : String(stringifyError);
                console.log(`${indent}! Error formatting entry: ${errorMessage}`);
                console.log(`${indent} Raw entry:`, entry);
            }
        }
        console.log(`--- End Trace: ${this.name} ---`);
    }

    /**
     * Deletes this trace through the trace manager.
     *
     * @returns The trace manager's result, or `null` when monitoring is disabled.
     * @throws Re-throws any error from the trace manager after logging it.
     */
    async delete() {
        if (!this.enableMonitoring || !this.traceManager) {
            logger.warn(`Cannot delete trace ${this.traceId}, monitoring disabled or manager missing.`);
            return null;
        }
        try {
            const result = await this.traceManager.deleteTrace(this.traceId);
            logger.info(`Trace ${this.traceId} deleted successfully.`);
            return result;
        }
        catch (error) {
            logger.error(`Failed to delete trace ${this.traceId}.`, { error: error instanceof Error ? error.message : String(error) });
            throw error;
        }
    }

    /**
     * Asynchronously evaluate an example using the provided scorers,
     * embedding the evaluation request into the trace data.
     * The request is queued on this client and sent by the next save(); nothing
     * is sent from here.
     *
     * @param scorers Array of scorers to use for evaluation; only APIJudgmentScorer
     *        instances are forwarded
     * @param options Evaluation options including input, outputs, and metadata
     * @returns Promise that resolves when the evaluation entry has been queued (or skipped)
     * @throws {Error} When rules are set and a non-API scorer is supplied
     */
    async asyncEvaluate(scorers, options = {}) {
        if (!this.enableEvaluations) {
            logger.warn("Evaluations are disabled. Skipping async evaluation.");
            return;
        }
        if (!scorers || scorers.length === 0) {
            logger.warn("No scorers provided. Skipping async evaluation.");
            return;
        }
        const apiScorers = scorers.filter((s) => s instanceof APIJudgmentScorer);
        if (apiScorers.length === 0) {
            logger.warn("No APIJudgmentScorers found in the provided scorers list. Skipping async evaluation as backend requires API scorers.");
            return;
        }
        const loadedRules = this.rules; // TODO: Add ScorerWrapper-like processing if needed in TS
        // Rules only work with API scorers
        if (loadedRules && loadedRules.length > 0 && apiScorers.length !== scorers.length) {
            throw new Error("Cannot use Judgeval scorers, you can only use API scorers when using rules. Please either remove rules or use only APIJudgmentScorer types.");
        }
        const example = {
            input: options.input || "",
            actual_output: options.actualOutput || "",
            expected_output: options.expectedOutput || "",
            context: options.context || [],
            retrieval_context: options.retrievalContext || [],
            tools_called: options.toolsCalled || [],
            expected_tools: options.expectedTools || [],
            additional_metadata: options.additionalMetadata || {},
            trace_id: this.traceId,
        };
        try {
            const currentEntry = getTraceClientContext().entryStack.at(-1);
            if (!currentEntry) {
                logger.warn(`No current entry to associate evaluation with\nStack trace: ${new Error().stack}`);
                return;
            }
            const currentSpanId = currentEntry.span_id;
            const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);
            // Run name: <Trace name>-<short span or trace ID>-[<Scorer names>]
            const scorerNames = apiScorers
                .map((scorer) => capitalize(scorer?.scoreType || scorer?.constructor?.name || "Unknown"))
                .join(',');
            const idPart = currentSpanId ? currentSpanId.substring(0, 8) : this.traceId.substring(0, 8);
            const evalName = `${capitalize(this.name)}-${idPart}-[${scorerNames}]`;
            const evalRunPayload = {
                organization_id: this.organizationId,
                log_results: options.logResults !== false, // Default to true
                project_name: this.projectName,
                eval_name: evalName,
                examples: [example],
                scorers: apiScorers,
                model: options.model || "",
                metadata: {},
                judgment_api_key: this.apiKey,
                override: this.overwrite,
                rules: loadedRules,
                trace_span_id: currentSpanId,
            };
            // The payload contains the API key: log a redacted copy only.
            logger.info(`[TraceClient ${this.traceId}] Storing EvaluationRunPayload for span ${currentSpanId}:`, this._stringifyForLog(evalRunPayload));
            this.pendingEvaluationRuns.push(evalRunPayload);
            logger.info(`[TraceClient ${this.traceId}] Evaluation payload stored for span ${currentSpanId}`);
        }
        catch (error) {
            logger.error(`Failed during asyncEvaluate execution: ${error instanceof Error ? error.message : String(error)}`);
            throw error;
        }
    }

    /** @returns The trace name as originally supplied, before sanitizing. */
    getOriginalName() {
        return this.originalName;
    }
}
/**
* Singleton Tracer class. Manages overall tracing configuration and trace creation.
*/
class Tracer {
constructor(config) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
this.initialized = false;
const isNode = typeof process !== 'undefined' && process.versions != null && process.versions.node != null;
const envApiKey = isNode ? (_a = process.env) === null || _a === void 0 ? void 0 : _a.JUDGMENT_API_KEY : undefined;
const envOrgId = isNode ? (_b = process.env) === null || _b === void 0 ? void 0 : _b.JUDGMENT_ORG_ID : undefined;
const envProjectName = isNode ? (_c = process.env) === null || _c === void 0 ? void 0 : _c.JUDGMENT_PROJECT_NAME : undefined;
const envMonitoring = isNode ? (_d = process.env) === null || _d === void 0 ? void 0 : _d.JUDGMENT_MONITORING : 'true';
const envEvaluations = isNode ? (_e = process.env) === null || _e === void 0 ? void 0 : _e.JUDGMENT_EVALUATIONS : 'true';
this.apiKey = (_g = (_f = config === null || config === void 0 ? void 0 : config.apiKey) !== null && _f !== void 0 ? _f : envApiKey) !== null && _g !== void 0 ? _g : '';
this.organizationId = (_j = (_h = config === null || config === void 0 ? void 0 : config.organizationId) !== null && _h !== void 0 ? _h : envOrgId) !== null && _j !== void 0 ? _j : '';
this.projectName = (_l = (_k = config === null || config === void 0 ? void 0 : config.projectName) !== null && _k !== void 0 ? _k : envProjectName) !== null && _l !== void 0 ? _l : 'default_project';
this.defaultRules = (_m = config === null || config === void 0 ? void 0 : config.rules) !== null && _m !== void 0 ? _m : [];
let effectiveMonitoring = (_o = config === null || config === void 0 ? void 0 : config.enableMonitoring) !== null && _o !== void 0 ? _o : ((envMonitoring === null || envMonitoring === void 0 ? void 0 : envMonitoring.toLowerCase()) !== 'false');
if (effectiveMonitoring && (!this.apiKey || !this.organizationId)) {
console.warn("JUDGMENT_API_KEY or JUDGMENT_ORG_ID missing. Monitoring disabled.");
effectiveMonitoring = false;
}
this.enableMonitoring = effectiveMonitoring;
this.enableEvaluations = effectiveMonitoring && ((_p = config === null || config === void 0 ? void 0 : config.enableEvaluations) !== null && _p !== void 0 ? _p : ((envEvaluations === null || envEvaluations === void 0 ? void 0 : envEvaluations.toLowerCase()) !== 'false'));
this.initialized = true;
}
static getInstance(config) {
if (!Tracer.instance) {
Tracer.instance = new Tracer(config);
}
else if (config && !Tracer.instance.initialized) {
console.warn("Tracer getInstance called with config after implicit initialization. Re-initializing.");
Tracer.instance = new Tracer(config);
}
else if (config && Tracer.instance.initialized) {
if (config.projectName && config.projectName !== Tracer.instance.projectName) {
console.warn(`Attempting to re-initialize Tracer with different project_name. Original will be used: '${Tracer.instance.projectName}'.`);
}
if (config.rules && config.rules.length > 0 && Tracer.instance.defaultRules.length === 0) {
try {
console.warn("Setting default rules on Tracer instance after initial creation.");
Tracer.instance.defaultRules = config.rules;
}
catch (e) {
console.error("Failed to set default rules after tracer initialization:", e);
}
}
else if (config.rules && JSON.stringify(config.rules) !== JSON.stringify(Tracer.instance.defaultRules)) {
console.warn("Attempting to change default rules on Tracer after initialization. Original rules will be used.");
}
}
return Tracer.instance;
}
getCurrentTrace() {
return this.currentTrace;
}
startTrace(name, config) {
var _a, _b;
const parentTrace = this.getCurrentTrace();
const projectName = (_a = config.projectName) !== null && _a !== void 0 ? _a : this.projectName;
const effectiveRules = Object.values(Object.fromEntries([...this.defaultRules, ...((_b = config.rules) !== null && _b !== void 0 ? _b : [])]
.map(rule => { var _a; return [(_a = rule.rule_id) !== null && _a !== void 0 ? _a : rule.name, rule]; })));
const traceClient = new TraceClient({
tracer: this,
name: name,
projectName: projectName,
overwrite: config.overwrite,
rules: effectiveRules,
enableMonitoring: this.enableMonitoring,
enableEvaluations: this.enableEvaluations,
parentTraceId: parentTrace === null || parentTrace === void 0 ? void 0 : parentTrace.traceId,
parentName: parentTrace === null || parentTrace === void 0 ? void 0 : parentTrace.name,
apiKey: this.apiKey,
organizationId: this.organizationId,
});
return traceClient;
}
*trace(name, options = {}) {
var _a;
const trace = this.startTrace(name, Object.assign({}, options));
const shouldCreateRootSpan = (_a = options.createRootSpan) !== null && _a !== void 0 ? _a : true;
const prevTrace = this.currentTrace;
if (shouldCreateRootSpan) {
for (const span of trace.span(name, { spanType: 'chain' })) {
this.currentTrace = trace;
yield trace;
this.currentTrace = prevTrace;
}
}
else {
this.currentTrace = trace;
yield trace;
this.currentTrace = prevTrace;
}
if (trace.enableMonitoring) {
trace.save(false).catch(saveErr => {
console.error(`Failed to save completed trace '${name}' (${trace.traceId}):`, saveErr);
});
}
}
observe(options) {
if (!this.enableMonitoring) {
return (func) => (...args) => Promise.resolve(func(...args));
}
return (func) => {
var _a, _b, _c;
const spanName = (_b = (_a = options === null || options === void 0 ? void 0 : options.name) !== null && _a !== void 0 ? _a : func.name) !== null && _b !== void 0 ? _b : 'anonymous_function';
const spanType = (_c = options === null || options === void 0 ? void 0 : options.spanType) !== null && _c !== void 0 ? _c : 'span';
return asyncFunctionWrapper(((...args) => __awaiter(this, void 0, void 0, function* () {
const currentTrace = this.getCurrentTrace();
let output;
let error;
if (!currentTrace) {
for (const trace of this.trace(spanName, { createRootSpan: false })) {
for (const span of trace.span(spanName, { spanType })) {
span.recordInput({ args: args });
try {
span.recordOutput(output = yield func(...args));
}
catch (e) {
span.recordError(error = e);
}
}
}
}
else {
for (const span of currentTrace.span(spanName, { spanType })) {
span.recordInput({ args: args });
try {
span.recordOutput(output = yield func(...args));
}
catch (e) {
span.recordError(error = e);
}
}
}
if (error) {
throw error;
}
// @ts-expect-error output is assigned
return output;
})).bind(func));
};
}
}
// Singleton slot read and populated by Tracer.getInstance(); empty until the first call.
Tracer.instance = null;
// --- Helper Functions for Wrapping LLM Clients --- //
// Return owner and method name for explicit patching
/**
 * Works out which method of an LLM client should be patched.
 *
 * Detection order: an OpenAI instance exposing `chat.completions.create`, then
 * an Anthropic instance exposing `messages.create`, then any object exposing
 * `chat.completions.create` (treated as a Together client by duck typing).
 *
 * @param {*} client - The SDK client to inspect; may be null or undefined.
 * @returns {{spanName: string, originalMethod: Function, methodOwner: object, methodName: string} | null}
 *   Patch description, or null (after logging a warning) when nothing matches.
 */
function _getClientConfig(client) {
    // Walks `keys` from `client`, stopping at the first null/undefined link,
    // and reports whether the final value is callable.
    const exposesFunction = (keys) => {
        const leaf = keys.reduce((node, key) => (node === null || node === undefined ? undefined : node[key]), client);
        return typeof leaf === 'function';
    };
    const CHAT_CREATE = ['chat', 'completions', 'create'];
    const MESSAGES_CREATE = ['messages', 'create'];
    // OpenAI is checked first.
    if (client instanceof OpenAI && exposesFunction(CHAT_CREATE)) {
        const completions = client.chat.completions;
        return {
            spanName: "OPENAI_API_CALL",
            originalMethod: completions.create,
            methodOwner: completions,
            methodName: 'create'
        };
    }
    // Anthropic next.
    if (client instanceof Anthropic && exposesFunction(MESSAGES_CREATE)) {
        const messages = client.messages;
        return {
            spanName: "ANTHROPIC_API_CALL",
            originalMethod: messages.create,
            methodOwner: messages,
            methodName: 'create'
        };
    }
    // Together has no class check; anything shaped like chat.completions.create qualifies.
    if (exposesFunction(CHAT_CREATE)) {
        const completions = client.chat.completions;
        return {
            spanName: "TOGETHER_API_CALL",
            originalMethod: completions.create,
            methodOwner: completions,
            methodName: 'create'
        };
    }
    // Nothing matched: report the constructor name (if any) and give up.
    const ctor = client === null || client === undefined ? undefined : client.constructor;
    const clientType = ctor === null || ctor === undefined ? undefined : ctor.name;
    logger.warn("Cannot wrap client: Unsupported type or incompatible SDK structure.", { clientType: clientType });
    return null;
}
function _formatInputData(client, args) {
var _a, _b;
const params = args[0] || {};
try {
// Check for OpenAI or Together (assuming chat structure)
if (client instanceof OpenAI || (((_a = client === null || client === void 0 ? void 0 : client.constructor) === null || _a === void 0 ? void 0 : _a.name) === 'Together' && ((_b = client === null || client === void 0 ? void 0 : client.chat) === null || _b === void 0 ? void 0 : _b.completions))) {
return { model: params.model, messages: params.messages, /* other potential params */ };
}
else if (client instanceof Anthropic) {
return { model: params.model, messages: params.messages, max_tokens: params.max_tokens, };
}
}
catch (e) {
logger.error("Error formatting LLM input:", { error: e instanceof Error ? e.message : String(e), params });
return { raw_params: params };
}
return { raw_params: params