UNPKG

judgeval

Version:

Judgment SDK for TypeScript/JavaScript

250 lines (249 loc) 7.97 kB
import OpenAI from 'openai';
import Anthropic from '@anthropic-ai/sdk';
import { APIJudgmentScorer, Scorer } from '../scorers/base-scorer.js';

/**
 * Notification settings that may be attached to a {@link Rule}.
 * Every field is optional.
 */
interface NotificationConfig {
    enabled?: boolean;
    communication_methods?: string[];
    email_addresses?: string[];
    /** NOTE(review): presumably a timestamp; the unit is not visible from this declaration. */
    send_at?: number;
}

/**
 * One condition of a {@link Rule}: wraps the scorer used as the condition's metric.
 */
interface Condition {
    metric: Scorer;
}

/**
 * Allowed values for {@link Rule.combine_type}.
 * NOTE(review): presumably "all" = every condition must hold, "any" = at least one; confirm
 * against the implementation.
 */
type CombineType = "all" | "any";

/**
 * A named set of conditions plus the way they are combined, with optional
 * notification settings.
 */
interface Rule {
    rule_id?: string;
    name: string;
    description?: string;
    conditions: Condition[];
    combine_type: CombineType;
    notification?: NotificationConfig;
}

/**
 * Client type accepted by {@link wrap}.
 *
 * NOTE(review): because the union contains `any`, the whole alias resolves to `any`,
 * so it places no compile-time constraint on callers.
 */
type ApiClient = OpenAI | Anthropic | any;

/** Span category label; a plain string alias. */
type SpanType = string;

/**
 * A single trace record. `type` is one of five literal kinds; `created_at` is numeric here
 * (contrast with the string `created_at` on {@link CondensedSpanEntry}).
 */
interface TraceEntry {
    type: 'enter' | 'exit' | 'input' | 'output' | 'error';
    function: string;
    span_id: string;
    depth: number;
    created_at: number;
    duration?: number;
    output?: any;
    inputs?: Record<string, any>;
    span_type: SpanType;
    parent_span_id?: string;
    trace_id?: string;
    message?: string;
}

/**
 * Shape of each element of {@link TraceSavePayload.evaluation_runs}.
 */
interface EvaluationRunPayload {
    organization_id: string;
    log_results: boolean;
    project_name: string;
    eval_name: string;
    /** Examples to evaluate; each one carries the id of the trace it belongs to. */
    examples: {
        input?: string;
        actual_output?: string;
        expected_output?: string;
        context?: string[];
        retrieval_context?: string[];
        tools_called?: string[];
        expected_tools?: string[];
        additional_metadata?: Record<string, any>;
        trace_id: string;
    }[];
    scorers: APIJudgmentScorer[];
    model?: string;
    metadata?: Record<string, any>;
    judgment_api_key: string;
    override?: boolean;
    rules?: Rule[];
    trace_span_id?: string;
}

/**
 * Payload accepted by {@link TraceManagerClient.saveTrace} and returned as `traceData`
 * from {@link TraceClient.save}.
 */
interface TraceSavePayload {
    trace_id: string;
    name: string;
    project_name: string;
    created_at: string;
    duration: number;
    /** Token usage and cost figures for the trace. */
    token_counts: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
        prompt_tokens_cost_usd: number;
        completion_tokens_cost_usd: number;
        total_cost_usd: number;
    };
    entries: CondensedSpanEntry[];
    evaluation_runs: EvaluationRunPayload[];
    overwrite: boolean;
    parent_trace_id?: string | null;
    parent_name?: string | null;
}

/**
 * Shape of each element of {@link TraceSavePayload.entries}. Entries may nest through
 * the optional `children` array.
 */
interface CondensedSpanEntry {
    span_id: string;
    function: string;
    depth: number;
    created_at: string;
    parent_span_id?: string | null;
    span_type: SpanType;
    inputs: Record<string, any> | null;
    output: any | null;
    duration: number | null;
    trace_id?: string;
    children?: CondensedSpanEntry[];
}

/**
 * Result type of {@link TraceManagerClient.calculateTokenCosts}: token counts plus
 * costs in USD for one model.
 */
interface TokenCostResponse {
    model: string;
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
    prompt_tokens_cost_usd: number;
    completion_tokens_cost_usd: number;
    total_cost_usd: number;
}

/**
 * Client for the Judgment trace API endpoints.
 */
declare class TraceManagerClient {
    private apiKey;
    private organizationId;

    constructor(apiKey: string, organizationId: string);

    private _fetch;

    fetchTrace(traceId: string): Promise<any>;

    saveTrace(traceData: TraceSavePayload): Promise<any>;

    deleteTrace(traceId: string): Promise<any>;

    deleteTraces(traceIds: string[]): Promise<any>;

    /**
     * Computes token costs through the API endpoint rather than on the client.
     * The API is described as more accurate than client-side calculation because it
     * uses the most up-to-date pricing.
     *
     * @param model The model name (e.g. 'gpt-4', 'claude-3-opus-20240229')
     * @param promptTokens Number of tokens in the prompt/input
     * @param completionTokens Number of tokens in the completion/output
     * @returns Token counts and calculated costs in USD; the declared type also allows `null`
     */
    calculateTokenCosts(
        model: string,
        promptTokens: number,
        completionTokens: number
    ): Promise<TokenCostResponse | null>;
}

/**
 * Represents an ongoing trace context.
 */
declare class TraceClient {
    readonly traceId: string;
    readonly name: string;
    readonly projectName: string;
    readonly overwrite: boolean;
    readonly rules: Rule[];
    readonly enableMonitoring: boolean;
    readonly enableEvaluations: boolean;
    readonly parentTraceId?: string | null;
    readonly parentName?: string | null;
    private startTime;
    traceManager: TraceManagerClient | null;
    private apiKey;
    private organizationId;
    private originalName;
    private _spanDepths;
    private pendingEvaluationRuns;

    /**
     * @param config Trace settings. `tracer`, `apiKey` and `organizationId` are required;
     *               everything else is optional.
     */
    constructor(config: {
        tracer: Tracer;
        traceId?: string;
        name?: string;
        projectName?: string;
        overwrite?: boolean;
        rules?: Rule[];
        enableMonitoring?: boolean;
        enableEvaluations?: boolean;
        parentTraceId?: string | null;
        parentName?: string | null;
        apiKey: string;
        organizationId: string;
    });

    /** Accepts a partial {@link TraceEntry}; how missing fields are filled is not visible here. */
    addEntry(entry: Partial<TraceEntry>): void;

    recordInput(inputs: any): void;

    recordOutput(output: any): void;

    recordError(error: any): void;

    startSpan(name: string, options?: {
        spanType?: SpanType;
    }): void;

    endSpan(): void;

    /**
     * Generator-based span helper.
     *
     * @param name Span name
     * @param options Optional span type
     * @returns A generator yielding {@link TraceClient} values
     */
    span(name: string, options?: {
        spanType?: SpanType;
    }): Generator<TraceClient>;

    /**
     * Looks up the ID of the span that is currently active in this trace context.
     * Depends on the AsyncLocalStorage context set up by observe/span.
     *
     * @returns The current span's ID, or undefined when no span is active
     */
    getCurrentSpanId(): string | undefined;

    getDuration(): number;

    private condenseTrace;

    /**
     * @param emptySave Optional flag; its effect is not visible from this declaration
     * @returns The trace id together with the saved payload, or `null`
     */
    save(emptySave?: boolean): Promise<{
        traceId: string;
        traceData: TraceSavePayload;
    } | null>;

    print(): void;

    delete(): Promise<any>;

    /**
     * Evaluates an example asynchronously with the given scorers and embeds the
     * evaluation request into the trace data.
     * Ported from the Python SDK's async_evaluate method.
     *
     * @param scorers Scorers to use for the evaluation
     * @param options Evaluation options: input, outputs, context and metadata
     * @returns Promise that resolves once the evaluation entry has been added to the trace
     */
    asyncEvaluate(scorers: Scorer[], options?: {
        input?: string;
        actualOutput?: string;
        expectedOutput?: string;
        context?: string[];
        retrievalContext?: string[];
        toolsCalled?: string[];
        expectedTools?: string[];
        additionalMetadata?: Record<string, any>;
        model?: string;
        logResults?: boolean;
    }): Promise<void>;

    getOriginalName(): string;
}

/**
 * Singleton tracer. Holds the overall tracing configuration and creates traces.
 * The constructor is private; instances are obtained through {@link Tracer.getInstance}.
 */
declare class Tracer {
    private static instance;
    readonly apiKey: string;
    readonly organizationId: string;
    readonly projectName: string;
    readonly defaultRules: Rule[];
    readonly enableMonitoring: boolean;
    readonly enableEvaluations: boolean;
    private initialized;
    private currentTrace?;

    private constructor();

    /**
     * @param config Optional configuration; every field is optional
     * @returns The tracer instance
     */
    static getInstance(config?: {
        apiKey?: string;
        organizationId?: string;
        projectName?: string;
        rules?: Rule[];
        enableMonitoring?: boolean;
        enableEvaluations?: boolean;
    }): Tracer;

    getCurrentTrace(): TraceClient | undefined;

    startTrace(name: string, config: {
        projectName?: string;
        overwrite?: boolean;
        rules?: Rule[];
    }): TraceClient;

    /**
     * Generator-based trace helper.
     *
     * @param name Trace name
     * @param options Optional per-trace settings
     * @returns A generator yielding {@link TraceClient} values
     */
    trace(name: string, options?: {
        projectName?: string;
        overwrite?: boolean;
        createRootSpan?: boolean;
        rules?: Rule[];
    }): Generator<TraceClient>;

    /**
     * Builds a function wrapper: the returned function takes `func` and produces a function
     * with the same parameter list whose result is wrapped in a Promise.
     *
     * NOTE(review): the declared result is `Promise<S>`, so for an already-async `func`
     * the static type reads as a Promise of a Promise.
     *
     * @param options Optional span name and span type
     */
    observe(options?: {
        name?: string;
        spanType?: SpanType;
    }): <T extends any[], S>(func: (...args: T) => S) => (...args: T) => Promise<S>;
}

/**
 * Takes an API client and returns a value of the same static type.
 *
 * @param client The client to wrap
 */
export declare function wrap<T extends ApiClient>(client: T): T;

export {
    Tracer,
    TraceClient,
    TraceManagerClient,
    Rule,
    Condition,
    NotificationConfig,
    CombineType,
    TraceEntry,
    SpanType,
    ApiClient,
    TraceSavePayload,
    CondensedSpanEntry
};