UNPKG

axiom

Version:

The Axiom AI SDK provides: an API to wrap your AI calls with observability instrumentation; offline evals; and online evals.

283 lines (279 loc) 8.96 kB
import { getGlobalTracer } from "./chunk-PU64TWX4.js";
import { isValidName } from "./chunk-MM5FFQJT.js";
import { normalizeBooleanScore } from "./chunk-FGLJO4BD.js";
import { Attr } from "./chunk-4TKUTT24.js";

// src/online-evals/onlineEval.ts
import { context as context2, trace as trace2, SpanStatusCode as SpanStatusCode2 } from "@opentelemetry/api";

// src/online-evals/executor.ts
import { context, trace, SpanStatusCode } from "@opentelemetry/api";

/**
 * Writes the attributes describing one scorer result onto a scorer span.
 *
 * The raw score/metadata pair is first passed through the imported
 * `normalizeBooleanScore` helper; metadata is attached (JSON-encoded) only
 * when the normalized metadata has at least one key.
 *
 * @param {object} args
 * @param {object} args.span - Span that receives the attributes.
 * @param {string} args.name - Scorer name.
 * @param {object} args.result - Scorer result (`score`, optional `metadata`).
 * @param {*} args.capability - Value stored under the capability-name attribute.
 * @param {*} args.step - Value stored under the step-name attribute.
 * @param {string} args.evalName - Name of the enclosing eval.
 */
function setScorerSpanAttrs(args) {
  const { score: normalizedScore, metadata: normalizedMetadata } = normalizeBooleanScore(
    args.result.score,
    args.result.metadata
  );
  const spanAttributes = {
    [Attr.GenAI.Operation.Name]: "eval.score",
    [Attr.Eval.Score.Name]: args.name,
    [Attr.Eval.Tags]: JSON.stringify(["online"]),
    // A null score is mapped to undefined rather than being written as null.
    [Attr.Eval.Score.Value]: normalizedScore ?? undefined,
    [Attr.Eval.Name]: args.evalName,
    [Attr.Eval.Capability.Name]: args.capability,
    [Attr.Eval.Step.Name]: args.step
  };
  const hasMetadata = Boolean(normalizedMetadata) && Object.keys(normalizedMetadata).length > 0;
  if (hasMetadata) {
    spanAttributes[Attr.Eval.Score.Metadata] = JSON.stringify(normalizedMetadata);
  }
  args.span.setAttributes(spanAttributes);
}

/**
 * Marks a scorer span as failed: records the exception, sets the error
 * message/type attributes and switches the span status to ERROR.
 *
 * @param {object} span - Scorer span to update.
 * @param {Error} error - Error describing the failure.
 */
function recordScorerSpanFailure(span, error) {
  span.recordException(error);
  span.setAttributes({
    [Attr.Error.Message]: error.message,
    [Attr.Error.Type]: error.name
  });
  span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
}

/**
 * Runs one scorer inside its own `score <name>` span, started while
 * `params.parentSpan` is set as the span of the active context.
 *
 * `params.scorer` is either a function, which is awaited with
 * `{ input, output }`, or an already computed result object, which is used
 * as-is. Anything thrown inside the span is caught and converted into a
 * result of the form `{ name, score: null, error }`.
 *
 * @param {object} params
 * @param {Function|object} params.scorer - Scorer function or precomputed result.
 * @param {*} params.input - Input forwarded to the scorer.
 * @param {*} params.output - Output forwarded to the scorer.
 * @param {object} params.parentSpan - Span set on the context the scorer runs in.
 * @param {*} params.capability - Forwarded to the span attributes.
 * @param {*} params.step - Forwarded to the span attributes.
 * @param {string} params.evalName - Forwarded to the span attributes.
 * @returns {Promise<object>} The scorer result, or the failure result.
 */
async function executeScorer(params) {
  const tracer = getGlobalTracer();
  const parentContext = trace.setSpan(context.active(), params.parentSpan);
  return context.with(parentContext, async () => {
    // Falls back to "unknown" for anonymous scorer functions.
    const scorerName = resolveScorerName(params.scorer);
    const scorerSpan = tracer.startSpan(`score ${scorerName}`);
    const describeOnSpan = (result) => {
      setScorerSpanAttrs({
        span: scorerSpan,
        name: scorerName,
        result,
        capability: params.capability,
        step: params.step,
        evalName: params.evalName
      });
    };
    try {
      let result;
      if (typeof params.scorer === "function") {
        const produced = await params.scorer({
          input: params.input,
          output: params.output
        });
        // The resolved scorer name always wins over a name in the returned object.
        result = { ...produced, name: scorerName };
      } else {
        result = params.scorer;
      }
      describeOnSpan(result);
      if (result.error) {
        const error = new Error(result.error);
        recordScorerSpanFailure(scorerSpan, error);
      } else {
        scorerSpan.setStatus({ code: SpanStatusCode.OK });
      }
      return result;
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      const failedResult = {
        name: scorerName,
        score: null,
        error: error.message
      };
      describeOnSpan(failedResult);
      recordScorerSpanFailure(scorerSpan, error);
      return failedResult;
    } finally {
      scorerSpan.end();
    }
  });
}

// src/online-evals/onlineEval.ts

/**
 * Decides whether a scorer should run for this call.
 *
 * - `undefined`: always run.
 * - number: treated as a probability (>= 1 always, <= 0 never, otherwise
 *   compared against `Math.random()`).
 * - anything else: called with `args` and its awaited result coerced to boolean.
 *
 * @param {undefined|number|Function} sampling - Sampling configuration.
 * @param {object} args - Argument handed to a sampling function.
 * @returns {Promise<boolean>} True when the scorer should run.
 */
async function shouldSample(sampling, args) {
  if (sampling === undefined) {
    return true;
  }
  if (typeof sampling !== "number") {
    return Boolean(await sampling(args));
  }
  if (sampling >= 1) {
    return true;
  }
  if (sampling <= 0) {
    return false;
  }
  return Math.random() < sampling;
}

/**
 * Tells whether a scorer entry is a non-null object carrying a `scorer` key.
 *
 * @param {*} entry - Entry from the `scorers` list.
 * @returns {boolean}
 */
function isSampledScorerEntry(entry) {
  if (entry === null || typeof entry !== "object") {
    return false;
  }
  return "scorer" in entry;
}

/**
 * Returns the name of a scorer: the function name (or "unknown" when it is
 * empty) for functions, otherwise the object's `name` property.
 *
 * @param {Function|object} scorer
 * @returns {string}
 */
function resolveScorerName(scorer) {
  return typeof scorer === "function" ? scorer.name || "unknown" : scorer.name;
}

/**
 * Converts a scorer entry into the uniform `{ name, scorer, sampling? }` form.
 *
 * @param {*} entry - Either a bare scorer or an object with a `scorer` key.
 * @returns {{name: string, scorer: *, sampling?: *}}
 */
function normalizeScorerEntry(entry) {
  if (!isSampledScorerEntry(entry)) {
    return { name: resolveScorerName(entry), scorer: entry };
  }
  return {
    name: resolveScorerName(entry.scorer),
    scorer: entry.scorer,
    sampling: entry.sampling
  };
}

/**
 * Lists every name that occurs more than once among the entries, sorted.
 *
 * @param {Array<{name: string}>} entries - Normalized scorer entries.
 * @returns {string[]} Sorted duplicate names (empty when all are unique).
 */
function getDuplicateScorerNames(entries) {
  const occurrences = new Map();
  for (const entry of entries) {
    const seenSoFar = occurrences.get(entry.name) ?? 0;
    occurrences.set(entry.name, seenSoFar + 1);
  }
  const repeated = [];
  for (const [scorerName, count] of occurrences) {
    if (count > 1) {
      repeated.push(scorerName);
    }
  }
  return repeated.sort();
}

/**
 * Runs the configured scorers for an online eval and reports them as spans.
 *
 * The name is validated synchronously; an empty scorer list resolves to `{}`
 * without creating any span. Span links come from `params.links` when given,
 * otherwise from the span context of the currently active span (if any).
 *
 * @param {string} name - Eval name; must pass `isValidName`.
 * @param {object} params - Eval parameters (`scorers`, `input`, `output`,
 *   `capability`, optional `step`, optional `links`).
 * @returns {Promise<object>} Scorer results keyed by scorer name.
 * @throws {Error} Synchronously, when the eval name is invalid.
 */
function onlineEval(name, params) {
  const nameValidation = isValidName(name);
  if (!nameValidation.valid) {
    throw new Error(`[AxiomAI] Invalid eval name: ${nameValidation.error}`);
  }
  if (params.scorers.length === 0) {
    return Promise.resolve({});
  }

  // Only consult the active span when the caller supplied no links.
  let rawLinks = params.links;
  if (rawLinks == null) {
    rawLinks = trace2.getSpan(context2.active())?.spanContext();
  }

  let linkContexts;
  if (!rawLinks) {
    linkContexts = [];
  } else if (Array.isArray(rawLinks)) {
    linkContexts = rawLinks;
  } else {
    linkContexts = [rawLinks];
  }
  return executeOnlineEvalInternal(name, params, linkContexts);
}

/**
 * Creates the `eval <name>` span, runs all scorers concurrently and writes a
 * summary of their results onto the span.
 *
 * Duplicate scorer names, and any other error raised while evaluating, are
 * recorded on the eval span and result in `{}` being returned; the error is
 * not rethrown to the caller.
 *
 * @param {string} name - Validated eval name.
 * @param {object} params - Same parameters object given to `onlineEval`.
 * @param {Array<object>} linkContexts - Span contexts to link the eval span to.
 * @returns {Promise<object>} Results keyed by scorer name, or `{}` on error.
 */
async function executeOnlineEvalInternal(name, params, linkContexts) {
  const tracer = getGlobalTracer();

  const spanOptions = {};
  if (linkContexts.length > 0) {
    spanOptions.links = linkContexts.map((ctx) => ({ context: ctx }));
  }
  const evalSpan = tracer.startSpan(`eval ${name}`, spanOptions);

  const evalAttrs = {
    [Attr.GenAI.Operation.Name]: "eval",
    [Attr.Eval.Name]: name,
    [Attr.Eval.Capability.Name]: params.capability,
    [Attr.Eval.Tags]: JSON.stringify(["online"])
  };
  if (params.step) {
    evalAttrs[Attr.Eval.Step.Name] = params.step;
  }
  evalSpan.setAttributes(evalAttrs);

  // Builds the executeScorer argument lazily so params is read at call time.
  const scorerCall = (scorer) => ({
    scorer,
    input: params.input,
    output: params.output,
    parentSpan: evalSpan,
    capability: params.capability,
    step: params.step,
    evalName: name
  });

  try {
    const normalizedScorers = params.scorers.map((entry) => normalizeScorerEntry(entry));

    const duplicateScorerNames = getDuplicateScorerNames(normalizedScorers);
    if (duplicateScorerNames.length > 0) {
      throw new Error(
        `Duplicate scorer names are not allowed: ${duplicateScorerNames.map((name2) => `"${name2}"`).join(", ")}`
      );
    }

    const outcomes = await Promise.all(
      normalizedScorers.map(async (entry) => {
        try {
          const sampledIn = await shouldSample(entry.sampling, {
            input: params.input,
            output: params.output
          });
          if (!sampledIn) {
            return { sampledOut: true };
          }
          const result = await executeScorer(scorerCall(entry.scorer));
          return { sampledOut: false, result };
        } catch (err) {
          // Report the failure through a scorer span by passing a
          // precomputed failed result instead of the scorer itself.
          const error = err instanceof Error ? err : new Error(String(err));
          const result = await executeScorer(
            scorerCall({
              name: entry.name,
              score: null,
              error: error.message
            })
          );
          return { sampledOut: false, result };
        }
      })
    );

    const results = {};
    let sampledOutCount = 0;
    for (const outcome of outcomes) {
      if (outcome.sampledOut) {
        sampledOutCount += 1;
      } else {
        results[outcome.result.name] = outcome.result;
      }
    }

    let failedCount = 0;
    let ranCount = 0;
    for (const result of Object.values(results)) {
      ranCount += 1;
      if (result.error) {
        failedCount += 1;
      }
    }

    const scoresSummary = {};
    for (const [name2, result] of Object.entries(results)) {
      const { score: normalizedScore, metadata: normalizedMetadata } = normalizeBooleanScore(
        result.score,
        result.metadata
      );
      const summary = { name: result.name, score: normalizedScore };
      if (normalizedMetadata && Object.keys(normalizedMetadata).length > 0) {
        summary.metadata = normalizedMetadata;
      }
      if (result.error) {
        summary.error = result.error;
      }
      scoresSummary[name2] = summary;
    }

    evalSpan.setAttributes({
      [Attr.Eval.Case.Scores]: JSON.stringify(scoresSummary),
      [Attr.Eval.Online.Scorers.Total]: normalizedScorers.length,
      [Attr.Eval.Online.Scorers.Ran]: ranCount,
      [Attr.Eval.Online.Scorers.SampledOut]: sampledOutCount,
      [Attr.Eval.Online.Scorers.Failed]: failedCount
    });

    if (failedCount > 0) {
      evalSpan.setStatus({
        code: SpanStatusCode2.ERROR,
        message: "One or more scorers failed"
      });
    } else {
      evalSpan.setStatus({ code: SpanStatusCode2.OK });
    }
    return results;
  } catch (err) {
    const error = err instanceof Error ? err : new Error(String(err));
    evalSpan.recordException(error);
    evalSpan.setStatus({ code: SpanStatusCode2.ERROR, message: error.message });
    return {};
  } finally {
    evalSpan.end();
  }
}

export {
  onlineEval
};
//# sourceMappingURL=chunk-7PROY4ZA.js.map