axiom
Version:
Axiom AI SDK provides: an API to wrap your AI calls with observability instrumentation; offline evals; and online evals.
283 lines (279 loc) • 8.96 kB
JavaScript
import {
getGlobalTracer
} from "./chunk-PU64TWX4.js";
import {
isValidName
} from "./chunk-MM5FFQJT.js";
import {
normalizeBooleanScore
} from "./chunk-FGLJO4BD.js";
import {
Attr
} from "./chunk-4TKUTT24.js";
// src/online-evals/onlineEval.ts
import { context as context2, trace as trace2, SpanStatusCode as SpanStatusCode2 } from "@opentelemetry/api";
// src/online-evals/executor.ts
import { context, trace, SpanStatusCode } from "@opentelemetry/api";
/**
 * Writes the attributes describing one scorer result onto the scorer's span.
 *
 * The raw `score` / `metadata` pair is first passed through
 * `normalizeBooleanScore`; the values it returns are what get recorded.
 *
 * @param {object} args
 * @param {object} args.span - Span that receives the attributes (one `setAttributes` call).
 * @param {string} args.name - Scorer name, stored as the score name.
 * @param {object} args.result - Scorer result; its `score` and `metadata` are read.
 * @param {string} args.evalName - Name of the eval the scorer belongs to.
 * @param {string} args.capability - Capability name recorded on the span.
 * @param {string} [args.step] - Step name recorded on the span.
 * @returns {void}
 */
function setScorerSpanAttrs(args) {
  const normalized = normalizeBooleanScore(args.result.score, args.result.metadata);
  const scoreValue = normalized.score;
  const metadata = normalized.metadata;

  // Metadata is only attached when there is at least one key to report.
  const hasMetadata = Boolean(metadata) && Object.keys(metadata).length !== 0;

  args.span.setAttributes({
    [Attr.GenAI.Operation.Name]: "eval.score",
    [Attr.Eval.Score.Name]: args.name,
    [Attr.Eval.Tags]: JSON.stringify(["online"]),
    // A null score is passed as undefined rather than null.
    [Attr.Eval.Score.Value]: scoreValue == null ? undefined : scoreValue,
    [Attr.Eval.Name]: args.evalName,
    [Attr.Eval.Capability.Name]: args.capability,
    [Attr.Eval.Step.Name]: args.step,
    ...(hasMetadata ? { [Attr.Eval.Score.Metadata]: JSON.stringify(metadata) } : {})
  });
}
/**
 * Runs a single scorer inside its own `score <name>` span, parented to
 * `params.parentSpan`, and returns the scorer result.
 *
 * `params.scorer` may be either a function (called with `{ input, output }`,
 * its return value is spread into the result and the `name` field is set to
 * the scorer name) or an already-computed result object (used as-is).
 *
 * Anything thrown inside the try block (by the scorer or while writing span
 * attributes) is converted into a result of the form
 * `{ name, score: null, error }` and recorded on the span as an error.
 * The span is always ended.
 *
 * @param {object} params
 * @param {Function|object} params.scorer - Scorer function or precomputed result.
 * @param {*} params.input - Input forwarded to a scorer function.
 * @param {*} params.output - Output forwarded to a scorer function.
 * @param {object} params.parentSpan - Span set on the active context before the scorer span starts.
 * @param {string} params.capability - Recorded on the scorer span.
 * @param {string} [params.step] - Recorded on the scorer span.
 * @param {string} params.evalName - Recorded on the scorer span.
 * @returns {Promise<object>} The scorer result, or the synthesized failure result.
 */
async function executeScorer(params) {
  const tracer = getGlobalTracer();
  const scopedContext = trace.setSpan(context.active(), params.parentSpan);

  const runWithinSpan = async () => {
    // Anonymous functions have an empty `name`; that shouldn't normally
    // happen, but fall back to a placeholder just in case.
    let label = params.scorer.name;
    if (typeof params.scorer === "function" && !label) {
      label = "unknown";
    }

    const span = tracer.startSpan(`score ${label}`);

    // Records the score attributes for `result` on this scorer's span.
    const writeScoreAttrs = (result) => {
      setScorerSpanAttrs({
        span,
        name: label,
        result,
        capability: params.capability,
        step: params.step,
        evalName: params.evalName
      });
    };

    // Marks the span as failed: exception event, error attributes, ERROR status.
    const flagFailure = (error) => {
      span.recordException(error);
      span.setAttributes({
        [Attr.Error.Message]: error.message,
        [Attr.Error.Type]: error.name
      });
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: error.message
      });
    };

    try {
      let outcome;
      if (typeof params.scorer === "function") {
        const produced = await params.scorer({
          input: params.input,
          output: params.output
        });
        outcome = { ...produced, name: label };
      } else {
        outcome = params.scorer;
      }

      writeScoreAttrs(outcome);

      // A result can report a failure via its `error` field without throwing.
      if (outcome.error) {
        flagFailure(new Error(outcome.error));
      } else {
        span.setStatus({ code: SpanStatusCode.OK });
      }
      return outcome;
    } catch (caught) {
      const error = caught instanceof Error ? caught : new Error(String(caught));
      const failure = {
        name: label,
        score: null,
        error: error.message
      };
      writeScoreAttrs(failure);
      flagFailure(error);
      return failure;
    } finally {
      span.end();
    }
  };

  return context.with(scopedContext, runWithinSpan);
}
// src/online-evals/onlineEval.ts
/**
 * Decides whether a scorer should run for this invocation.
 *
 * - `undefined` sampling: always run.
 * - numeric sampling: values >= 1 always run, values <= 0 never run,
 *   anything else runs when `Math.random()` is below the given rate.
 * - any other value is called as a function with `args` (awaited) and its
 *   result is coerced to a boolean.
 *
 * @param {number|Function|undefined} sampling - Sampling rate or predicate.
 * @param {object} args - Argument forwarded to a sampling predicate.
 * @returns {Promise<boolean>} `true` when the scorer should run.
 */
async function shouldSample(sampling, args) {
  switch (typeof sampling) {
    case "undefined":
      return true;
    case "number": {
      if (sampling >= 1) {
        return true;
      }
      if (sampling <= 0) {
        return false;
      }
      const roll = Math.random();
      return roll < sampling;
    }
    default: {
      const verdict = await sampling(args);
      return Boolean(verdict);
    }
  }
}
/**
 * Tells whether a scorer entry is the wrapped form: a non-null object that
 * has a `scorer` property (checked with the `in` operator).
 *
 * @param {*} entry - Candidate scorer entry.
 * @returns {boolean} `true` for a non-null object with a `scorer` property.
 */
function isSampledScorerEntry(entry) {
  if (entry === null || typeof entry !== "object") {
    return false;
  }
  return "scorer" in entry;
}
/**
 * Returns the name used to identify a scorer.
 *
 * For a function this is its `name`, or "unknown" when that name is empty;
 * for anything else the `name` property is returned unchanged.
 *
 * @param {Function|object} scorer - Scorer function or precomputed result.
 * @returns {string} The resolved scorer name.
 */
function resolveScorerName(scorer) {
  const declaredName = scorer.name;
  const isUnnamedFunction = typeof scorer === "function" && !declaredName;
  return isUnnamedFunction ? "unknown" : declaredName;
}
/**
 * Converts a scorer entry into the uniform `{ name, scorer, sampling? }`
 * shape used by the executor.
 *
 * A wrapped entry (`{ scorer, sampling }`) keeps its sampling setting; a bare
 * scorer is wrapped without a `sampling` key.
 *
 * @param {Function|object} entry - Bare scorer or wrapped scorer entry.
 * @returns {{name: string, scorer: (Function|object), sampling?: *}} Normalized entry.
 */
function normalizeScorerEntry(entry) {
  if (!isSampledScorerEntry(entry)) {
    // Bare scorer: the entry itself is the scorer.
    return { name: resolveScorerName(entry), scorer: entry };
  }

  const { scorer, sampling } = entry;
  return { name: resolveScorerName(scorer), scorer, sampling };
}
/**
 * Lists every scorer name that appears more than once among `entries`.
 *
 * @param {Iterable<{name: string}>} entries - Normalized scorer entries.
 * @returns {string[]} Each repeated name once, sorted with the default `Array.prototype.sort` order.
 */
function getDuplicateScorerNames(entries) {
  const tally = new Map();
  for (const { name } of entries) {
    const seenSoFar = tally.has(name) ? tally.get(name) : 0;
    tally.set(name, seenSoFar + 1);
  }

  const repeated = [];
  for (const [name, occurrences] of tally) {
    if (occurrences >= 2) {
      repeated.push(name);
    }
  }
  repeated.sort();
  return repeated;
}
/**
 * Public entry point for running an online eval.
 *
 * Validates the eval name, short-circuits when no scorers are configured,
 * works out which span contexts the eval span should link to, and hands off
 * to `executeOnlineEvalInternal`.
 *
 * Links come from `params.links` when it is not null/undefined (a single
 * context or an array of them); otherwise the span context of the currently
 * active span is used, if there is one.
 *
 * @param {string} name - Eval name; must pass `isValidName`.
 * @param {object} params - Eval parameters (`scorers`, optional `links`, plus
 *   the fields consumed by `executeOnlineEvalInternal`).
 * @returns {Promise<object>} Resolves to `{}` when `params.scorers` is empty,
 *   otherwise to whatever `executeOnlineEvalInternal` resolves to.
 * @throws {Error} Synchronously, when the eval name is invalid.
 */
function onlineEval(name, params) {
  const validation = isValidName(name);
  if (!validation.valid) {
    throw new Error(`[AxiomAI] Invalid eval name: ${validation.error}`);
  }

  // Nothing to score: resolve immediately without starting a span.
  if (params.scorers.length === 0) {
    return Promise.resolve({});
  }

  let linkSource = params.links;
  if (linkSource == null) {
    linkSource = trace2.getSpan(context2.active())?.spanContext();
  }

  let linkContexts;
  if (!linkSource) {
    linkContexts = [];
  } else if (Array.isArray(linkSource)) {
    linkContexts = linkSource;
  } else {
    linkContexts = [linkSource];
  }

  return executeOnlineEvalInternal(name, params, linkContexts);
}
/**
 * Runs every configured scorer under a single `eval <name>` span and returns
 * the results keyed by scorer name.
 *
 * Flow:
 *  1. Start the eval span (with links when `linkContexts` is non-empty) and
 *     set its identifying attributes.
 *  2. Normalize the scorer entries and reject duplicate names.
 *  3. For each entry, concurrently: apply sampling, then run the scorer via
 *     `executeScorer`. If that throws, `executeScorer` is called again with a
 *     precomputed failure result so the failure gets recorded.
 *  4. Summarize scores and counts on the eval span and set its status.
 *
 * Errors reaching the outer catch (including the duplicate-name error) are
 * recorded on the eval span and `{}` is returned instead of rethrowing.
 * The eval span is always ended.
 *
 * @param {string} name - Eval name.
 * @param {object} params - Eval parameters (`scorers`, `input`, `output`,
 *   `capability`, optional `step`).
 * @param {Array<object>} linkContexts - Span contexts to link the eval span to.
 * @returns {Promise<object>} Map of scorer name to scorer result; scorers that
 *   were sampled out are absent.
 */
async function executeOnlineEvalInternal(name, params, linkContexts) {
  const tracer = getGlobalTracer();

  const spanOptions = {};
  if (linkContexts.length > 0) {
    spanOptions.links = linkContexts.map((ctx) => ({ context: ctx }));
  }
  const evalSpan = tracer.startSpan(`eval ${name}`, spanOptions);

  evalSpan.setAttributes({
    [Attr.GenAI.Operation.Name]: "eval",
    [Attr.Eval.Name]: name,
    [Attr.Eval.Capability.Name]: params.capability,
    [Attr.Eval.Tags]: JSON.stringify(["online"]),
    // The step attribute is only present when a step was given.
    ...(params.step ? { [Attr.Eval.Step.Name]: params.step } : {})
  });

  // Coerces any thrown value into an Error instance.
  const asError = (thrown) => (thrown instanceof Error ? thrown : new Error(String(thrown)));

  // Builds the executeScorer argument object for a scorer (or precomputed result).
  const scorerArgs = (scorer) => ({
    scorer,
    input: params.input,
    output: params.output,
    parentSpan: evalSpan,
    capability: params.capability,
    step: params.step,
    evalName: name
  });

  // Samples and runs one normalized entry, never letting a scorer failure escape.
  const runEntry = async (entry) => {
    try {
      const keep = await shouldSample(entry.sampling, {
        input: params.input,
        output: params.output
      });
      if (!keep) {
        return { sampledOut: true };
      }
      const result = await executeScorer(scorerArgs(entry.scorer));
      return { sampledOut: false, result };
    } catch (thrown) {
      const error = asError(thrown);
      // Re-enter executeScorer with a precomputed failure so it is recorded.
      const result = await executeScorer(
        scorerArgs({
          name: entry.name,
          score: null,
          error: error.message
        })
      );
      return { sampledOut: false, result };
    }
  };

  try {
    const entries = params.scorers.map((raw) => normalizeScorerEntry(raw));

    const clashes = getDuplicateScorerNames(entries);
    if (clashes.length > 0) {
      const quoted = clashes.map((clash) => `"${clash}"`).join(", ");
      throw new Error(`Duplicate scorer names are not allowed: ${quoted}`);
    }

    const outcomes = await Promise.all(entries.map((entry) => runEntry(entry)));

    const executed = outcomes.filter((outcome) => !outcome.sampledOut);
    const sampledOutCount = outcomes.length - executed.length;

    const results = {};
    for (const { result } of executed) {
      results[result.name] = result;
    }

    const ranCount = Object.keys(results).length;
    const failedCount = Object.values(results).filter((result) => result.error).length;

    const scoresSummary = {};
    for (const [scorerName, result] of Object.entries(results)) {
      const normalized = normalizeBooleanScore(result.score, result.metadata);
      const summary = { name: result.name, score: normalized.score };
      if (normalized.metadata && Object.keys(normalized.metadata).length > 0) {
        summary.metadata = normalized.metadata;
      }
      if (result.error) {
        summary.error = result.error;
      }
      scoresSummary[scorerName] = summary;
    }

    evalSpan.setAttributes({
      [Attr.Eval.Case.Scores]: JSON.stringify(scoresSummary),
      [Attr.Eval.Online.Scorers.Total]: entries.length,
      [Attr.Eval.Online.Scorers.Ran]: ranCount,
      [Attr.Eval.Online.Scorers.SampledOut]: sampledOutCount,
      [Attr.Eval.Online.Scorers.Failed]: failedCount
    });

    const status =
      failedCount > 0
        ? { code: SpanStatusCode2.ERROR, message: "One or more scorers failed" }
        : { code: SpanStatusCode2.OK };
    evalSpan.setStatus(status);

    return results;
  } catch (thrown) {
    const error = asError(thrown);
    evalSpan.recordException(error);
    evalSpan.setStatus({
      code: SpanStatusCode2.ERROR,
      message: error.message
    });
    return {};
  } finally {
    evalSpan.end();
  }
}
export {
onlineEval
};
//# sourceMappingURL=chunk-7PROY4ZA.js.map