@genkit-ai/ai
Version:
Genkit AI framework generative AI APIs.
225 lines • 7.33 kB
JavaScript
import { action, z } from "@genkit-ai/core";
import { logger } from "@genkit-ai/core/logging";
import { toJsonSchema } from "@genkit-ai/core/schema";
import { SPAN_TYPE_ATTR, runInNewSpan } from "@genkit-ai/core/tracing";
import { randomUUID } from "crypto";
const ATTR_PREFIX = "genkit";
const SPAN_STATE_ATTR = ATTR_PREFIX + ":state";
const BaseDataPointSchema = z.object({
input: z.unknown(),
output: z.unknown().optional(),
context: z.array(z.unknown()).optional(),
reference: z.unknown().optional(),
testCaseId: z.string().optional(),
traceIds: z.array(z.string()).optional()
});
const BaseEvalDataPointSchema = BaseDataPointSchema.extend({
testCaseId: z.string()
});
const EvalStatusEnumSchema = z.enum(["UNKNOWN", "PASS", "FAIL"]);
var EvalStatusEnum = /* @__PURE__ */ ((EvalStatusEnum2) => {
EvalStatusEnum2["UNKNOWN"] = "UNKNOWN";
EvalStatusEnum2["PASS"] = "PASS";
EvalStatusEnum2["FAIL"] = "FAIL";
return EvalStatusEnum2;
})(EvalStatusEnum || {});
const ScoreSchema = z.object({
id: z.string().describe(
"Optional ID to differentiate different scores if applying in a single evaluation"
).optional(),
score: z.union([z.number(), z.string(), z.boolean()]).optional(),
status: EvalStatusEnumSchema.optional(),
error: z.string().optional(),
details: z.object({
reasoning: z.string().optional()
}).passthrough().optional()
});
const EVALUATOR_METADATA_KEY_DISPLAY_NAME = "evaluatorDisplayName";
const EVALUATOR_METADATA_KEY_DEFINITION = "evaluatorDefinition";
const EVALUATOR_METADATA_KEY_IS_BILLED = "evaluatorIsBilled";
const EvalResponseSchema = z.object({
sampleIndex: z.number().optional(),
testCaseId: z.string(),
traceId: z.string().optional(),
spanId: z.string().optional(),
evaluation: z.union([ScoreSchema, z.array(ScoreSchema)])
});
const EvalResponsesSchema = z.array(EvalResponseSchema);
function withMetadata(evaluator2, dataPointType, configSchema) {
const withMeta = evaluator2;
withMeta.__dataPointType = dataPointType;
withMeta.__configSchema = configSchema;
return withMeta;
}
const EvalRequestSchema = z.object({
dataset: z.array(BaseDataPointSchema),
evalRunId: z.string(),
options: z.unknown()
});
function defineEvaluator(registry, options, runner) {
const e = evaluator(options, runner);
registry.registerAction("evaluator", e);
return e;
}
function evaluator(options, runner) {
const evalMetadata = {};
evalMetadata[EVALUATOR_METADATA_KEY_IS_BILLED] = options.isBilled == void 0 ? true : options.isBilled;
evalMetadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName;
evalMetadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition;
if (options.configSchema) {
evalMetadata["customOptions"] = toJsonSchema({
schema: options.configSchema
});
}
const evaluator2 = action(
{
actionType: "evaluator",
name: options.name,
inputSchema: EvalRequestSchema.extend({
dataset: options.dataPointType ? z.array(options.dataPointType) : z.array(BaseDataPointSchema),
options: options.configSchema ?? z.unknown(),
evalRunId: z.string(),
batchSize: z.number().optional()
}),
outputSchema: EvalResponsesSchema,
metadata: {
type: "evaluator",
evaluator: evalMetadata
}
},
async (i) => {
const evalResponses = [];
const batches = getBatchedArray(i.dataset, i.batchSize);
for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) {
const batch = batches[batchIndex];
try {
await runInNewSpan(
{
metadata: {
name: i.batchSize ? `Batch ${batchIndex}` : `Test Case ${batch[0].testCaseId}`,
metadata: { "evaluator:evalRunId": i.evalRunId }
},
labels: {
[SPAN_TYPE_ATTR]: "evaluator"
}
},
async (metadata, otSpan) => {
const spanId = otSpan.spanContext().spanId;
const traceId = otSpan.spanContext().traceId;
const evalRunPromises = batch.map((d, index) => {
const sampleIndex = i.batchSize ? i.batchSize * batchIndex + index : batchIndex;
const datapoint = d;
metadata.input = {
input: datapoint.input,
output: datapoint.output,
context: datapoint.context
};
const evalOutputPromise = runner(datapoint, i.options).then((result) => ({
...result,
traceId,
spanId,
sampleIndex
})).catch((error) => {
return {
sampleIndex,
spanId,
traceId,
testCaseId: datapoint.testCaseId,
evaluation: {
error: `Evaluation of test case ${datapoint.testCaseId} failed:
${error}`
}
};
});
return evalOutputPromise;
});
const allResults = await Promise.all(evalRunPromises);
metadata.output = allResults.length === 1 ? allResults[0] : allResults;
allResults.map((result) => {
evalResponses.push(result);
});
}
);
} catch (e) {
logger.error(
`Evaluation of batch ${batchIndex} failed:
${e.stack}`
);
continue;
}
}
return evalResponses;
}
);
const ewm = withMetadata(
evaluator2,
options.dataPointType,
options.configSchema
);
return ewm;
}
async function evaluate(registry, params) {
let evaluator2;
if (typeof params.evaluator === "string") {
evaluator2 = await registry.lookupAction(`/evaluator/${params.evaluator}`);
} else if (Object.hasOwnProperty.call(params.evaluator, "info")) {
evaluator2 = await registry.lookupAction(
`/evaluator/${params.evaluator.name}`
);
} else {
evaluator2 = params.evaluator;
}
if (!evaluator2) {
throw new Error("Unable to utilize the provided evaluator");
}
return await evaluator2({
dataset: params.dataset,
options: params.options,
evalRunId: params.evalRunId ?? randomUUID()
});
}
const EvaluatorInfoSchema = z.object({
/** Friendly label for this evaluator */
label: z.string().optional(),
metrics: z.array(z.string())
});
function evaluatorRef(options) {
return { ...options };
}
function getBatchedArray(arr, batchSize) {
let size;
if (!batchSize) {
size = 1;
} else {
size = batchSize;
}
const batches = [];
for (var i = 0; i < arr.length; i += size) {
batches.push(
arr.slice(i, i + size).map((d) => ({
...d,
testCaseId: d.testCaseId ?? randomUUID()
}))
);
}
return batches;
}
export {
ATTR_PREFIX,
BaseDataPointSchema,
BaseEvalDataPointSchema,
EVALUATOR_METADATA_KEY_DEFINITION,
EVALUATOR_METADATA_KEY_DISPLAY_NAME,
EVALUATOR_METADATA_KEY_IS_BILLED,
EvalResponseSchema,
EvalResponsesSchema,
EvalStatusEnum,
EvaluatorInfoSchema,
SPAN_STATE_ATTR,
ScoreSchema,
defineEvaluator,
evaluate,
evaluator,
evaluatorRef
};
//# sourceMappingURL=evaluator.mjs.map