UNPKG

@genkit-ai/ai

Version:

Genkit AI framework generative AI APIs.

258 lines 7.78 kB
import { action, z } from "@genkit-ai/core";
import { logger } from "@genkit-ai/core/logging";
import { toJsonSchema } from "@genkit-ai/core/schema";
import { runInNewSpan } from "@genkit-ai/core/tracing";
import { randomUUID } from "crypto";

/** Prefix from which `SPAN_STATE_ATTR` is built. */
const ATTR_PREFIX = "genkit";

/** Attribute key `"genkit:state"`. */
const SPAN_STATE_ATTR = ATTR_PREFIX + ":state";

/**
 * Shape of one dataset entry handed to an evaluator. Only `input` is
 * declared without `.optional()`; every other field may be omitted.
 */
const BaseDataPointSchema = z.object({
  input: z.unknown(),
  output: z.unknown().optional(),
  context: z.array(z.unknown()).optional(),
  reference: z.unknown().optional(),
  testCaseId: z.string().optional(),
  traceIds: z.array(z.string()).optional()
});

/** Same as `BaseDataPointSchema`, but `testCaseId` is a required string. */
const BaseEvalDataPointSchema = BaseDataPointSchema.extend({
  testCaseId: z.string()
});

/** Zod enum of the status values a score may carry. */
const EvalStatusEnumSchema = z.enum(["UNKNOWN", "PASS", "FAIL"]);

// Runtime enum object mirroring `EvalStatusEnumSchema`. This is compiler
// output: `var` is required because the initializer reads the (hoisted,
// still-undefined) binding it is declaring.
var EvalStatusEnum = /* @__PURE__ */ ((EvalStatusEnum2) => {
  EvalStatusEnum2["UNKNOWN"] = "UNKNOWN";
  EvalStatusEnum2["PASS"] = "PASS";
  EvalStatusEnum2["FAIL"] = "FAIL";
  return EvalStatusEnum2;
})(EvalStatusEnum || {});

/**
 * A single score. Every field is optional; `details` accepts extra keys
 * beyond `reasoning` because of `.passthrough()`.
 */
const ScoreSchema = z.object({
  id: z.string().describe(
    "Optional ID to differentiate different scores if applying in a single evaluation"
  ).optional(),
  score: z.union([z.number(), z.string(), z.boolean()]).optional(),
  status: EvalStatusEnumSchema.optional(),
  error: z.string().optional(),
  details: z.object({ reasoning: z.string().optional() }).passthrough().optional()
});

// Keys under which `evaluator()` stores option values in the action's
// `metadata.evaluator` object.
const EVALUATOR_METADATA_KEY_DISPLAY_NAME = "evaluatorDisplayName";
const EVALUATOR_METADATA_KEY_DEFINITION = "evaluatorDefinition";
const EVALUATOR_METADATA_KEY_IS_BILLED = "evaluatorIsBilled";

/** Result for one test case: either a single score or an array of scores. */
const EvalResponseSchema = z.object({
  sampleIndex: z.number().optional(),
  testCaseId: z.string(),
  traceId: z.string().optional(),
  spanId: z.string().optional(),
  evaluation: z.union([ScoreSchema, z.array(ScoreSchema)])
});

/** Output of an evaluator action: one `EvalResponseSchema` entry per result. */
const EvalResponsesSchema = z.array(EvalResponseSchema);

/**
 * Attaches the data point type and config schema to an evaluator action.
 *
 * Note: this mutates and returns the same object that was passed in.
 *
 * @param evaluator2 - The evaluator action to annotate.
 * @param dataPointType - Stored as `__dataPointType`.
 * @param configSchema - Stored as `__configSchema`.
 * @returns `evaluator2`, now carrying the two extra properties.
 */
function withMetadata(evaluator2, dataPointType, configSchema) {
  const withMeta = evaluator2;
  withMeta.__dataPointType = dataPointType;
  withMeta.__configSchema = configSchema;
  return withMeta;
}

/** Base input schema for evaluator actions; `evaluator()` extends it. */
const EvalRequestSchema = z.object({
  dataset: z.array(BaseDataPointSchema),
  evalRunId: z.string(),
  options: z.unknown()
});

/**
 * Builds an evaluator action and registers it under the "evaluator" type.
 *
 * @param registry - Object exposing `registerAction(type, action)`.
 * @param options - Evaluator options; see `evaluator()`.
 * @param runner - Per-test-case scoring function; see `evaluator()`.
 * @returns The created evaluator action.
 */
function defineEvaluator(registry, options, runner) {
  const e = evaluator(options, runner);
  registry.registerAction("evaluator", e);
  return e;
}

/**
 * Creates an evaluator action without registering it.
 *
 * The action splits the dataset into batches of `batchSize` (1 when unset),
 * runs the batches one after another, and runs the test cases inside a batch
 * concurrently. A batch whose execution throws is logged and skipped, so its
 * results are absent from the returned array.
 *
 * @param options - Reads `name`, `displayName`, `definition`, and optionally
 *   `isBilled` (treated as `true` when null/undefined), `configSchema` and
 *   `dataPointType`.
 * @param runner - Called as `runner(datapoint, options)` for each test case.
 * @returns The action, annotated with `__dataPointType` / `__configSchema`.
 */
function evaluator(options, runner) {
  const evalMetadata = {};
  evalMetadata[EVALUATOR_METADATA_KEY_IS_BILLED] = options.isBilled ?? true;
  evalMetadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName;
  evalMetadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition;
  if (options.configSchema) {
    evalMetadata["customOptions"] = toJsonSchema({
      schema: options.configSchema
    });
  }
  const evaluator2 = action(
    {
      actionType: "evaluator",
      name: options.name,
      inputSchema: EvalRequestSchema.extend({
        dataset: options.dataPointType
          ? z.array(options.dataPointType)
          : z.array(BaseDataPointSchema),
        options: options.configSchema ?? z.unknown(),
        evalRunId: z.string(),
        batchSize: z.number().optional()
      }),
      outputSchema: EvalResponsesSchema,
      metadata: {
        type: "evaluator",
        evaluator: evalMetadata
      }
    },
    async (i) => {
      const evalResponses = [];
      const batches = getBatchedArray(i.dataset, i.batchSize);
      for (const [batchIndex, batch] of batches.entries()) {
        try {
          if (batch.length === 1) {
            // A lone test case is run directly, without a wrapping
            // "Batch N" span.
            const results = await runBatch(
              runner,
              batch,
              i.batchSize,
              batchIndex,
              i.evalRunId,
              i.options
            );
            evalResponses.push(...results);
          } else {
            await runInNewSpan(
              {
                metadata: {
                  name: `Batch ${batchIndex}`,
                  metadata: { "evaluator:evalRunId": i.evalRunId }
                }
              },
              async (metadata) => {
                const results = await runBatch(
                  runner,
                  batch,
                  i.batchSize,
                  batchIndex,
                  i.evalRunId,
                  i.options,
                  metadata
                );
                evalResponses.push(...results);
              }
            );
          }
        } catch (e) {
          // Best effort: report the failed batch and move on to the next.
          // `e` may be any thrown value, so do not assume it has `.stack`.
          logger.error(
            `Evaluation of batch ${batchIndex} failed: ${e?.stack ?? e}`
          );
        }
      }
      return evalResponses;
    }
  );
  const ewm = withMetadata(
    evaluator2,
    options.dataPointType,
    options.configSchema
  );
  return ewm;
}

/**
 * Runs an evaluator over a dataset.
 *
 * `params.evaluator` may be a name string, an object with an own `info`
 * property (looked up by its `name`), or an evaluator action used as-is.
 *
 * @param registry - Object exposing `lookupAction(key)`.
 * @param params - Reads `evaluator`, `dataset`, `options` and `evalRunId`;
 *   a random UUID is used when `evalRunId` is null/undefined.
 * @returns Whatever the evaluator action resolves to.
 * @throws {Error} When no evaluator could be resolved.
 */
async function evaluate(registry, params) {
  let evaluator2;
  if (typeof params.evaluator === "string") {
    evaluator2 = await registry.lookupAction(`/evaluator/${params.evaluator}`);
  } else if (
    // Guard against null/undefined so a missing evaluator reaches the
    // descriptive error below instead of a TypeError from `.call`.
    params.evaluator != null &&
    Object.hasOwnProperty.call(params.evaluator, "info")
  ) {
    evaluator2 = await registry.lookupAction(
      `/evaluator/${params.evaluator.name}`
    );
  } else {
    evaluator2 = params.evaluator;
  }
  if (!evaluator2) {
    throw new Error("Unable to utilize the provided evaluator");
  }
  return await evaluator2({
    dataset: params.dataset,
    options: params.options,
    evalRunId: params.evalRunId ?? randomUUID()
  });
}

/** Descriptive info for an evaluator: an optional label plus metric names. */
const EvaluatorInfoSchema = z.object({
  /** Friendly label for this evaluator */
  label: z.string().optional(),
  metrics: z.array(z.string())
});

/**
 * Returns a shallow copy of the given options object.
 *
 * @param options - Reference fields to copy.
 * @returns A new object with the same own enumerable properties.
 */
function evaluatorRef(options) {
  return { ...options };
}

/**
 * Splits `arr` into consecutive chunks and makes sure every data point has a
 * `testCaseId`, generating a random UUID where it is null/undefined. The
 * input objects are copied, not mutated.
 *
 * @param arr - Data points to batch.
 * @param batchSize - Chunk length. Any falsy value (undefined, 0, NaN, ...)
 *   means 1.
 * @returns Array of batches; empty when `arr` is empty.
 * @throws {RangeError} When `batchSize` is set but is not a positive integer.
 *   Such values previously caused an endless loop (negative) or empty and
 *   overlapping batches (fractional).
 */
function getBatchedArray(arr, batchSize) {
  const size = batchSize ? batchSize : 1;
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(
      `batchSize must be a positive integer, received ${batchSize}`
    );
  }
  const batches = [];
  for (let i = 0; i < arr.length; i += size) {
    batches.push(
      arr.slice(i, i + size).map((d) => ({
        ...d,
        testCaseId: d.testCaseId ?? randomUUID()
      }))
    );
  }
  return batches;
}

/**
 * Evaluates every data point of one batch concurrently, each inside its own
 * "Test Case <id>" span.
 *
 * A runner failure does not reject the batch: it is converted into a
 * response whose `evaluation.error` carries the failure message.
 *
 * @param runner - Called as `runner(datapoint, options)`.
 * @param batch - Data points to evaluate.
 * @param batchSize - Configured batch size; when falsy, `sampleIndex` is
 *   simply `batchIndex`.
 * @param batchIndex - Position of this batch within the run.
 * @param evalRunId - Stored in each span's metadata.
 * @param options - Forwarded to `runner`.
 * @param batchMetadata - Optional span metadata of the enclosing batch span;
 *   when given, its `input` and `output` are filled in.
 * @returns One response per data point, in batch order — presumably relies
 *   on `runInNewSpan` resolving to its callback's result; confirm against
 *   the tracing API.
 */
async function runBatch(runner, batch, batchSize, batchIndex, evalRunId, options, batchMetadata) {
  if (batchMetadata) {
    batchMetadata.input = batch;
  }
  const evalRunPromises = batch.map((datapoint, index) => {
    const sampleIndex = batchSize
      ? batchSize * batchIndex + index
      : batchIndex;
    return runInNewSpan(
      {
        metadata: {
          name: `Test Case ${datapoint.testCaseId}`,
          metadata: { "evaluator:evalRunId": evalRunId }
        }
      },
      async (metadata, otSpan) => {
        const spanId = otSpan.spanContext().spanId;
        const traceId = otSpan.spanContext().traceId;
        metadata.input = datapoint;
        try {
          const result = await runner(datapoint, options);
          metadata.output = result;
          return {
            ...result,
            traceId,
            spanId,
            sampleIndex
          };
        } catch (error) {
          return {
            sampleIndex,
            spanId,
            traceId,
            testCaseId: datapoint.testCaseId,
            evaluation: {
              error: `Evaluation of test case ${datapoint.testCaseId} failed: ${error}`
            }
          };
        }
      }
    );
  });
  const allResults = await Promise.all(evalRunPromises);
  if (batchMetadata) {
    batchMetadata.output = allResults;
  }
  return allResults;
}

export {
  ATTR_PREFIX,
  BaseDataPointSchema,
  BaseEvalDataPointSchema,
  EVALUATOR_METADATA_KEY_DEFINITION,
  EVALUATOR_METADATA_KEY_DISPLAY_NAME,
  EVALUATOR_METADATA_KEY_IS_BILLED,
  EvalResponseSchema,
  EvalResponsesSchema,
  EvalStatusEnum,
  EvaluatorInfoSchema,
  SPAN_STATE_ATTR,
  ScoreSchema,
  defineEvaluator,
  evaluate,
  evaluator,
  evaluatorRef
};
//# sourceMappingURL=evaluator.mjs.map