UNPKG

@genkit-ai/ai

Version:

Genkit AI framework generative AI APIs.

218 lines 8.15 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var evaluator_exports = {}; __export(evaluator_exports, { ATTR_PREFIX: () => ATTR_PREFIX, BaseDataPointSchema: () => BaseDataPointSchema, BaseEvalDataPointSchema: () => BaseEvalDataPointSchema, EVALUATOR_METADATA_KEY_DEFINITION: () => EVALUATOR_METADATA_KEY_DEFINITION, EVALUATOR_METADATA_KEY_DISPLAY_NAME: () => EVALUATOR_METADATA_KEY_DISPLAY_NAME, EVALUATOR_METADATA_KEY_IS_BILLED: () => EVALUATOR_METADATA_KEY_IS_BILLED, EvalResponseSchema: () => EvalResponseSchema, EvalResponsesSchema: () => EvalResponsesSchema, EvaluatorInfoSchema: () => EvaluatorInfoSchema, SPAN_STATE_ATTR: () => SPAN_STATE_ATTR, ScoreSchema: () => ScoreSchema, defineEvaluator: () => defineEvaluator, evaluate: () => evaluate, evaluatorRef: () => evaluatorRef }); module.exports = __toCommonJS(evaluator_exports); var import_core = require("@genkit-ai/core"); var import_logging = require("@genkit-ai/core/logging"); var import_tracing = require("@genkit-ai/core/tracing"); var import_crypto = require("crypto"); const ATTR_PREFIX = "genkit"; const SPAN_STATE_ATTR = ATTR_PREFIX + ":state"; const BaseDataPointSchema = import_core.z.object({ input: import_core.z.unknown(), output: import_core.z.unknown().optional(), context: import_core.z.array(import_core.z.unknown()).optional(), reference: import_core.z.unknown().optional(), testCaseId: import_core.z.string().optional(), traceIds: import_core.z.array(import_core.z.string()).optional() }); const BaseEvalDataPointSchema = BaseDataPointSchema.extend({ testCaseId: import_core.z.string() }); const ScoreSchema = import_core.z.object({ id: import_core.z.string().describe( "Optional ID to differentiate different scores if applying in a single evaluation" ).optional(), score: import_core.z.union([import_core.z.number(), import_core.z.string(), import_core.z.boolean()]).optional(), // TODO: use StatusSchema error: import_core.z.string().optional(), details: import_core.z.object({ reasoning: import_core.z.string().optional() }).passthrough().optional() }); const EVALUATOR_METADATA_KEY_DISPLAY_NAME = "evaluatorDisplayName"; const EVALUATOR_METADATA_KEY_DEFINITION = "evaluatorDefinition"; const EVALUATOR_METADATA_KEY_IS_BILLED = "evaluatorIsBilled"; const EvalResponseSchema = import_core.z.object({ sampleIndex: import_core.z.number().optional(), testCaseId: import_core.z.string(), traceId: import_core.z.string().optional(), spanId: import_core.z.string().optional(), evaluation: import_core.z.union([ScoreSchema, import_core.z.array(ScoreSchema)]) }); const EvalResponsesSchema = import_core.z.array(EvalResponseSchema); function withMetadata(evaluator, dataPointType, configSchema) { const withMeta = evaluator; withMeta.__dataPointType = dataPointType; withMeta.__configSchema = configSchema; return withMeta; } const EvalRequestSchema = import_core.z.object({ dataset: import_core.z.array(BaseDataPointSchema), evalRunId: import_core.z.string(), options: import_core.z.unknown() }); function defineEvaluator(registry, options, runner) { const metadata = {}; metadata[EVALUATOR_METADATA_KEY_IS_BILLED] = options.isBilled == void 0 ? true : options.isBilled; metadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName; metadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition; const evaluator = (0, import_core.defineAction)( registry, { actionType: "evaluator", name: options.name, inputSchema: EvalRequestSchema.extend({ dataset: options.dataPointType ? import_core.z.array(options.dataPointType) : import_core.z.array(BaseDataPointSchema), options: options.configSchema ?? import_core.z.unknown(), evalRunId: import_core.z.string() }), outputSchema: EvalResponsesSchema, metadata }, async (i) => { let evalResponses = []; for (let index = 0; index < i.dataset.length; index++) { const datapoint = { ...i.dataset[index], testCaseId: i.dataset[index].testCaseId ?? (0, import_crypto.randomUUID)() }; try { await (0, import_tracing.runInNewSpan)( registry, { metadata: { name: `Test Case ${datapoint.testCaseId}`, metadata: { "evaluator:evalRunId": i.evalRunId } }, labels: { [import_tracing.SPAN_TYPE_ATTR]: "evaluator" } }, async (metadata2, otSpan) => { const spanId = otSpan.spanContext().spanId; const traceId = otSpan.spanContext().traceId; try { metadata2.input = { input: datapoint.input, output: datapoint.output, context: datapoint.context }; const testCaseOutput = await runner(datapoint, i.options); testCaseOutput.sampleIndex = index; testCaseOutput.spanId = spanId; testCaseOutput.traceId = traceId; metadata2.output = testCaseOutput; evalResponses.push(testCaseOutput); return testCaseOutput; } catch (e) { evalResponses.push({ sampleIndex: index, spanId, traceId, testCaseId: datapoint.testCaseId, evaluation: { error: `Evaluation of test case ${datapoint.testCaseId} failed: ${e.stack}` } }); throw e; } } ); } catch (e) { import_logging.logger.error( `Evaluation of test case ${datapoint.testCaseId} failed: ${e.stack}` ); continue; } } return evalResponses; } ); const ewm = withMetadata( evaluator, options.dataPointType, options.configSchema ); return ewm; } async function evaluate(registry, params) { let evaluator; if (typeof params.evaluator === "string") { evaluator = await registry.lookupAction(`/evaluator/${params.evaluator}`); } else if (Object.hasOwnProperty.call(params.evaluator, "info")) { evaluator = await registry.lookupAction( `/evaluator/${params.evaluator.name}` ); } else { evaluator = params.evaluator; } if (!evaluator) { throw new Error("Unable to utilize the provided evaluator"); } return await evaluator({ dataset: params.dataset, options: params.options, evalRunId: params.evalRunId ?? (0, import_crypto.randomUUID)() }); } const EvaluatorInfoSchema = import_core.z.object({ /** Friendly label for this evaluator */ label: import_core.z.string().optional(), metrics: import_core.z.array(import_core.z.string()) }); function evaluatorRef(options) { return { ...options }; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { ATTR_PREFIX, BaseDataPointSchema, BaseEvalDataPointSchema, EVALUATOR_METADATA_KEY_DEFINITION, EVALUATOR_METADATA_KEY_DISPLAY_NAME, EVALUATOR_METADATA_KEY_IS_BILLED, EvalResponseSchema, EvalResponsesSchema, EvaluatorInfoSchema, SPAN_STATE_ATTR, ScoreSchema, defineEvaluator, evaluate, evaluatorRef }); //# sourceMappingURL=evaluator.js.map