UNPKG

@genkit-ai/ai

Version:

Genkit AI framework generative AI APIs.

264 lines 9.7 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var evaluator_exports = {}; __export(evaluator_exports, { ATTR_PREFIX: () => ATTR_PREFIX, BaseDataPointSchema: () => BaseDataPointSchema, BaseEvalDataPointSchema: () => BaseEvalDataPointSchema, EVALUATOR_METADATA_KEY_DEFINITION: () => EVALUATOR_METADATA_KEY_DEFINITION, EVALUATOR_METADATA_KEY_DISPLAY_NAME: () => EVALUATOR_METADATA_KEY_DISPLAY_NAME, EVALUATOR_METADATA_KEY_IS_BILLED: () => EVALUATOR_METADATA_KEY_IS_BILLED, EvalResponseSchema: () => EvalResponseSchema, EvalResponsesSchema: () => EvalResponsesSchema, EvalStatusEnum: () => EvalStatusEnum, EvaluatorInfoSchema: () => EvaluatorInfoSchema, SPAN_STATE_ATTR: () => SPAN_STATE_ATTR, ScoreSchema: () => ScoreSchema, defineEvaluator: () => defineEvaluator, evaluate: () => evaluate, evaluator: () => evaluator, evaluatorRef: () => evaluatorRef }); module.exports = __toCommonJS(evaluator_exports); var import_core = require("@genkit-ai/core"); var import_logging = require("@genkit-ai/core/logging"); var import_schema = require("@genkit-ai/core/schema"); var import_tracing = require("@genkit-ai/core/tracing"); var import_crypto = require("crypto"); const ATTR_PREFIX = "genkit"; const SPAN_STATE_ATTR = ATTR_PREFIX + ":state"; const BaseDataPointSchema = import_core.z.object({ input: import_core.z.unknown(), output: import_core.z.unknown().optional(), context: import_core.z.array(import_core.z.unknown()).optional(), reference: import_core.z.unknown().optional(), testCaseId: import_core.z.string().optional(), traceIds: import_core.z.array(import_core.z.string()).optional() }); const BaseEvalDataPointSchema = BaseDataPointSchema.extend({ testCaseId: import_core.z.string() }); const EvalStatusEnumSchema = import_core.z.enum(["UNKNOWN", "PASS", "FAIL"]); var EvalStatusEnum = /* @__PURE__ */ ((EvalStatusEnum2) => { EvalStatusEnum2["UNKNOWN"] = "UNKNOWN"; EvalStatusEnum2["PASS"] = "PASS"; EvalStatusEnum2["FAIL"] = "FAIL"; return EvalStatusEnum2; })(EvalStatusEnum || {}); const ScoreSchema = import_core.z.object({ id: import_core.z.string().describe( "Optional ID to differentiate different scores if applying in a single evaluation" ).optional(), score: import_core.z.union([import_core.z.number(), import_core.z.string(), import_core.z.boolean()]).optional(), status: EvalStatusEnumSchema.optional(), error: import_core.z.string().optional(), details: import_core.z.object({ reasoning: import_core.z.string().optional() }).passthrough().optional() }); const EVALUATOR_METADATA_KEY_DISPLAY_NAME = "evaluatorDisplayName"; const EVALUATOR_METADATA_KEY_DEFINITION = "evaluatorDefinition"; const EVALUATOR_METADATA_KEY_IS_BILLED = "evaluatorIsBilled"; const EvalResponseSchema = import_core.z.object({ sampleIndex: import_core.z.number().optional(), testCaseId: import_core.z.string(), traceId: import_core.z.string().optional(), spanId: import_core.z.string().optional(), evaluation: import_core.z.union([ScoreSchema, import_core.z.array(ScoreSchema)]) }); const EvalResponsesSchema = import_core.z.array(EvalResponseSchema); function withMetadata(evaluator2, dataPointType, configSchema) { const withMeta = evaluator2; withMeta.__dataPointType = dataPointType; withMeta.__configSchema = configSchema; return withMeta; } const EvalRequestSchema = import_core.z.object({ dataset: import_core.z.array(BaseDataPointSchema), evalRunId: import_core.z.string(), options: import_core.z.unknown() }); function defineEvaluator(registry, options, runner) { const e = evaluator(options, runner); registry.registerAction("evaluator", e); return e; } function evaluator(options, runner) { const evalMetadata = {}; evalMetadata[EVALUATOR_METADATA_KEY_IS_BILLED] = options.isBilled == void 0 ? true : options.isBilled; evalMetadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName; evalMetadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition; if (options.configSchema) { evalMetadata["customOptions"] = (0, import_schema.toJsonSchema)({ schema: options.configSchema }); } const evaluator2 = (0, import_core.action)( { actionType: "evaluator", name: options.name, inputSchema: EvalRequestSchema.extend({ dataset: options.dataPointType ? import_core.z.array(options.dataPointType) : import_core.z.array(BaseDataPointSchema), options: options.configSchema ?? import_core.z.unknown(), evalRunId: import_core.z.string(), batchSize: import_core.z.number().optional() }), outputSchema: EvalResponsesSchema, metadata: { type: "evaluator", evaluator: evalMetadata } }, async (i) => { const evalResponses = []; const batches = getBatchedArray(i.dataset, i.batchSize); for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) { const batch = batches[batchIndex]; try { await (0, import_tracing.runInNewSpan)( { metadata: { name: i.batchSize ? `Batch ${batchIndex}` : `Test Case ${batch[0].testCaseId}`, metadata: { "evaluator:evalRunId": i.evalRunId } }, labels: { [import_tracing.SPAN_TYPE_ATTR]: "evaluator" } }, async (metadata, otSpan) => { const spanId = otSpan.spanContext().spanId; const traceId = otSpan.spanContext().traceId; const evalRunPromises = batch.map((d, index) => { const sampleIndex = i.batchSize ? i.batchSize * batchIndex + index : batchIndex; const datapoint = d; metadata.input = { input: datapoint.input, output: datapoint.output, context: datapoint.context }; const evalOutputPromise = runner(datapoint, i.options).then((result) => ({ ...result, traceId, spanId, sampleIndex })).catch((error) => { return { sampleIndex, spanId, traceId, testCaseId: datapoint.testCaseId, evaluation: { error: `Evaluation of test case ${datapoint.testCaseId} failed: ${error}` } }; }); return evalOutputPromise; }); const allResults = await Promise.all(evalRunPromises); metadata.output = allResults.length === 1 ? allResults[0] : allResults; allResults.map((result) => { evalResponses.push(result); }); } ); } catch (e) { import_logging.logger.error( `Evaluation of batch ${batchIndex} failed: ${e.stack}` ); continue; } } return evalResponses; } ); const ewm = withMetadata( evaluator2, options.dataPointType, options.configSchema ); return ewm; } async function evaluate(registry, params) { let evaluator2; if (typeof params.evaluator === "string") { evaluator2 = await registry.lookupAction(`/evaluator/${params.evaluator}`); } else if (Object.hasOwnProperty.call(params.evaluator, "info")) { evaluator2 = await registry.lookupAction( `/evaluator/${params.evaluator.name}` ); } else { evaluator2 = params.evaluator; } if (!evaluator2) { throw new Error("Unable to utilize the provided evaluator"); } return await evaluator2({ dataset: params.dataset, options: params.options, evalRunId: params.evalRunId ?? (0, import_crypto.randomUUID)() }); } const EvaluatorInfoSchema = import_core.z.object({ /** Friendly label for this evaluator */ label: import_core.z.string().optional(), metrics: import_core.z.array(import_core.z.string()) }); function evaluatorRef(options) { return { ...options }; } function getBatchedArray(arr, batchSize) { let size; if (!batchSize) { size = 1; } else { size = batchSize; } const batches = []; for (var i = 0; i < arr.length; i += size) { batches.push( arr.slice(i, i + size).map((d) => ({ ...d, testCaseId: d.testCaseId ?? (0, import_crypto.randomUUID)() })) ); } return batches; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { ATTR_PREFIX, BaseDataPointSchema, BaseEvalDataPointSchema, EVALUATOR_METADATA_KEY_DEFINITION, EVALUATOR_METADATA_KEY_DISPLAY_NAME, EVALUATOR_METADATA_KEY_IS_BILLED, EvalResponseSchema, EvalResponsesSchema, EvalStatusEnum, EvaluatorInfoSchema, SPAN_STATE_ATTR, ScoreSchema, defineEvaluator, evaluate, evaluator, evaluatorRef }); //# sourceMappingURL=evaluator.js.map