UNPKG

@genkit-ai/ai

Version:

Genkit AI framework generative AI APIs.

1 lines 14 kB
{"version":3,"sources":["../src/evaluator.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport { Action, defineAction, z } from '@genkit-ai/core';\nimport { logger } from '@genkit-ai/core/logging';\nimport { Registry } from '@genkit-ai/core/registry';\nimport { SPAN_TYPE_ATTR, runInNewSpan } from '@genkit-ai/core/tracing';\nimport { randomUUID } from 'crypto';\n\nexport const ATTR_PREFIX = 'genkit';\nexport const SPAN_STATE_ATTR = ATTR_PREFIX + ':state';\n\nexport const BaseDataPointSchema = z.object({\n input: z.unknown(),\n output: z.unknown().optional(),\n context: z.array(z.unknown()).optional(),\n reference: z.unknown().optional(),\n testCaseId: z.string().optional(),\n traceIds: z.array(z.string()).optional(),\n});\n\n// DataPoint that is to be used for actions. This needs testCaseId to be present.\nexport const BaseEvalDataPointSchema = BaseDataPointSchema.extend({\n testCaseId: z.string(),\n});\nexport type BaseEvalDataPoint = z.infer<typeof BaseEvalDataPointSchema>;\n\nexport const ScoreSchema = z.object({\n id: z\n .string()\n .describe(\n 'Optional ID to differentiate different scores if applying in a single evaluation'\n )\n .optional(),\n score: z.union([z.number(), z.string(), z.boolean()]).optional(),\n // TODO: use StatusSchema\n error: z.string().optional(),\n details: z\n .object({\n reasoning: z.string().optional(),\n })\n .passthrough()\n .optional(),\n});\n\n// Update genkit-tools/src/utils/evals.ts if you change this value\nexport const EVALUATOR_METADATA_KEY_DISPLAY_NAME = 'evaluatorDisplayName';\nexport const EVALUATOR_METADATA_KEY_DEFINITION = 'evaluatorDefinition';\nexport const EVALUATOR_METADATA_KEY_IS_BILLED = 'evaluatorIsBilled';\n\nexport type Score = z.infer<typeof ScoreSchema>;\nexport type BaseDataPoint = z.infer<typeof BaseDataPointSchema>;\nexport type Dataset<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n> = Array<z.infer<DataPoint>>;\n\nexport const EvalResponseSchema = z.object({\n sampleIndex: z.number().optional(),\n testCaseId: z.string(),\n traceId: z.string().optional(),\n spanId: z.string().optional(),\n evaluation: z.union([ScoreSchema, z.array(ScoreSchema)]),\n});\nexport type EvalResponse = z.infer<typeof EvalResponseSchema>;\n\nexport const EvalResponsesSchema = z.array(EvalResponseSchema);\nexport type EvalResponses = z.infer<typeof EvalResponsesSchema>;\n\nexport type EvaluatorFn<\n EvalDataPoint extends\n typeof BaseEvalDataPointSchema = typeof BaseEvalDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> = (\n input: z.infer<EvalDataPoint>,\n evaluatorOptions?: z.infer<CustomOptions>\n) => Promise<EvalResponse>;\n\nexport type EvaluatorAction<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> = Action<typeof EvalRequestSchema, typeof EvalResponsesSchema> & {\n __dataPointType?: DataPoint;\n __configSchema?: CustomOptions;\n};\n\nfunction withMetadata<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n evaluator: Action<typeof EvalRequestSchema, typeof EvalResponsesSchema>,\n dataPointType?: DataPoint,\n configSchema?: CustomOptions\n): EvaluatorAction<DataPoint, CustomOptions> {\n const withMeta = evaluator as EvaluatorAction<DataPoint, CustomOptions>;\n withMeta.__dataPointType = dataPointType;\n withMeta.__configSchema = configSchema;\n return withMeta;\n}\n\nconst EvalRequestSchema = z.object({\n dataset: z.array(BaseDataPointSchema),\n evalRunId: z.string(),\n options: z.unknown(),\n});\n\nexport interface EvaluatorParams<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> {\n evaluator: EvaluatorArgument<DataPoint, CustomOptions>;\n dataset: Dataset<DataPoint>;\n evalRunId?: string;\n options?: z.infer<CustomOptions>;\n}\n\n/**\n * Creates evaluator action for the provided {@link EvaluatorFn} implementation.\n */\nexport function defineEvaluator<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n EvalDataPoint extends\n typeof BaseEvalDataPointSchema = typeof BaseEvalDataPointSchema,\n EvaluatorOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n registry: Registry,\n options: {\n name: string;\n displayName: string;\n definition: string;\n dataPointType?: DataPoint;\n configSchema?: EvaluatorOptions;\n isBilled?: boolean;\n },\n runner: EvaluatorFn<EvalDataPoint, EvaluatorOptions>\n) {\n const metadata = {};\n metadata[EVALUATOR_METADATA_KEY_IS_BILLED] =\n options.isBilled == undefined ? true : options.isBilled;\n metadata[EVALUATOR_METADATA_KEY_DISPLAY_NAME] = options.displayName;\n metadata[EVALUATOR_METADATA_KEY_DEFINITION] = options.definition;\n const evaluator = defineAction(\n registry,\n {\n actionType: 'evaluator',\n name: options.name,\n inputSchema: EvalRequestSchema.extend({\n dataset: options.dataPointType\n ? z.array(options.dataPointType)\n : z.array(BaseDataPointSchema),\n options: options.configSchema ?? z.unknown(),\n evalRunId: z.string(),\n }),\n outputSchema: EvalResponsesSchema,\n metadata: metadata,\n },\n async (i) => {\n let evalResponses: EvalResponses = [];\n for (let index = 0; index < i.dataset.length; index++) {\n const datapoint: BaseEvalDataPoint = {\n ...i.dataset[index],\n testCaseId: i.dataset[index].testCaseId ?? randomUUID(),\n };\n try {\n await runInNewSpan(\n registry,\n {\n metadata: {\n name: `Test Case ${datapoint.testCaseId}`,\n metadata: { 'evaluator:evalRunId': i.evalRunId },\n },\n labels: {\n [SPAN_TYPE_ATTR]: 'evaluator',\n },\n },\n async (metadata, otSpan) => {\n const spanId = otSpan.spanContext().spanId;\n const traceId = otSpan.spanContext().traceId;\n try {\n metadata.input = {\n input: datapoint.input,\n output: datapoint.output,\n context: datapoint.context,\n };\n const testCaseOutput = await runner(datapoint, i.options);\n testCaseOutput.sampleIndex = index;\n testCaseOutput.spanId = spanId;\n testCaseOutput.traceId = traceId;\n metadata.output = testCaseOutput;\n evalResponses.push(testCaseOutput);\n return testCaseOutput;\n } catch (e) {\n evalResponses.push({\n sampleIndex: index,\n spanId,\n traceId,\n testCaseId: datapoint.testCaseId,\n evaluation: {\n error: `Evaluation of test case ${datapoint.testCaseId} failed: \\n${(e as Error).stack}`,\n },\n });\n throw e;\n }\n }\n );\n } catch (e) {\n logger.error(\n `Evaluation of test case ${datapoint.testCaseId} failed: \\n${(e as Error).stack}`\n );\n continue;\n }\n }\n return evalResponses;\n }\n );\n const ewm = withMetadata(\n evaluator as any as Action<\n typeof EvalRequestSchema,\n typeof EvalResponsesSchema\n >,\n options.dataPointType,\n options.configSchema\n );\n return ewm;\n}\n\nexport type EvaluatorArgument<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> =\n | string\n | EvaluatorAction<DataPoint, CustomOptions>\n | EvaluatorReference<CustomOptions>;\n\n/**\n * A veneer for interacting with evaluators.\n */\nexport async function evaluate<\n DataPoint extends typeof BaseDataPointSchema = typeof BaseDataPointSchema,\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n registry: Registry,\n params: EvaluatorParams<DataPoint, CustomOptions>\n): Promise<EvalResponses> {\n let evaluator: EvaluatorAction<DataPoint, CustomOptions>;\n if (typeof params.evaluator === 'string') {\n evaluator = await registry.lookupAction(`/evaluator/${params.evaluator}`);\n } else if (Object.hasOwnProperty.call(params.evaluator, 'info')) {\n evaluator = await registry.lookupAction(\n `/evaluator/${params.evaluator.name}`\n );\n } else {\n evaluator = params.evaluator as EvaluatorAction<DataPoint, CustomOptions>;\n }\n if (!evaluator) {\n throw new Error('Unable to utilize the provided evaluator');\n }\n return (await evaluator({\n dataset: params.dataset,\n options: params.options,\n evalRunId: params.evalRunId ?? randomUUID(),\n })) as EvalResponses;\n}\n\nexport const EvaluatorInfoSchema = z.object({\n /** Friendly label for this evaluator */\n label: z.string().optional(),\n metrics: z.array(z.string()),\n});\nexport type EvaluatorInfo = z.infer<typeof EvaluatorInfoSchema>;\n\nexport interface EvaluatorReference<CustomOptions extends z.ZodTypeAny> {\n name: string;\n configSchema?: CustomOptions;\n info?: EvaluatorInfo;\n}\n\n/**\n * Helper method to configure a {@link EvaluatorReference} to a plugin.\n */\nexport function evaluatorRef<\n CustomOptionsSchema extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n options: EvaluatorReference<CustomOptionsSchema>\n): EvaluatorReference<CustomOptionsSchema> {\n return { ...options };\n}\n"],"mappings":"AAgBA,SAAiB,cAAc,SAAS;AACxC,SAAS,cAAc;AAEvB,SAAS,gBAAgB,oBAAoB;AAC7C,SAAS,kBAAkB;AAEpB,MAAM,cAAc;AACpB,MAAM,kBAAkB,cAAc;AAEtC,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,OAAO,EAAE,QAAQ;AAAA,EACjB,QAAQ,EAAE,QAAQ,EAAE,SAAS;AAAA,EAC7B,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,SAAS;AAAA,EACvC,WAAW,EAAE,QAAQ,EAAE,SAAS;AAAA,EAChC,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS;AACzC,CAAC;AAGM,MAAM,0BAA0B,oBAAoB,OAAO;AAAA,EAChE,YAAY,EAAE,OAAO;AACvB,CAAC;AAGM,MAAM,cAAc,EAAE,OAAO;AAAA,EAClC,IAAI,EACD,OAAO,EACP;AAAA,IACC;AAAA,EACF,EACC,SAAS;AAAA,EACZ,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS;AAAA;AAAA,EAE/D,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,SAAS,EACN,OAAO;AAAA,IACN,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,CAAC,EACA,YAAY,EACZ,SAAS;AACd,CAAC;AAGM,MAAM,sCAAsC;AAC5C,MAAM,oCAAoC;AAC1C,MAAM,mCAAmC;AAQzC,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,aAAa,EAAE,OAAO,EAAE,SAAS;AAAA,EACjC,YAAY,EAAE,OAAO;AAAA,EACrB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,QAAQ,EAAE,OAAO,EAAE,SAAS;AAAA,EAC5B,YAAY,EAAE,MAAM,CAAC,aAAa,EAAE,MAAM,WAAW,CAAC,CAAC;AACzD,CAAC;AAGM,MAAM,sBAAsB,EAAE,MAAM,kBAAkB;AAoB7D,SAAS,aAIP,WACA,eACA,cAC2C;AAC3C,QAAM,WAAW;AACjB,WAAS,kBAAkB;AAC3B,WAAS,iBAAiB;AAC1B,SAAO;AACT;AAEA,MAAM,oBAAoB,EAAE,OAAO;AAAA,EACjC,SAAS,EAAE,MAAM,mBAAmB;AAAA,EACpC,WAAW,EAAE,OAAO;AAAA,EACpB,SAAS,EAAE,QAAQ;AACrB,CAAC;AAeM,SAAS,gBAMd,UACA,SAQA,QACA;AACA,QAAM,WAAW,CAAC;AAClB,WAAS,gCAAgC,IACvC,QAAQ,YAAY,SAAY,OAAO,QAAQ;AACjD,WAAS,mCAAmC,IAAI,QAAQ;AACxD,WAAS,iCAAiC,IAAI,QAAQ;AACtD,QAAM,YAAY;AAAA,IAChB;AAAA,IACA;AAAA,MACE,YAAY;AAAA,MACZ,MAAM,QAAQ;AAAA,MACd,aAAa,kBAAkB,OAAO;AAAA,QACpC,SAAS,QAAQ,gBACb,EAAE,MAAM,QAAQ,aAAa,IAC7B,EAAE,MAAM,mBAAmB;AAAA,QAC/B,SAAS,QAAQ,gBAAgB,EAAE,QAAQ;AAAA,QAC3C,WAAW,EAAE,OAAO;AAAA,MACtB,CAAC;AAAA,MACD,cAAc;AAAA,MACd;AAAA,IACF;AAAA,IACA,OAAO,MAAM;AACX,UAAI,gBAA+B,CAAC;AACpC,eAAS,QAAQ,GAAG,QAAQ,EAAE,QAAQ,QAAQ,SAAS;AACrD,cAAM,YAA+B;AAAA,UACnC,GAAG,EAAE,QAAQ,KAAK;AAAA,UAClB,YAAY,EAAE,QAAQ,KAAK,EAAE,cAAc,WAAW;AAAA,QACxD;AACA,YAAI;AACF,gBAAM;AAAA,YACJ;AAAA,YACA;AAAA,cACE,UAAU;AAAA,gBACR,MAAM,aAAa,UAAU,UAAU;AAAA,gBACvC,UAAU,EAAE,uBAAuB,EAAE,UAAU;AAAA,cACjD;AAAA,cACA,QAAQ;AAAA,gBACN,CAAC,cAAc,GAAG;AAAA,cACpB;AAAA,YACF;AAAA,YACA,OAAOA,WAAU,WAAW;AAC1B,oBAAM,SAAS,OAAO,YAAY,EAAE;AACpC,oBAAM,UAAU,OAAO,YAAY,EAAE;AACrC,kBAAI;AACF,gBAAAA,UAAS,QAAQ;AAAA,kBACf,OAAO,UAAU;AAAA,kBACjB,QAAQ,UAAU;AAAA,kBAClB,SAAS,UAAU;AAAA,gBACrB;AACA,sBAAM,iBAAiB,MAAM,OAAO,WAAW,EAAE,OAAO;AACxD,+BAAe,cAAc;AAC7B,+BAAe,SAAS;AACxB,+BAAe,UAAU;AACzB,gBAAAA,UAAS,SAAS;AAClB,8BAAc,KAAK,cAAc;AACjC,uBAAO;AAAA,cACT,SAAS,GAAG;AACV,8BAAc,KAAK;AAAA,kBACjB,aAAa;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,YAAY,UAAU;AAAA,kBACtB,YAAY;AAAA,oBACV,OAAO,2BAA2B,UAAU,UAAU;AAAA,EAAe,EAAY,KAAK;AAAA,kBACxF;AAAA,gBACF,CAAC;AACD,sBAAM;AAAA,cACR;AAAA,YACF;AAAA,UACF;AAAA,QACF,SAAS,GAAG;AACV,iBAAO;AAAA,YACL,2BAA2B,UAAU,UAAU;AAAA,EAAe,EAAY,KAAK;AAAA,UACjF;AACA;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAAA,EACF;AACA,QAAM,MAAM;AAAA,IACV;AAAA,IAIA,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV;AACA,SAAO;AACT;AAaA,eAAsB,SAIpB,UACA,QACwB;AACxB,MAAI;AACJ,MAAI,OAAO,OAAO,cAAc,UAAU;AACxC,gBAAY,MAAM,SAAS,aAAa,cAAc,OAAO,SAAS,EAAE;AAAA,EAC1E,WAAW,OAAO,eAAe,KAAK,OAAO,WAAW,MAAM,GAAG;AAC/D,gBAAY,MAAM,SAAS;AAAA,MACzB,cAAc,OAAO,UAAU,IAAI;AAAA,IACrC;AAAA,EACF,OAAO;AACL,gBAAY,OAAO;AAAA,EACrB;AACA,MAAI,CAAC,WAAW;AACd,UAAM,IAAI,MAAM,0CAA0C;AAAA,EAC5D;AACA,SAAQ,MAAM,UAAU;AAAA,IACtB,SAAS,OAAO;AAAA,IAChB,SAAS,OAAO;AAAA,IAChB,WAAW,OAAO,aAAa,WAAW;AAAA,EAC5C,CAAC;AACH;AAEO,MAAM,sBAAsB,EAAE,OAAO;AAAA;AAAA,EAE1C,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC;AAC7B,CAAC;AAYM,SAAS,aAGd,SACyC;AACzC,SAAO,EAAE,GAAG,QAAQ;AACtB;","names":["metadata"]}