UNPKG

@genkit-ai/vertexai

Version:

Genkit AI framework plugin for Google Cloud Vertex AI APIs including Gemini APIs, Imagen, and more.

346 lines 9.94 kB
import { z } from "genkit";
import { EvaluatorFactory } from "./evaluator_factory.js";

/**
 * Metric identifiers accepted by {@link vertexEvaluators}.
 *
 * NOTE: the value of `FLUENCY` is the misspelled string "FLEUNCY". It is kept
 * byte-for-byte because it is handed to the evaluator factory as `metric` and
 * callers may already depend on it (presumably it also feeds the registered
 * evaluator name -- confirm in evaluator_factory.js before changing it).
 */
const VertexAIEvaluationMetricType = {
  BLEU: "BLEU",
  ROUGE: "ROUGE",
  FLUENCY: "FLEUNCY",
  SAFETY: "SAFETY",
  GROUNDEDNESS: "GROUNDEDNESS",
  SUMMARIZATION_QUALITY: "SUMMARIZATION_QUALITY",
  SUMMARIZATION_HELPFULNESS: "SUMMARIZATION_HELPFULNESS",
  SUMMARIZATION_VERBOSITY: "SUMMARIZATION_VERBOSITY",
};

/**
 * Returns strings unchanged and JSON-encodes everything else.
 *
 * @param {unknown} input - Value to convert.
 * @returns {string | undefined} The string form; `undefined` when
 *   `JSON.stringify` yields `undefined` (e.g. for an `undefined` input).
 */
function stringify(input) {
  return typeof input === "string" ? input : JSON.stringify(input);
}

/**
 * Creates one evaluator per requested metric.
 *
 * Each entry of `metrics` is either a bare metric-type string or a config
 * object of the form `{ type, metricSpec }`. Bare strings get an empty
 * `metricSpec`.
 *
 * @param {object} ai - Forwarded as the first argument of `factory.create`.
 * @param {object} auth - Forwarded to the `EvaluatorFactory` constructor.
 * @param {Array<string | {type: string, metricSpec?: object}>} metrics -
 *   Metrics to create evaluators for.
 * @param {string} projectId - Forwarded to the `EvaluatorFactory` constructor.
 * @param {string} location - Forwarded to the `EvaluatorFactory` constructor.
 * @returns {Array<unknown>} Whatever `factory.create` returns, one per metric,
 *   in the same order as `metrics`.
 * @throws {Error} If a metric type is not one of the supported values.
 */
function vertexEvaluators(ai, auth, metrics, projectId, location) {
  const factory = new EvaluatorFactory(auth, location, projectId);
  return metrics.map((metric) => {
    const hasConfig = isConfig(metric);
    const metricType = hasConfig ? metric.type : metric;
    const metricSpec = hasConfig ? metric.metricSpec : {};
    switch (metricType) {
      case "BLEU" /* BLEU */:
        return createBleuEvaluator(ai, factory, metricSpec);
      case "ROUGE" /* ROUGE */:
        return createRougeEvaluator(ai, factory, metricSpec);
      case "FLEUNCY" /* FLUENCY (legacy misspelled value) */:
      // Also accept the correct spelling so plain-string callers are not
      // silently ignored.
      case "FLUENCY":
        return createFluencyEvaluator(ai, factory, metricSpec);
      case "SAFETY" /* SAFETY */:
        return createSafetyEvaluator(ai, factory, metricSpec);
      case "GROUNDEDNESS" /* GROUNDEDNESS */:
        return createGroundednessEvaluator(ai, factory, metricSpec);
      case "SUMMARIZATION_QUALITY" /* SUMMARIZATION_QUALITY */:
        return createSummarizationQualityEvaluator(ai, factory, metricSpec);
      case "SUMMARIZATION_HELPFULNESS" /* SUMMARIZATION_HELPFULNESS */:
        return createSummarizationHelpfulnessEvaluator(ai, factory, metricSpec);
      case "SUMMARIZATION_VERBOSITY" /* SUMMARIZATION_VERBOSITY */:
        return createSummarizationVerbosityEvaluator(ai, factory, metricSpec);
      default:
        // Fail loudly instead of leaving an `undefined` hole in the result.
        throw new Error(
          `Unsupported Vertex AI evaluation metric: ${stringify(metricType)}`
        );
    }
  });
}

/**
 * Tells a metric config object apart from a bare metric-type string.
 *
 * @param {unknown} config - Entry from the `metrics` array.
 * @returns {boolean} `true` when `config` carries a defined `type` property.
 */
function isConfig(config) {
  // Optional chaining keeps null/undefined entries from raising a TypeError
  // here; they fall through to the "unsupported metric" error instead.
  return config?.type !== undefined;
}

/**
 * Builds the response schema used by the metrics whose result is an object
 * with `score`, `explanation` and `confidence`.
 *
 * @param {string} resultKey - Name of the top-level result property.
 * @returns {object} A zod object schema `{ [resultKey]: { score, explanation, confidence } }`.
 */
function explainedResultSchema(resultKey) {
  return z.object({
    [resultKey]: z.object({
      score: z.number(),
      explanation: z.string(),
      confidence: z.number(),
    }),
  });
}

/**
 * Maps an explained result onto the score shape returned by the evaluators.
 *
 * @param {{score: number, explanation: string}} result - Metric result.
 * @returns {{score: number, details: {reasoning: string}}} Score with reasoning.
 */
function toExplainedScore(result) {
  return {
    score: result.score,
    details: { reasoning: result.explanation },
  };
}

/**
 * Joins a datapoint's context entries into a single ". "-separated string.
 *
 * @param {{context?: Array<unknown>}} datapoint - Datapoint being evaluated.
 * @returns {string | undefined} Joined context, or `undefined` when the
 *   datapoint has no context.
 */
function joinContext(datapoint) {
  return datapoint.context?.join(". ");
}

/**
 * Builds the `instance` payload shared by the three summarization metrics.
 *
 * @param {object} datapoint - Datapoint with `output`, `input` and optional `context`.
 * @returns {{prediction: (string|undefined), instruction: (string|undefined), context: (string|undefined)}}
 */
function summarizationInstance(datapoint) {
  return {
    prediction: stringify(datapoint.output),
    instruction: stringify(datapoint.input),
    context: joinContext(datapoint),
  };
}

const BleuResponseSchema = z.object({
  bleuResults: z.object({
    bleuMetricValues: z.array(z.object({ score: z.number() })),
  }),
});

/**
 * Creates the BLEU evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createBleuEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "BLEU" /* BLEU */,
      displayName: "BLEU",
      definition:
        "Computes the BLEU score by comparing the output against the ground truth",
      responseSchema: BleuResponseSchema,
    },
    (datapoint) => ({
      bleuInput: {
        metricSpec,
        instances: [
          {
            prediction: stringify(datapoint.output),
            reference: datapoint.reference,
          },
        ],
      },
    }),
    // Only one instance is sent, so only the first metric value is read.
    (response) => ({
      score: response.bleuResults.bleuMetricValues[0].score,
    })
  );
}

const RougeResponseSchema = z.object({
  rougeResults: z.object({
    rougeMetricValues: z.array(z.object({ score: z.number() })),
  }),
});

/**
 * Creates the ROUGE evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createRougeEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "ROUGE" /* ROUGE */,
      displayName: "ROUGE",
      definition:
        "Computes the ROUGE score by comparing the output against the ground truth",
      responseSchema: RougeResponseSchema,
    },
    (datapoint) => ({
      rougeInput: {
        metricSpec,
        // `instances` is a list, matching the BLEU request shape and the
        // `rougeMetricValues[0]` read below.
        instances: [
          {
            prediction: stringify(datapoint.output),
            reference: datapoint.reference,
          },
        ],
      },
    }),
    (response) => ({
      score: response.rougeResults.rougeMetricValues[0].score,
    })
  );
}

const FluencyResponseSchema = explainedResultSchema("fluencyResult");

/**
 * Creates the fluency evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createFluencyEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      // Misspelling preserved on purpose; see VertexAIEvaluationMetricType.
      metric: "FLEUNCY" /* FLUENCY */,
      displayName: "Fluency",
      definition: "Assesses the language mastery of an output",
      responseSchema: FluencyResponseSchema,
    },
    (datapoint) => ({
      fluencyInput: {
        metricSpec,
        instance: { prediction: stringify(datapoint.output) },
      },
    }),
    (response) => toExplainedScore(response.fluencyResult)
  );
}

const SafetyResponseSchema = explainedResultSchema("safetyResult");

/**
 * Creates the safety evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createSafetyEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "SAFETY" /* SAFETY */,
      displayName: "Safety",
      definition: "Assesses the level of safety of an output",
      responseSchema: SafetyResponseSchema,
    },
    (datapoint) => ({
      safetyInput: {
        metricSpec,
        instance: { prediction: stringify(datapoint.output) },
      },
    }),
    (response) => toExplainedScore(response.safetyResult)
  );
}

const GroundednessResponseSchema = explainedResultSchema("groundednessResult");

/**
 * Creates the groundedness evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createGroundednessEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "GROUNDEDNESS" /* GROUNDEDNESS */,
      displayName: "Groundedness",
      definition:
        "Assesses the ability to provide or reference information included only in the context",
      responseSchema: GroundednessResponseSchema,
    },
    (datapoint) => ({
      groundednessInput: {
        metricSpec,
        instance: {
          prediction: stringify(datapoint.output),
          context: joinContext(datapoint),
        },
      },
    }),
    (response) => toExplainedScore(response.groundednessResult)
  );
}

const SummarizationQualityResponseSchema = explainedResultSchema(
  "summarizationQualityResult"
);

/**
 * Creates the summarization-quality evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createSummarizationQualityEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "SUMMARIZATION_QUALITY" /* SUMMARIZATION_QUALITY */,
      displayName: "Summarization quality",
      definition: "Assesses the overall ability to summarize text",
      responseSchema: SummarizationQualityResponseSchema,
    },
    (datapoint) => ({
      summarizationQualityInput: {
        metricSpec,
        instance: summarizationInstance(datapoint),
      },
    }),
    (response) => toExplainedScore(response.summarizationQualityResult)
  );
}

const SummarizationHelpfulnessResponseSchema = explainedResultSchema(
  "summarizationHelpfulnessResult"
);

/**
 * Creates the summarization-helpfulness evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createSummarizationHelpfulnessEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "SUMMARIZATION_HELPFULNESS" /* SUMMARIZATION_HELPFULNESS */,
      displayName: "Summarization helpfulness",
      definition:
        "Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text",
      responseSchema: SummarizationHelpfulnessResponseSchema,
    },
    (datapoint) => ({
      summarizationHelpfulnessInput: {
        metricSpec,
        instance: summarizationInstance(datapoint),
      },
    }),
    (response) => toExplainedScore(response.summarizationHelpfulnessResult)
  );
}

const SummarizationVerbosityResponseSchema = explainedResultSchema(
  "summarizationVerbosityResult"
);

/**
 * Creates the summarization-verbosity evaluator.
 *
 * @param {object} ai - Forwarded to `factory.create`.
 * @param {EvaluatorFactory} factory - Factory used to create the evaluator.
 * @param {object} metricSpec - Placed verbatim in the request's `metricSpec`.
 * @returns {unknown} The value returned by `factory.create`.
 */
function createSummarizationVerbosityEvaluator(ai, factory, metricSpec) {
  return factory.create(
    ai,
    {
      metric: "SUMMARIZATION_VERBOSITY" /* SUMMARIZATION_VERBOSITY */,
      displayName: "Summarization verbosity",
      definition: "Assesses the ability to provide a succinct summarization",
      responseSchema: SummarizationVerbosityResponseSchema,
    },
    (datapoint) => ({
      summarizationVerbosityInput: {
        metricSpec,
        instance: summarizationInstance(datapoint),
      },
    }),
    (response) => toExplainedScore(response.summarizationVerbosityResult)
  );
}

export { VertexAIEvaluationMetricType, vertexEvaluators };
//# sourceMappingURL=evaluation.mjs.map