UNPKG

mcp-evals

Version:

GitHub Action for evaluating MCP server tool calls using LLM-based scoring

96 lines 3.4 kB
import * as yaml from 'js-yaml';
import * as fs from 'fs';
import * as path from 'path';
import { openai } from "@ai-sdk/openai";
import { anthropic } from "@ai-sdk/anthropic";
import { grade } from './index.js';

/**
 * Load and parse a YAML configuration file.
 *
 * The parsed document must be an object with an `evals` array, and every
 * entry of that array must carry truthy `name`, `description` and `prompt`
 * fields.
 *
 * @param {string} filePath - Path to the YAML file; resolved with `path.resolve`.
 * @returns {object} The parsed configuration object, returned as-is.
 * @throws {Error} With a "Failed to load YAML config: ..." message when the
 *   file cannot be read, cannot be parsed, or fails validation. The original
 *   error is attached as `cause` on runtimes that support it.
 */
export function loadYamlConfig(filePath) {
    try {
        const absolutePath = path.resolve(filePath);
        const fileContents = fs.readFileSync(absolutePath, 'utf8');
        // NOTE(review): `yaml.load` uses a safe schema by default only in
        // js-yaml >= 4 (earlier versions need `safeLoad`). Confirm the
        // installed version before feeding this untrusted files.
        const config = yaml.load(fileContents);
        if (!config || !Array.isArray(config.evals)) {
            throw new Error('Invalid YAML config: must have an "evals" array');
        }
        // Validate each eval. The `!evalItem` guard covers empty list entries
        // (e.g. a bare `-`), which parse to null and would otherwise surface
        // as an opaque TypeError instead of the validation message.
        for (const evalItem of config.evals) {
            if (!evalItem || !evalItem.name || !evalItem.description || !evalItem.prompt) {
                throw new Error(`Invalid eval: each eval must have "name", "description", and "prompt" fields`);
            }
        }
        return config;
    }
    catch (error) {
        // Wrap with a uniform prefix but keep the underlying error reachable
        // through `cause` so the original stack is not lost. Engines without
        // support for the options argument simply ignore it.
        if (error instanceof Error) {
            throw new Error(`Failed to load YAML config: ${error.message}`, { cause: error });
        }
        throw new Error('Failed to load YAML config: Unknown error', { cause: error });
    }
}

/**
 * Convert a parsed YAML configuration to an EvalConfig.
 *
 * Side effect: when `yamlConfig.model.api_key` is set, it is written to
 * `process.env.OPENAI_API_KEY` or `process.env.ANTHROPIC_API_KEY` (depending
 * on the provider) for the rest of the process lifetime.
 *
 * @param {object} yamlConfig - Configuration as returned by `loadYamlConfig`.
 *   Reads `yamlConfig.model` (`provider`, `name`, `api_key`) and
 *   `yamlConfig.evals` (`name`, `description`, `prompt` per entry).
 * @param {string} serverPath - Passed through unchanged to `grade` for every eval.
 * @returns {{ model: object, evals: Array<{ name: string, description: string, run: Function }> }}
 *   The selected model plus one eval function per YAML eval.
 * @throws {Error} When `yamlConfig.model.provider` is neither "openai" nor "anthropic".
 */
export function yamlConfigToEvalConfig(yamlConfig, serverPath) {
    // Setup model
    let model;
    if (yamlConfig.model) {
        if (yamlConfig.model.provider === 'openai') {
            // Set the API key as an environment variable if provided
            if (yamlConfig.model.api_key) {
                process.env.OPENAI_API_KEY = yamlConfig.model.api_key;
            }
            model = openai(yamlConfig.model.name);
        }
        else if (yamlConfig.model.provider === 'anthropic') {
            // Set the API key as an environment variable if provided
            if (yamlConfig.model.api_key) {
                process.env.ANTHROPIC_API_KEY = yamlConfig.model.api_key;
            }
            model = anthropic(yamlConfig.model.name);
        }
        else {
            throw new Error(`Unsupported model provider: ${yamlConfig.model.provider}`);
        }
    }
    else {
        // No model section in the YAML: default to OpenAI "gpt-4o"
        model = openai("gpt-4o");
    }
    // Convert YAML evals to EvalFunctions
    const evalFunctions = yamlConfig.evals.map((yamlEval) => ({
        name: yamlEval.name,
        description: yamlEval.description,
        run: async (evalModel) => {
            try {
                // Presumably `grade` resolves to a JSON string; it is parsed
                // and returned as the eval result.
                const result = await grade(evalModel, yamlEval.prompt, serverPath);
                return JSON.parse(result);
            }
            catch (error) {
                // Any failure (grading itself or JSON parsing) is reported as
                // an all-zero score with the error message in the comments,
                // so one failing eval does not reject the caller's promise.
                return {
                    accuracy: 0,
                    completeness: 0,
                    relevance: 0,
                    clarity: 0,
                    reasoning: 0,
                    overall_comments: `Error running evaluation: ${error instanceof Error ? error.message : String(error)}`
                };
            }
        }
    }));
    return {
        model,
        evals: evalFunctions
    };
}

/**
 * Load a YAML config file and convert it to an EvalConfig.
 *
 * @param {string} filePath - Path to the YAML file, forwarded to `loadYamlConfig`.
 * @param {string} serverPath - Forwarded to `yamlConfigToEvalConfig`.
 * @returns {{ model: object, evals: Array<object> }} The result of `yamlConfigToEvalConfig`.
 * @throws {Error} Whatever `loadYamlConfig` or `yamlConfigToEvalConfig` throw.
 */
export function loadYamlEvalConfig(filePath, serverPath) {
    const yamlConfig = loadYamlConfig(filePath);
    return yamlConfigToEvalConfig(yamlConfig, serverPath);
}
//# sourceMappingURL=yaml-loader.js.map