UNPKG

@executeautomation/playwright-mcp-server

Version:
42 lines (41 loc) 1.94 kB
//evals.ts
import { openai } from "@ai-sdk/openai";
import { grade } from "mcp-evals";

/** Model identifier used for every `grade` call and for the exported config. */
const GRADER_MODEL_ID = "gpt-4";

/**
 * Builds one evaluation entry with the `{ name, description, run }` shape
 * that every eval in this module shares.
 *
 * @param name - Label for the evaluation; also used to tag parse failures.
 * @param description - Short description of what the evaluation covers.
 * @param prompt - Prompt handed to `grade` when the evaluation runs.
 * @returns An object whose `run` passes the prompt to `grade` and returns the
 *   response parsed as JSON.
 */
const createEval = (name: string, description: string, prompt: string) => ({
  name,
  description,
  /**
   * Sends the prompt to `grade` and parses the response as JSON.
   *
   * @throws SyntaxError when the response is not valid JSON. The message
   *   names the evaluation so a failing entry can be identified.
   */
  run: async () => {
    const result = await grade(openai(GRADER_MODEL_ID), prompt);
    try {
      return JSON.parse(result);
    } catch (error: unknown) {
      // Keep the SyntaxError type JSON.parse would have thrown, but say which
      // evaluation produced the unparsable response.
      const reason = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(
        `${name}: grader response is not valid JSON (${reason})`,
      );
    }
  },
});

const startCodegenSessionEval = createEval(
  'startCodegenSession Evaluation',
  'Evaluates the start codegen session tool',
  "Please start a new code generation session with an output path of /my/test/path, a testNamePrefix of MyPrefix, and comments enabled. Confirm the session was created successfully.",
);

const endCodegenSessionEval = createEval(
  'end_codegen_session Evaluation',
  'Evaluates the end_codegen_session tool functionality',
  "Please end the code generation session with ID session123 and generate the Playwright test code",
);

const getCodegenSessionEval = createEval(
  'get_codegen_session Tool Evaluation',
  'Evaluates the retrieval of code generation session details',
  "Please retrieve the code generation session details using session ID abc123.",
);

const clearCodegenSessionEval = createEval(
  'clear_codegen_session Evaluation',
  'Evaluates the functionality of clearing a code generation session',
  "Please clear the code generation session with the ID testSession_123 to verify removal.",
);

/** All evaluations defined in this module, in the order they are listed in the config. */
export const evals = [
  startCodegenSessionEval,
  endCodegenSessionEval,
  getCodegenSessionEval,
  clearCodegenSessionEval,
];

/**
 * Default export: the model plus the evaluation list.
 * `evals` is copied so that `config.evals` and the named `evals` export stay
 * separate arrays while always holding the same entries.
 */
const config = {
  model: openai(GRADER_MODEL_ID),
  evals: [...evals],
};

export default config;