UNPKG

@vantasdk/vanta-mcp-server

Version:

Model Context Protocol server for Vanta's security compliance platform

204 lines (203 loc) • 8.06 kB
/**
 * Tool-selection evaluation script.
 *
 * Sends each prompt in `testCases` to the OpenAI chat completions API with
 * the Vanta tool definitions attached, then checks which tool (if any) the
 * model chose to call. Results are printed to the console; nothing is
 * exported. Requires the OPENAI_API_KEY environment variable.
 */
import OpenAI from "openai";
import { zodToJsonSchema } from "zod-to-json-schema";
import { GetTestsTool, GetTestEntitiesTool } from "../operations/tests.js";
import {
  GetFrameworksTool,
  GetFrameworkControlsTool,
} from "../operations/frameworks.js";
import {
  GetControlsTool,
  GetControlTestsTool,
} from "../operations/controls.js";

/**
 * Wraps a tool definition in the OpenAI "function" tool format.
 *
 * @param {{ name: string, description: string, parameters: unknown }} tool
 *   Tool definition; `parameters` is passed to `zodToJsonSchema`.
 * @returns {object} Entry suitable for the `tools` array of a chat completion request.
 */
const toOpenAITool = ({ name, description, parameters }) => ({
  type: "function",
  function: {
    name,
    description,
    parameters: zodToJsonSchema(parameters),
  },
});

// Format all tools for OpenAI
const tools = [
  GetTestsTool,
  GetTestEntitiesTool,
  GetFrameworksTool,
  GetFrameworkControlsTool,
  GetControlsTool,
  GetControlTestsTool,
].map(toOpenAITool);

// Each case pairs a user prompt with the tool the model is expected to pick.
// `expectedTool: "none"` means the model should answer without calling a tool.
const testCases = [
  {
    prompt: "What security issues do I have in my AWS infrastructure?",
    expectedTool: "get_tests",
    expectedParams: {
      statusFilter: "NEEDS_ATTENTION",
      integrationFilter: "aws",
    },
    description:
      "Should call get_tests with AWS filter and NEEDS_ATTENTION status",
  },
  {
    prompt: "Show me all my SOC2 compliance tests that are failing",
    expectedTool: "get_tests",
    expectedParams: {
      frameworkFilter: "soc2",
      statusFilter: "NEEDS_ATTENTION",
    },
    description: "Should call get_tests with SOC2 framework filter",
  },
  {
    prompt:
      "Show me the specific failing entities for test ID aws-security-groups-open-to-world",
    expectedTool: "get_test_entities",
    expectedParams: { testId: "aws-security-groups-open-to-world" },
    description: "Should call get_test_entities for specific test details",
  },
  {
    prompt: "What compliance frameworks are we tracking?",
    expectedTool: "get_frameworks",
    expectedParams: {},
    description: "Should call get_frameworks to list available frameworks",
  },
  {
    prompt: "Get the control requirements for framework ID soc2",
    expectedTool: "get_framework_controls",
    expectedParams: { frameworkId: "soc2" },
    description: "Should call get_framework_controls for SOC2",
  },
  {
    prompt: "What is the current % status of my SOC 2?",
    expectedTool: "get_frameworks",
    expectedParams: {},
    description: "Should call get_frameworks to get SOC2 completion percentage",
  },
  {
    prompt: "List all security controls in my Vanta account",
    expectedTool: "get_controls",
    expectedParams: {},
    description: "Should call get_controls to list all available controls",
  },
  {
    prompt: "Show me the tests for control ID access-control-1",
    expectedTool: "get_control_tests",
    expectedParams: { controlId: "access-control-1" },
    description: "Should call get_control_tests for specific control",
  },
  {
    prompt: "What programming tests should I write for my API?",
    expectedTool: "none",
    expectedParams: {},
    description:
      "Should NOT call any Vanta tools - this is about code testing, not compliance",
  },
  {
    prompt: "Help me debug this JavaScript function",
    expectedTool: "none",
    expectedParams: {},
    description:
      "Should NOT call any Vanta tools - this is about code debugging",
  },
];

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

/**
 * Checks that every expected key is present in the actual arguments with a
 * strictly equal value. Extra keys in `actual` are ignored.
 *
 * @param {Record<string, unknown>} expected Expected parameter values.
 * @param {Record<string, unknown>} actual Parsed arguments of the tool call.
 * @returns {boolean} True when all expected entries match.
 */
function paramsMatch(expected, actual) {
  return Object.entries(expected).every(
    ([key, value]) => actual[key] === value,
  );
}

/**
 * Runs one test case against the model and logs the outcome.
 *
 * Only the first tool call in the response is inspected. A parameter mismatch
 * is logged as a warning but does not fail the case; only the tool name
 * decides pass/fail.
 *
 * @param {{ prompt: string, expectedTool: string, expectedParams: object }} testCase
 * @returns {Promise<boolean>} True when the case passed.
 * @throws Propagates API errors and JSON parse errors from the tool-call
 *   arguments; the caller reports them as an ERROR for this case.
 */
async function runTestCase(testCase) {
  const res = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [{ role: "user", content: testCase.prompt }],
    tools: tools,
    tool_choice: "auto",
  });
  const toolCalls = res.choices[0]?.message?.tool_calls;
  const hasToolCalls = Boolean(toolCalls) && toolCalls.length > 0;

  if (testCase.expectedTool === "none") {
    if (!hasToolCalls) {
      console.log("✅ PASS: Correctly did not call any tools");
      return true;
    }
    const calledNames = toolCalls.map((call) => call.function.name).join(", ");
    console.log(
      `❌ FAIL: Should not have called tools, but called: ${calledNames}`,
    );
    return false;
  }

  if (!hasToolCalls) {
    console.log(
      `❌ FAIL: Expected to call ${testCase.expectedTool}, but no tools were called`,
    );
    return false;
  }

  const calledTool = toolCalls[0].function.name;
  // Arguments arrive as a JSON string; parsing happens before the name check,
  // so malformed arguments surface as an ERROR rather than a FAIL.
  const calledParams = JSON.parse(toolCalls[0].function.arguments);

  if (calledTool !== testCase.expectedTool) {
    console.log(
      `❌ FAIL: Expected ${testCase.expectedTool}, but called ${calledTool}`,
    );
    return false;
  }

  console.log(`✅ PASS: Correctly called ${calledTool}`);
  // Check specific parameters if provided
  if (
    testCase.expectedParams &&
    Object.keys(testCase.expectedParams).length > 0
  ) {
    if (paramsMatch(testCase.expectedParams, calledParams)) {
      console.log("✅ Parameters match expected values");
    } else {
      console.log(
        `⚠️ Parameters don't fully match. Expected: ${JSON.stringify(testCase.expectedParams)}, Got: ${JSON.stringify(calledParams)}`,
      );
    }
  }
  console.log(`📋 Called with: ${JSON.stringify(calledParams, null, 2)}`);
  return true;
}

console.log("🧪 Vanta MCP Server Tool Evaluation");
console.log("====================================\n");

let passedTests = 0;
const totalTests = testCases.length;

// Cases run sequentially so that the log output of each case stays grouped.
for (const testCase of testCases) {
  console.log(`📝 Test: ${testCase.description}`);
  console.log(`💬 Prompt: "${testCase.prompt}"`);
  console.log(`🎯 Expected Tool: ${testCase.expectedTool}`);
  try {
    if (await runTestCase(testCase)) {
      passedTests++;
    }
  } catch (error) {
    // An error counts as a failed case; the remaining cases still run.
    console.log(`❌ ERROR: ${String(error)}`);
  }
  console.log(""); // Empty line for spacing
}

console.log("📊 Final Results");
console.log("================");
console.log(`✅ Passed: ${passedTests}/${totalTests} tests`);
console.log(`❌ Failed: ${totalTests - passedTests}/${totalTests} tests`);
console.log(
  `📈 Success Rate: ${Math.round((passedTests / totalTests) * 100)}%`,
);
if (passedTests === totalTests) {
  console.log(
    "🎉 All tests passed! Tool calling behavior is working correctly.",
  );
} else {
  console.log(
    "⚠️ Some tests failed. Review the tool descriptions or test cases.",
  );
}