UNPKG

@runhuman/mcp-server

Version:

Model Context Protocol (MCP) server for RunHuman - Human-powered QA testing for AI agents

yueranyuan/qa-experiment

414 lines (385 loc) • 16.1 kB

JavaScript

#!/usr/bin/env node
/**
 * RunHuman MCP Server
 *
 * This MCP server provides tools for AI agents to interact with the RunHuman QA testing service.
 * It allows agents to:
 * - Create QA jobs
 * - Check job status
 * - Retrieve job results
 *
 * @see https://modelcontextprotocol.io/
 */
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
  ListPromptsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
import * as dotenv from 'dotenv';

// Load environment variables (optional - for standalone testing)
dotenv.config();

// Configuration
// Priority: CLI args > env vars > defaults
// Usage: node dist/index.js --api-key=qa_live_xxx [--api-url=https://...]
const cliArgs = process.argv.slice(2);

/**
 * Read the value of a `--name=value` command-line option.
 *
 * Everything after the first `=` is returned, so values that themselves
 * contain `=` (for example a URL with a query string) are kept intact.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string | undefined} The option value, or undefined when the option is absent.
 */
function readCliOption(name) {
  const prefix = `--${name}=`;
  const match = cliArgs.find((arg) => arg.startsWith(prefix));
  return match === undefined ? undefined : match.slice(prefix.length);
}

// Trailing slashes are stripped so that `${API_URL}/api/...` never contains `//`.
const API_URL = (
  readCliOption('api-url') ||
  process.env.RUNHUMAN_API_URL ||
  'https://qa-experiment.fly.dev'
).replace(/\/+$/, '');
const API_KEY = readCliOption('api-key') || process.env.RUNHUMAN_API_KEY;

if (!API_KEY) {
  // stdout carries the MCP protocol on a stdio transport, so all diagnostics go to stderr.
  console.error('❌ Error: API key is required');
  console.error('');
  console.error('For Claude Desktop, add to your config:');
  console.error('{');
  console.error(' "mcpServers": {');
  console.error(' "runhuman": {');
  console.error(' "command": "npx",');
  console.error(' "args": ["-y", "@runhuman/mcp-server", "--api-key=qa_live_xxxxx"]');
  console.error(' }');
  console.error(' }');
  console.error('}');
  console.error('');
  console.error('Get your API key at: https://qa-experiment.fly.dev/app.html');
  process.exit(1);
}

console.error(`🔗 Connected to RunHuman API at: ${API_URL}`);
console.error(`🔑 Using API key: ${API_KEY.substring(0, 12)}...`);

/**
 * Create and configure the MCP server
 */
const server = new Server(
  {
    name: 'runhuman-mcp-server',
    version: '1.0.0',
  },
  {
    capabilities: {
      tools: {},
      prompts: {},
    },
  },
);

/**
 * List available prompts (documentation for the agent)
 *
 * NOTE(review): this file only registers a handler for listing prompts; no
 * handler that returns the body of `explain_runhuman` is registered here.
 * Confirm whether one is expected.
 */
server.setRequestHandler(ListPromptsRequestSchema, async () => ({
  prompts: [
    {
      name: 'explain_runhuman',
      description: 'Get an explanation of how to use RunHuman for QA testing',
    },
  ],
}));

/**
 * Tool catalogue advertised to MCP clients.
 *
 * The long descriptions are assembled from fragments joined with a single
 * space purely to keep source lines readable.
 */
const TOOLS = [
  {
    name: 'create_job',
    description: [
      `⚠️ IMPORTANT: This ONLY creates and queues a job.`,
      `It does NOT perform the test or return results.`,
      `You MUST follow up with get_job_status and get_job_result.`,
      `Creates a QA job that will be performed by a REAL HUMAN tester (not AI).`,
      `The human will manually test your application, describe findings in natural language,`,
      `and GPT-4o will extract structured data from their response.`,
      `Use this when you need human verification of:`,
      `- UI/UX functionality that's hard to automate`,
      `- Visual issues, accessibility problems`,
      `- Complex user flows (login, checkout, forms)`,
      `- Cross-browser compatibility`,
      `- Real user experience feedback`,
      `⚠️ REQUIRED WORKFLOW (do NOT skip steps):`,
      `1. create_job → Returns jobId (job is now QUEUED, not complete!)`,
      `2. get_job_status → Poll every 30-60 seconds until status="completed" (takes 2-10 min)`,
      `3. get_job_result → Retrieve the actual test results`,
      `DO NOT treat job creation as completion.`,
      `You MUST wait for and retrieve results.`,
    ].join(' '),
    inputSchema: {
      type: 'object',
      properties: {
        url: {
          type: 'string',
          description:
            'The URL to test (must be publicly accessible). Example: "https://myapp.com/checkout"',
        },
        description: {
          type: 'string',
          description:
            'Clear instructions for the human tester. Be specific about what to test and how. Example: "Test the checkout flow: Add a product to cart, proceed to checkout, fill in shipping info, and verify the order summary shows correct totals before submitting."',
        },
        schema: {
          type: 'object',
          description:
            'JSON Schema defining the structure you want extracted from the tester\'s response. Example: { "type": "object", "properties": { "checkoutWorks": { "type": "boolean" }, "totalIsCorrect": { "type": "boolean" }, "issues": { "type": "array", "items": { "type": "string" } } } }',
        },
      },
      required: ['url', 'description', 'schema'],
    },
  },
  {
    name: 'get_job_status',
    description: [
      `Check the current status of a QA job.`,
      `Jobs progress through states: pending → claimed → in_progress → completed (or failed/timeout).`,
      `Use this to poll for completion before fetching results.`,
      `Typical job completion time is 2-10 minutes depending on test complexity.`,
      `Returns: { status: "pending" | "claimed" | "in_progress" | "completed" | "failed" | "timeout", message: "..." }`,
    ].join(' '),
    inputSchema: {
      type: 'object',
      properties: {
        jobId: {
          type: 'string',
          description:
            'The job ID returned from create_job. Example: "550e8400-e29b-41d4-a716-446655440000"',
        },
      },
      required: ['jobId'],
    },
  },
  {
    name: 'get_job_result',
    description: [
      `Get the structured results of a completed QA job.`,
      `Only call this after get_job_status shows status="completed".`,
      `Returns the tester's response extracted into your specified schema,`,
      `plus metadata about timing and the raw tester response.`,
      `If the job isn't complete yet, returns an error.`,
      `If extraction failed, includes the raw response so you can see what the tester said.`,
    ].join(' '),
    inputSchema: {
      type: 'object',
      properties: {
        jobId: {
          type: 'string',
          description:
            'The job ID of a completed job. Example: "550e8400-e29b-41d4-a716-446655440000"',
        },
      },
      required: ['jobId'],
    },
  },
];

/**
 * List available tools
 */
server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

/**
 * Build a single-text-item tool result.
 *
 * @param {string} text - Message shown to the calling agent.
 * @param {boolean} [isError=false] - When true the result carries `isError: true`.
 * @returns {{content: Array<{type: string, text: string}>, isError?: boolean}} Tool result object.
 */
function textResult(text, isError = false) {
  const result = { content: [{ type: 'text', text }] };
  if (isError) {
    result.isError = true;
  }
  return result;
}

/**
 * Reduce a caught value to a printable message.
 *
 * @param {unknown} error - Whatever was thrown.
 * @returns {string} The Error's message, or 'Unknown error' for non-Error values.
 */
function describeError(error) {
  return error instanceof Error ? error.message : 'Unknown error';
}

/**
 * Check that a tool call supplied a usable job ID.
 *
 * @param {unknown} jobId - The `jobId` argument from the tool call.
 * @returns {object | null} An error tool result when the ID is unusable, otherwise null.
 */
function validateJobId(jobId) {
  if (typeof jobId === 'string' && jobId.trim() !== '') {
    return null;
  }
  return textResult('❌ Missing required argument: jobId (must be a non-empty string)', true);
}

/**
 * Fetch a job record from the RunHuman API.
 *
 * @param {string} jobId - Job identifier; percent-encoded before being placed in the path
 *   so characters such as `/`, `?` or `#` cannot alter the requested endpoint.
 * @param {string} failureHeading - First line of the message used for non-404 HTTP failures.
 * @returns {Promise<{job?: object, failure?: object}>} `job` holds the parsed response body on
 *   success; `failure` holds a ready-to-return error tool result on an HTTP error.
 * @throws Propagates network errors from `fetch` and parse errors from `response.json()`.
 */
async function fetchJob(jobId, failureHeading) {
  const response = await fetch(`${API_URL}/api/job/${encodeURIComponent(jobId)}`, {
    headers: { Authorization: `Bearer ${API_KEY}` },
  });
  if (response.ok) {
    return { job: await response.json() };
  }
  if (response.status === 404) {
    return {
      failure: textResult(
        `❌ Job not found Job ID: ${jobId} The job does not exist or you don't have permission to access it.`,
        true,
      ),
    };
  }
  return {
    failure: textResult(
      `${failureHeading} Status: ${response.status} Error: ${response.statusText}`,
      true,
    ),
  };
}

/**
 * Handle the `create_job` tool: POST a new QA job to the RunHuman API.
 *
 * @param {object} toolArgs - Tool arguments (`url`, `description`, `schema`).
 * @returns {Promise<object>} Tool result with the queued job's ID, or an error result.
 */
async function handleCreateJob(toolArgs) {
  // Reject calls that omit required fields instead of forwarding undefined values to the API.
  const problems = [];
  if (typeof toolArgs.url !== 'string' || toolArgs.url.trim() === '') {
    problems.push('url (non-empty string)');
  }
  if (typeof toolArgs.description !== 'string' || toolArgs.description.trim() === '') {
    problems.push('description (non-empty string)');
  }
  if (
    typeof toolArgs.schema !== 'object' ||
    toolArgs.schema === null ||
    Array.isArray(toolArgs.schema)
  ) {
    problems.push('schema (JSON Schema object)');
  }
  if (problems.length > 0) {
    return textResult(`❌ Missing or invalid required arguments: ${problems.join(', ')}`, true);
  }

  try {
    // Call RunHuman API to create job
    const response = await fetch(`${API_URL}/api/jobs`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        url: toolArgs.url,
        description: toolArgs.description,
        outputSchema: toolArgs.schema,
      }),
    });

    if (!response.ok) {
      // The error body may not be JSON; fall back to the HTTP status text.
      const body = await response.json().catch(() => ({ error: response.statusText }));
      const reason = body?.error || body?.message || response.statusText;
      return textResult(
        [
          `❌ Failed to create job`,
          `Error: ${reason}`,
          `Status: ${response.status}`,
          `Please check:`,
          `- Your RUNHUMAN_API_KEY is valid`,
          `- The API server is running at ${API_URL}`,
          `- Your API key has permission to create jobs`,
        ].join(' '),
        true,
      );
    }

    const data = await response.json();
    return textResult(
      [
        `⏳ Job QUEUED (not complete yet!)`,
        `Job ID: ${data.jobId}`,
        `Status: ${data.status}`,
        `URL: ${toolArgs.url}`,
        `The test has been queued for a HUMAN tester.`,
        `It is NOT complete yet.`,
        `⚠️ REQUIRED NEXT STEPS:`,
        `1. Wait 30-60 seconds`,
        `2. Call get_job_status(jobId: "${data.jobId}") to check if complete`,
        `3. Repeat step 2 until status="completed" (typical time: 2-10 minutes)`,
        `4. Call get_job_result(jobId: "${data.jobId}") to get the actual test results`,
        `DO NOT consider this task done until you have retrieved the results!`,
      ].join(' '),
    );
  } catch (error) {
    return textResult(
      [
        `❌ Error creating job`,
        describeError(error),
        `Please check:`,
        `- The API server is running at ${API_URL}`,
        `- Your network connection`,
        `- Your .env file configuration`,
      ].join(' '),
      true,
    );
  }
}

// Emoji shown next to each known job status; unknown statuses fall back to a generic icon.
const STATUS_EMOJI = new Map([
  ['pending', '⏳'],
  ['claimed', '👤'],
  ['in_progress', '🔄'],
  ['completed', '✅'],
  ['failed', '❌'],
  ['timeout', '⏰'],
]);

/**
 * Handle the `get_job_status` tool: report the job's current state.
 *
 * @param {object} toolArgs - Tool arguments (`jobId`).
 * @returns {Promise<object>} Tool result describing the status, or an error result.
 */
async function handleGetJobStatus(toolArgs) {
  const invalid = validateJobId(toolArgs.jobId);
  if (invalid) {
    return invalid;
  }

  try {
    const { job, failure } = await fetchJob(toolArgs.jobId, '❌ Failed to get job status');
    if (failure) {
      return failure;
    }

    const emoji = STATUS_EMOJI.get(job.status) ?? '📊';
    let message = `${emoji} Job Status: ${job.status} Job ID: ${job.id}`;

    switch (job.status) {
      case 'pending':
        message +=
          '\n\nWaiting for a tester to claim this job...\nTypical completion time: 2-10 minutes';
        break;
      case 'claimed':
      case 'in_progress':
        message +=
          '\n\nThe tester is working on your test...\nTypical completion time: 2-10 minutes';
        break;
      case 'completed':
        message += '\n\n✅ Job is complete! Use get_job_result to retrieve the results.';
        break;
      case 'failed':
        message +=
          '\n\n❌ Job failed.' +
          (job.error ? ` Error: ${job.error}` : ' Check the error with get_job_result.');
        break;
      case 'timeout':
        message += '\n\n⏰ Job timed out. The tester did not complete in time.';
        break;
      default:
        // Unknown status: report it verbatim with no extra guidance.
        break;
    }

    return textResult(message);
  } catch (error) {
    return textResult(`❌ Error checking job status ${describeError(error)}`, true);
  }
}

/**
 * Explain why a job that is not `completed` has no results yet.
 *
 * @param {object} job - Job record as returned by the API (`status`, optional `error`).
 * @returns {string} One-line hint for the calling agent.
 */
function describeIncompleteJob(job) {
  switch (job.status) {
    case 'pending':
      return 'Waiting for a tester to claim this job...';
    case 'claimed':
    case 'in_progress':
      return 'The tester is working on your test...';
    case 'failed':
      return '❌ Job failed. Error: ' + (job.error || 'Unknown error');
    case 'timeout':
      return '⏰ Job timed out.';
    default:
      return 'Use get_job_status to check current status.';
  }
}

/**
 * Handle the `get_job_result` tool: return the structured results of a completed job.
 *
 * @param {object} toolArgs - Tool arguments (`jobId`).
 * @returns {Promise<object>} Tool result containing the results as pretty-printed JSON,
 *   or an error result when the job is missing, unfinished, or the request fails.
 */
async function handleGetJobResult(toolArgs) {
  const invalid = validateJobId(toolArgs.jobId);
  if (invalid) {
    return invalid;
  }

  try {
    const { job, failure } = await fetchJob(toolArgs.jobId, '❌ Failed to get job result');
    if (failure) {
      return failure;
    }

    if (job.status !== 'completed') {
      // The advertised tool description says an unfinished job "returns an error",
      // so the result is flagged as one rather than looking like a successful call.
      return textResult(
        `⏳ Job not yet completed Job ID: ${job.id} Current status: ${job.status} ${describeIncompleteJob(job)}`,
        true,
      );
    }

    // Job is completed, format results
    const content = [
      { type: 'text', text: `✅ Test completed! Job ID: ${job.id} **Test Results:**` },
      { type: 'text', text: JSON.stringify(job.result || {}, null, 2) },
    ];
    if (job.error) {
      content.push({ type: 'text', text: `\n⚠️ Note: ${job.error}` });
    }
    return { content };
  } catch (error) {
    return textResult(`❌ Error getting job result ${describeError(error)}`, true);
  }
}

// Tool name → handler. A Map avoids accidental matches on Object.prototype keys.
const TOOL_HANDLERS = new Map([
  ['create_job', handleCreateJob],
  ['get_job_status', handleGetJobStatus],
  ['get_job_result', handleGetJobResult],
]);

/**
 * Handle tool calls
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: toolArgs } = request.params;
  if (!toolArgs) {
    throw new Error('Missing arguments');
  }

  const handler = TOOL_HANDLERS.get(name);
  if (!handler) {
    throw new Error(`Unknown tool: ${name}`);
  }
  return handler(toolArgs);
});

/**
 * Start the server
 */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error('RunHuman MCP server running on stdio');
}

main().catch((error) => {
  console.error('Server error:', error);
  process.exit(1);
});
//# sourceMappingURL=index.js.map