@grasplabs/grasp
Version:
TypeScript SDK for browser automation and secure command execution in highly available and scalable cloud browser environments
414 lines (363 loc) • 15.2 kB
JavaScript
#!/usr/bin/env node
"use strict";
/*
* MCP Server
*/
// Interop helper for default imports: a module flagged with `__esModule` is
// returned untouched, anything else is wrapped as `{ default: mod }`.
// An already-installed helper on `this` is reused when one exists.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const zod_1 = require("zod");
const index_1 = __importDefault(require("../index"));
const open_1 = __importDefault(require("open"));
/* Configuration
{
  "mcpServers": {
    "grasp-run-code": {
      "command": "npx",
      "args": ["-y", "grasp-run-mcp"],
      "env": {
        "GRASP_KEY": "gk-**************"
      }
    }
  }
}
*/
// Identity the MCP server is constructed with.
const serverInfo = { name: 'Grasp Server', version: '0.4.1' };
// MCP server instance; every tool below is registered on it.
const server = new mcp_js_1.McpServer(serverInfo);
// API key comes from the GRASP_KEY environment variable; an unset or empty
// variable yields an empty string.
const apiKey = process.env.GRASP_KEY || '';
// Grasp SDK client used to launch sessions.
const grasp = new index_1.default({ apiKey });
// State shared by the tool handlers:
// - session: the session returned by grasp.launch(), or null when none is active
// - liveviewOpened: whether the live-view URL has already been handed to `open`
let session = null;
let liveviewOpened = false;
/**
 * Tool `grasp-launch`: launch a Grasp session with live view enabled and
 * return its id as a JSON text payload (`{"sessionId": ...}`).
 *
 * Only one session is tracked at module level. A session left over from an
 * earlier launch is therefore closed first so it is not orphaned, and the
 * live-view flag is reset so the live view is opened again for the new session.
 */
server.tool('grasp-launch', 'Launch a new Grasp session with a Chromium browser instance.', async () => {
    if (session) {
        const previousSession = session;
        session = null;
        try {
            await previousSession.close();
        }
        catch (error) {
            // Best effort: a stale session that fails to close must not block a new launch.
            // Logged on stderr because stdout carries the MCP stdio messages.
            console.error('Failed to close previous Grasp session:', error);
        }
    }
    // The flag belongs to the previous session; the new one has not opened its live view yet.
    liveviewOpened = false;
    session = await grasp.launch({
        browser: {
            liveview: true,
        },
        keepAliveMS: 10000,
        timeout: 3600000,
        logLevel: 'error',
    });
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    sessionId: session.id,
                }),
            },
        ],
    };
});
/**
 * Tool `grasp-close`: close the active session, if there is one, and reset
 * the module-level session state. The same confirmation text is returned
 * whether or not a session was active.
 */
server.tool('grasp-close', 'Close the current Grasp session and release all resources.', async () => {
    const confirmation = {
        content: [{ type: 'text', text: 'Grasp session closed' }],
    };
    if (!session) {
        return confirmation;
    }
    await session.close();
    // State is only cleared once close() has resolved.
    session = null;
    liveviewOpened = false;
    return confirmation;
});
/**
 * Tool `grasp-look-at-page`: run a generated Playwright script in the session's
 * code runner that opens `url`, waits five seconds, captures a JPEG screenshot
 * through CDP and writes it to `/home/user/public`. The handler answers with a
 * JSON text payload `{"screenshot": "https://<host>/public/mcp-screenshot-<id>.jpg"}`.
 *
 * @param {{ url: string }} args - `url` is the page to capture.
 * @returns MCP tool result holding the screenshot URL as JSON text.
 * @throws {Error} When no session has been launched, or when the screenshot
 *   script exits with a non-zero exit code.
 */
server.tool('grasp-look-at-page', `Visually examine and analyze a web page by capturing a screenshot and gathering initial observations.
This is the first step in the three-phase workflow: Look → Plan → Action
**IMPORTANT: This tool returns a JSON object with format {screenshot: url} containing the screenshot URL.**
**You MUST first identify and analyze the image content to understand the target webpage structure before planning further steps.**
Purpose:
- Provide visual context and initial understanding of the target page
- Capture the current state and layout of the webpage
- Enable informed planning for subsequent automation tasks
- Serve as a foundation for strategic decision-making
Key characteristics:
- Non-intrusive observation - no interactions with page elements
- High-quality screenshot capture for visual analysis
- Returns screenshot URL in JSON format: {screenshot: "screenshot_url"}
- Quick page load assessment
- Foundation for the planning phase
Workflow:
1. Tool captures screenshot and returns {screenshot: url}
2. You must analyze the screenshot to understand page structure
3. Based on visual analysis, proceed to grasp-code-plan for detailed strategy
Use this tool when:
- Starting any web automation workflow
- Need to understand page structure before planning
- Verifying page load status and visual state
- Gathering context for complex multi-step operations
After using this tool, always analyze the returned screenshot before proceeding to grasp-code-plan.`, {
    url: zod_1.z
        .string()
        .describe('The URL of the webpage to visually examine and capture.'),
}, async ({ url }) => {
    // Work on a local reference so every step below uses the same session object.
    const activeSession = session;
    if (!activeSession) {
        throw new Error('Grasp session not initialized');
    }
    const host = activeSession.browser.getHost();
    // Timestamp used to give each screenshot file its own name.
    const id = Date.now();
    // The URL comes from the tool caller. JSON.stringify turns it into a quoted,
    // escaped string literal so it cannot break out of the generated script.
    const targetUrlLiteral = JSON.stringify(url);
    const code = `import { chromium } from 'playwright';
import { setTimeout } from 'timers/promises';
import fs from 'fs';
const browser = await chromium.launch();
const context = browser.contexts()[0] || (await browser.newContext());
const page = await context.newPage();
await page.goto(${targetUrlLiteral});
await setTimeout(5000);
const session = await context.newCDPSession(page);
const res = await session.send(
"Page.captureScreenshot",
{
"format": "jpeg",
"quality": 70,
"fromSurface": true,
"captureBeyondViewport": true,
},
);
fs.writeFileSync('/home/user/public/mcp-screenshot-${id}.jpg', Buffer.from(res.data, 'base64'));
await page.close();
await browser.close();`;
    if (!liveviewOpened) {
        const liveviewUrl = await activeSession.browser.getLiveviewPageUrl();
        if (liveviewUrl) {
            liveviewOpened = true;
            // Opening the live view is best effort: a failure is logged on stderr
            // (stdout carries the MCP stdio messages) instead of becoming an
            // unhandled promise rejection.
            Promise.resolve()
                .then(() => (0, open_1.default)(liveviewUrl))
                .catch((error) => {
                console.error('Failed to open Grasp live view:', error);
            });
        }
    }
    const result = await activeSession.codeRunner.run(code, {
        inject: true,
        syncDir: '/tmp/.grasp/code-runner',
    });
    // Do not hand out a URL for a screenshot that was never written.
    if (result.exitCode !== 0) {
        const details = result.error || result.stderr || result.stdout || `exit code ${result.exitCode}`;
        throw new Error(`Failed to capture screenshot: ${details}`);
    }
    return {
        content: [
            {
                // Ideally this would be `type: image` with `data: result.stdout`,
                // but Tras does not recognize that format.
                type: 'text',
                text: JSON.stringify({
                    // Assumes the runner's /home/user/public directory is served
                    // under https://<host>/public — confirm against the Grasp docs.
                    screenshot: `https://${host}/public/mcp-screenshot-${id}.jpg`,
                }),
                mimeType: 'image/jpeg',
            },
        ],
    };
});
/**
 * Wrap one labelled piece of runner output in a fenced block.
 */
function formatRunSection(label, body) {
    return label + '\n```\n' + body + '\n```\n\n';
}
/**
 * Render a code-runner result (`exitCode`, `stdout`, `stderr`, `error`) as the
 * text report returned to the MCP client.
 */
function formatRunResult(result) {
    const succeeded = result.exitCode === 0;
    let report = succeeded ? '✅ 代码执行成功\n\n' : '❌ 代码执行失败\n\n';
    // The error section is only part of the failure report.
    if (!succeeded && result.error) {
        report += formatRunSection('🚨 错误信息:', result.error);
    }
    if (result.stdout) {
        report += formatRunSection('📤 标准输出:', result.stdout);
    }
    if (result.stderr) {
        report += formatRunSection('⚠️ 警告信息:', result.stderr);
    }
    return report;
}
/**
 * Shared executor behind the code tools: runs the given Playwright code in the
 * active session's code runner and converts the outcome into an MCP tool result.
 *
 * The first run that obtains a live-view URL also hands it to `open`; a failure
 * to open it is logged and never aborts the run.
 *
 * @param {string} code - Playwright script source to execute.
 * @returns {Promise<object>} `{ content: [{ type: 'text', text }] }`; the result
 *   also carries `isError: true` when the script exits non-zero or when running
 *   it throws (including when no session has been launched). Never rejects.
 */
async function executePlaywrightCode(code) {
    try {
        const activeSession = session;
        if (!activeSession) {
            throw new Error('Grasp session not initialized');
        }
        const codeRunner = activeSession.codeRunner;
        if (!liveviewOpened) {
            const liveviewUrl = await activeSession.browser.getLiveviewPageUrl();
            if (liveviewUrl) {
                liveviewOpened = true;
                // Best effort: log on stderr (stdout carries the MCP stdio messages)
                // rather than leaving a rejected promise unhandled.
                Promise.resolve()
                    .then(() => (0, open_1.default)(liveviewUrl))
                    .catch((error) => {
                    console.error('Failed to open Grasp live view:', error);
                });
            }
        }
        const result = await codeRunner.run(code, {
            inject: true,
            syncDir: '/tmp/.grasp/code-runner',
        });
        const response = {
            content: [{ type: 'text', text: formatRunResult(result) }],
        };
        if (result.exitCode !== 0) {
            // Mark the failure so MCP clients do not treat it as a successful call.
            response.isError = true;
        }
        return response;
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: 'text',
                    text: `❌ 执行过程中发生错误:\n\n🚨 错误详情:\n\`\`\`\n${errorMessage}\n\`\`\``,
                },
            ],
            isError: true,
        };
    }
}
// Tool `grasp-code-plan`: second phase of the Look → Plan → Action workflow.
// The handler forwards the submitted code to executePlaywrightCode unchanged.
const codePlanToolDescription = `Execute Playwright code for strategic analysis and action planning in a secure sandbox environment.
This is the second step in the three-phase workflow: Look → Plan → Action
Purpose:
- Conduct detailed reconnaissance and structural analysis of web pages
- Develop comprehensive understanding of page elements and interactions
- Formulate strategic plans for subsequent automation actions
- Identify optimal selectors, timing, and interaction patterns
- Validate assumptions and gather actionable intelligence
Key characteristics:
- Deep exploration and analysis without making assumptions
- Systematic extraction of page structure and element properties
- Strategic information gathering for informed decision making
- Comprehensive documentation of findings and observations
- Risk assessment and contingency planning
Planning activities:
- Mapping page layout and navigation structure
- Analyzing form fields, validation rules, and input requirements
- Identifying interactive elements and their behavioral patterns
- Testing element visibility, accessibility, and responsiveness
- Documenting optimal interaction sequences and timing
- Capturing reference screenshots for comparison
- Evaluating potential challenges and alternative approaches
The sandbox provides:
- Chromium browser with full JavaScript support
- Network access for web requests
- File system access for downloads/uploads
- Isolated execution environment for security
After planning, use grasp-code-action to execute the formulated strategy.`;
// Description of the `code` argument, including two example scripts.
const codePlanCodeDescription = `JavaScript code using Playwright API for strategic page analysis and action planning.
Focus on systematic exploration and strategic analysis to formulate actionable plans.
The code should gather comprehensive intelligence, analyze element patterns, and develop
robust strategies for subsequent automation actions.
If the code includes any screenshots or downloads, **always save them to the \`./code-runner\` directory**.
Strategic planning patterns:
Example 1 - Page structure analysis:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com');
// Analyze page structure
const title = await page.title();
const forms = await page.$$('form');
const buttons = await page.$$('button, input[type="submit"]');
console.log('Page Analysis:');
console.log('Title:', title);
console.log('Forms found:', forms.length);
console.log('Buttons found:', buttons.length);
// Take screenshot for visual analysis
await page.screenshot({ path: './code-runner/page_analysis.png' });
await browser.close();
\`\`\`
Example 2 - Form field discovery:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com/form');
// Discover form fields
const inputs = await page.$$eval('input', inputs =>
inputs.map(input => ({
type: input.type,
name: input.name,
placeholder: input.placeholder,
required: input.required
}))
);
console.log('Form fields discovered:', JSON.stringify(inputs, null, 2));
await browser.close();
\`\`\`
Note: Always include browser.close() to properly cleanup resources.
Focus on gathering information rather than performing actions.`;
server.tool('grasp-code-plan', codePlanToolDescription, {
    code: zod_1.z.string().describe(codePlanCodeDescription),
}, async ({ code }) => executePlaywrightCode(code));
// Tool `grasp-code-action`: final phase of the Look → Plan → Action workflow.
// The handler forwards the submitted code to executePlaywrightCode unchanged.
const codeActionToolDescription = `Execute Playwright code to perform precise actions and achieve specific automation goals.
This is the final step in the three-phase workflow: Look → Plan → Action
Purpose:
- Execute the strategic plan formulated in the planning phase
- Perform precise, goal-oriented automation tasks
- Complete complex workflows with confidence and accuracy
- Achieve specific objectives based on gathered intelligence
- Handle dynamic scenarios and edge cases effectively
Key characteristics:
- Action-oriented execution based on strategic planning
- Precise implementation of predetermined strategies
- Robust error handling and recovery mechanisms
- Goal-focused task completion with measurable outcomes
- Adaptive execution that responds to real-time conditions
Execution capabilities:
- Form filling with validated data and proper sequencing
- Strategic navigation and multi-step user journeys
- Complex interactions with dynamic page elements
- Transaction completion and workflow finalization
- Data extraction and result verification
- File downloads and content capture
- Authentication and session management
- Error recovery and alternative path execution
The sandbox provides:
- Chromium browser with full JavaScript support
- Network access for web requests
- File system access for downloads/uploads
- Isolated execution environment for security
This tool should only be used after thorough planning with grasp-code-plan.
Code execution is asynchronous and results will include any console output,
errors, and return values from your Playwright script.`;
// Description of the `code` argument, including two example scripts.
const codeActionCodeDescription = `JavaScript code using Playwright API to execute strategic plans and achieve automation goals.
The code should implement the strategic plan developed in the planning phase,
executing precise actions with confidence and handling edge cases effectively.
Focus on goal achievement and robust execution based on gathered intelligence.
If the code includes any screenshots or downloads, **always save them to the \`./code-runner\` directory**.
Strategic execution patterns:
Example 1 - Form submission:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com/form');
// Perform specific actions
await page.fill('input[name="email"]', 'user@example.com');
await page.fill('input[name="password"]', 'password123');
await page.click('button[type="submit"]');
// Wait for result and capture
await page.waitForNavigation();
await page.screenshot({ path: './code-runner/result.png' });
const result = await page.textContent('.success-message');
console.log('Action result:', result);
await browser.close();
\`\`\`
Example 2 - Data extraction after interaction:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com');
// Perform search action
await page.fill('input[name="search"]', 'query');
await page.click('button[type="submit"]');
// Extract results
await page.waitForSelector('.results');
const results = await page.$$eval('.result-item', items =>
items.map(item => item.textContent)
);
console.log('Search results:', results);
await browser.close();
\`\`\`
Note: Always include browser.close() to properly cleanup resources.
Focus on completing specific tasks and achieving goals.`;
server.tool('grasp-code-action', codeActionToolDescription, {
    code: zod_1.z.string().describe(codeActionCodeDescription),
}, async ({ code }) => executePlaywrightCode(code));
/**
 * Entry point: connect the MCP server to a stdio transport, which receives
 * messages on stdin and sends messages on stdout.
 *
 * @returns {Promise<void>} Resolves once the server is connected.
 */
async function main() {
    const transport = new stdio_js_1.StdioServerTransport();
    await server.connect(transport);
    // Diagnostics go to stderr: stdout is the channel the transport sends
    // messages on, so writing plain text there would corrupt the stream.
    console.error('Server started');
}
// Report a start-up failure and exit non-zero instead of leaving the
// rejection unhandled.
main().catch((error) => {
    console.error('Failed to start Grasp MCP server:', error);
    process.exit(1);
});
//# sourceMappingURL=mcp-server.js.map