cerevox
Version:
TypeScript SDK for browser automation and secure command execution in highly available and scalable micro computer environments
398 lines (347 loc) • 14.9 kB
JavaScript
#!/usr/bin/env node
"use strict";
/*
* MCP Server
*/
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.run = run;
const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const zod_1 = require("zod");
const index_1 = __importDefault(require("../../index"));
const open_1 = __importDefault(require("open"));
const constants_1 = require("../../utils/constants");
/* Configuration
{
"mcpServers": {
"cerevox-browser-use": {
"command": "npx",
"args": ["-y", "cerevox-browser-use"],
"env": {
"CEREVOX_API_KEY": "ck-**************",
"CEREVOX_SYNC_DIR": "${workspaceFolder}/cerevox-output"
}
}
}
}
*/
// Module-level wiring shared by every tool handler registered in this file:
// the MCP server instance, the Cerevox SDK client, and the mutable session state.
const serverInfo = {
    name: 'Cerevox Browser Use',
    version: constants_1.VERSION,
};
const server = new mcp_js_1.McpServer(serverInfo);
// The API key comes from the environment; an unset variable falls back to ''.
const cerevoxOptions = {
    apiKey: process.env.CEREVOX_API_KEY || '',
    logLevel: 'error',
};
const cerevox = new index_1.default(cerevoxOptions);
// Session created by the `cerevox-launch` tool, or null while none is active.
let session = null;
// True once the liveview page has been opened for the active session.
let liveviewOpened = false;
/**
 * Tool `cerevox-launch`: starts a new Cerevox session and returns its id as a
 * JSON text payload of the form {"sessionId": ...}.
 *
 * When a session is already active it is closed first. Overwriting the module
 * level `session` reference without closing it would leave the earlier session
 * unreachable by `cerevox-close`. `liveviewOpened` is reset so the liveview
 * page of the new session can be opened by the other tools.
 */
server.tool('cerevox-launch', 'Launch a new Cerevox session with a Chromium browser instance.', async () => {
    if (session) {
        const previousSession = session;
        session = null;
        try {
            await previousSession.close();
        }
        catch (error) {
            // Best effort: a failed cleanup must not prevent launching a new session.
            // Logged on stderr because stdout carries the MCP protocol messages.
            console.error('Failed to close previous Cerevox session:', error);
        }
    }
    liveviewOpened = false;
    session = await cerevox.launch({
        timeout: 60,
    });
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    sessionId: session.id,
                }),
            },
        ],
    };
});
// Tool `cerevox-close`: closes the active session (if there is one), clears the
// shared session state, and always answers with the same confirmation text.
server.tool('cerevox-close', 'Close the current Cerevox session and release all resources.', async () => {
    const confirmation = { type: 'text', text: 'Cerevox session closed' };
    if (!session) {
        // Nothing to release; the reply is the same as after a real close.
        return { content: [confirmation] };
    }
    await session.close();
    session = null;
    liveviewOpened = false;
    return { content: [confirmation] };
});
/**
 * Tool `cerevox-look-at-page`: runs a generated Playwright script through the
 * session's code runner that navigates to `url`, waits five seconds, captures a
 * JPEG screenshot via CDP and writes it to /home/user/public. The handler then
 * returns the screenshot's URL as JSON text: {"screenshot": "<url>"}.
 *
 * Throws when no session has been launched.
 *
 * Notes:
 * - `url` is embedded into the generated script with JSON.stringify so that
 *   quotes, backslashes or newlines in the value cannot break out of the string
 *   literal and alter the script.
 * - The tool description names the `screenshot` key, matching the payload that
 *   is actually returned.
 */
server.tool('cerevox-look-at-page', `Visually examine and analyze a web page by capturing a screenshot and gathering initial observations.
This is the first step in the three-phase workflow: Look → Plan → Action
**IMPORTANT: This tool returns a JSON object with format {screenshot: url} containing the screenshot URL.**
**You MUST first identify and analyze the image content to understand the target webpage structure before planning further steps.**
Purpose:
- Provide visual context and initial understanding of the target page
- Capture the current state and layout of the webpage
- Enable informed planning for subsequent automation tasks
- Serve as a foundation for strategic decision-making
Key characteristics:
- Non-intrusive observation - no interactions with page elements
- High-quality screenshot capture for visual analysis
- Returns screenshot URL in JSON format: {screenshot: "screenshot_url"}
- Quick page load assessment
- Foundation for the planning phase
Workflow:
1. Tool captures screenshot and returns {screenshot: url}
2. You must analyze the screenshot to understand page structure
3. Based on visual analysis, proceed to cerevox-code-plan for detailed strategy
Use this tool when:
- Starting any web automation workflow
- Need to understand page structure before planning
- Verifying page load status and visual state
- Gathering context for complex multi-step operations
After using this tool, always analyze the returned screenshot before proceeding to cerevox-code-plan.`, {
    url: zod_1.z
        .string()
        .describe('The URL of the webpage to visually examine and capture.'),
}, async ({ url }) => {
    if (!session) {
        throw new Error('Cerevox session not initialized');
    }
    // Timestamp used to give each screenshot file a distinct name.
    const id = Date.now();
    // JSON.stringify yields a safely escaped JavaScript string literal for the URL.
    const code = `import { chromium } from 'playwright';
import { setTimeout } from 'timers/promises';
import fs from 'fs';
const browser = await chromium.launch();
const context = browser.contexts()[0] || (await browser.newContext());
const page = await context.newPage();
await page.goto(${JSON.stringify(url)});
await setTimeout(5000);
const session = await context.newCDPSession(page);
const res = await session.send(
"Page.captureScreenshot",
{
"format": "jpeg",
"quality": 70,
"fromSurface": true,
"captureBeyondViewport": true,
},
);
fs.writeFileSync('/home/user/public/mcp-screenshot-${id}.jpg', Buffer.from(res.data, 'base64'));
await page.close();
await browser.close();`;
    if (!liveviewOpened) {
        const liveviewUrl = await session.browser.getLiveviewPageUrl();
        if (liveviewUrl) {
            // Opening the liveview page is a convenience; a failure to open it
            // is logged on stderr (stdout carries the MCP protocol messages)
            // instead of surfacing as an unhandled rejection.
            (0, open_1.default)(liveviewUrl).catch((error) => {
                console.error('Failed to open Cerevox liveview page:', error);
            });
            liveviewOpened = true;
        }
    }
    await session.codeRunner.run(code, {
        inject: true,
        syncDir: '/tmp/.cerevox/code-runner',
    });
    const screenshotUrl = session.sandbox.getUrl(`/public/mcp-screenshot-${id}.jpg`);
    return {
        content: [
            {
                // Ideally this would be `type: image, data: result.stdout`, but
                // Tras does not recognize that format, so the URL is sent as text.
                type: 'text',
                text: JSON.stringify({
                    screenshot: screenshotUrl,
                }),
                mimeType: 'image/jpeg',
            },
        ],
    };
});
/**
 * Shared code-execution helper used by the `cerevox-code-plan` and
 * `cerevox-code-action` tools.
 *
 * Runs `code` through the active session's code runner and returns the
 * JSON-serialized result as an MCP text content item. On the first call for a
 * session it also tries to open the session's liveview page.
 *
 * This function does not throw: any failure (including a missing session) is
 * converted into a text result describing the error, flagged with
 * `isError: true` so that clients can tell it apart from a successful run.
 *
 * @param {string} code - Playwright script to execute.
 * @param {string} [syncDir] - Passed to the code runner as `syncDir`; defaults
 *   to CEREVOX_SYNC_DIR or './cerevox-output'.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
async function executePlaywrightCode(code, syncDir = process.env.CEREVOX_SYNC_DIR || './cerevox-output') {
    try {
        // Run the supplied Playwright code.
        if (!session) {
            throw new Error('Cerevox session not initialized');
        }
        const codeRunner = session.codeRunner;
        if (!liveviewOpened) {
            const liveviewUrl = await session.browser.getLiveviewPageUrl();
            if (liveviewUrl) {
                // Opening the liveview page is a convenience and is not awaited,
                // so attach a handler: otherwise a failure to open it would
                // surface as an unhandled promise rejection. Logged on stderr.
                (0, open_1.default)(liveviewUrl).catch((error) => {
                    console.error('Failed to open Cerevox liveview page:', error);
                });
                liveviewOpened = true;
            }
        }
        const result = await codeRunner.run(code, {
            inject: true,
            syncDir,
        });
        return {
            content: [{ type: 'text', text: JSON.stringify(result) }],
        };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
            content: [
                {
                    type: 'text',
                    text: `❌ 执行过程中发生错误:\n\n🚨 错误详情:\n\`\`\`\n${errorMessage}\n\`\`\``,
                },
            ],
            // Mark the result as a tool-level error for MCP clients.
            isError: true,
        };
    }
}
// Tool `cerevox-code-plan`: second phase of the Look → Plan → Action workflow.
// The description and the input schema are kept in named constants; the handler
// simply forwards the submitted code to executePlaywrightCode.
const codePlanDescription = `Execute Playwright code for strategic analysis and action planning in a secure sandbox environment.
This is the second step in the three-phase workflow: Look → Plan → Action
Purpose:
- Conduct detailed reconnaissance and structural analysis of web pages
- Develop comprehensive understanding of page elements and interactions
- Formulate strategic plans for subsequent automation actions
- Identify optimal selectors, timing, and interaction patterns
- Validate assumptions and gather actionable intelligence
Key characteristics:
- Deep exploration and analysis without making assumptions
- Systematic extraction of page structure and element properties
- Strategic information gathering for informed decision making
- Comprehensive documentation of findings and observations
- Risk assessment and contingency planning
Planning activities:
- Mapping page layout and navigation structure
- Analyzing form fields, validation rules, and input requirements
- Identifying interactive elements and their behavioral patterns
- Testing element visibility, accessibility, and responsiveness
- Documenting optimal interaction sequences and timing
- Capturing reference screenshots for comparison
- Evaluating potential challenges and alternative approaches
The sandbox provides:
- Chromium browser with full JavaScript support
- Network access for web requests
- File system access for downloads/uploads
- Isolated execution environment for security
After planning, use cerevox-code-action to execute the formulated strategy.`;
const codePlanInputShape = {
    code: zod_1.z.string().describe(`JavaScript code using Playwright API for strategic page analysis and action planning.
Focus on systematic exploration and strategic analysis to formulate actionable plans.
The code should gather comprehensive intelligence, analyze element patterns, and develop
robust strategies for subsequent automation actions.
**Screenshot Guidelines:**
- Only take screenshots when necessary for analysis or documentation
- Always use optimized parameters: \`{ path: 'filename.jpg', type: 'jpeg', quality: 70 }\`
- This prevents large image files that could cause browser crashes
Strategic planning patterns:
Example 1 - Page structure analysis:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com');
// Analyze page structure
const title = await page.title();
const forms = await page.$$('form');
const buttons = await page.$$('button, input[type="submit"]');
console.log('Page Analysis:');
console.log('Title:', title);
console.log('Forms found:', forms.length);
console.log('Buttons found:', buttons.length);
// Take screenshot only if needed for visual analysis
await page.screenshot({ path: 'page_analysis.jpg', type: 'jpeg', quality: 70 });
await browser.close();
\`\`\`
Example 2 - Form field discovery:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com/form');
// Discover form fields
const inputs = await page.$$eval('input', inputs =>
inputs.map(input => ({
type: input.type,
name: input.name,
placeholder: input.placeholder,
required: input.required
}))
);
console.log('Form fields discovered:', JSON.stringify(inputs, null, 2));
await browser.close();
\`\`\`
Note: Always include browser.close() to properly cleanup resources.
Focus on gathering information rather than performing actions.`),
};
server.tool('cerevox-code-plan', codePlanDescription, codePlanInputShape, async ({ code }) => executePlaywrightCode(code));
/**
 * Tool `cerevox-code-action`: final phase of the Look → Plan → Action workflow.
 * The handler forwards the submitted Playwright code to executePlaywrightCode
 * and returns its result unchanged.
 *
 * The form-submission example in the parameter description waits for the
 * element it reads next instead of calling `page.waitForNavigation()` after
 * the click has already completed: that call can miss a navigation which has
 * already finished and then block until it times out.
 */
server.tool('cerevox-code-action', `Execute Playwright code to perform precise actions and achieve specific automation goals.
This is the final step in the three-phase workflow: Look → Plan → Action
Purpose:
- Execute the strategic plan formulated in the planning phase
- Perform precise, goal-oriented automation tasks
- Complete complex workflows with confidence and accuracy
- Achieve specific objectives based on gathered intelligence
- Handle dynamic scenarios and edge cases effectively
Key characteristics:
- Action-oriented execution based on strategic planning
- Precise implementation of predetermined strategies
- Robust error handling and recovery mechanisms
- Goal-focused task completion with measurable outcomes
- Adaptive execution that responds to real-time conditions
Execution capabilities:
- Form filling with validated data and proper sequencing
- Strategic navigation and multi-step user journeys
- Complex interactions with dynamic page elements
- Transaction completion and workflow finalization
- Data extraction and result verification
- File downloads and content capture
- Authentication and session management
- Error recovery and alternative path execution
The sandbox provides:
- Chromium browser with full JavaScript support
- Network access for web requests
- File system access for downloads/uploads
- Isolated execution environment for security
This tool should only be used after thorough planning with cerevox-code-plan.
Code execution is asynchronous and results will include any console output,
errors, and return values from your Playwright script.`, {
    code: zod_1.z.string().describe(`JavaScript code using Playwright API to execute strategic plans and achieve automation goals.
The code should implement the strategic plan developed in the planning phase,
executing precise actions with confidence and handling edge cases effectively.
Focus on goal achievement and robust execution based on gathered intelligence.
**Screenshot Guidelines:**
- Only take screenshots when necessary for verification or documentation
- Always use optimized parameters: \`{ path: 'filename.jpg', type: 'jpeg', quality: 70 }\`
- This prevents large image files that could cause browser crashes
Strategic execution patterns:
Example 1 - Form submission:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com/form');
// Perform specific actions
await page.fill('input[name="email"]', 'user@example.com');
await page.fill('input[name="password"]', 'password123');
await page.click('button[type="submit"]');
// Wait for result and capture only if needed
await page.waitForSelector('.success-message');
await page.screenshot({ path: 'result.jpg', type: 'jpeg', quality: 70 });
const result = await page.textContent('.success-message');
console.log('Action result:', result);
await browser.close();
\`\`\`
Example 2 - Data extraction after interaction:
\`\`\`typescript
import { chromium } from 'playwright';
const browser = await chromium.launch();
const page = await browser.newPage();
await page.goto('https://example.com');
// Perform search action
await page.fill('input[name="search"]', 'query');
await page.click('button[type="submit"]');
// Extract results
await page.waitForSelector('.results');
const results = await page.$$eval('.result-item', items =>
items.map(item => item.textContent)
);
console.log('Search results:', results);
await browser.close();
\`\`\`
Note: Always include browser.close() to properly cleanup resources.
Focus on completing specific tasks and achieving goals.`),
}, async ({ code }) => {
    return await executePlaywrightCode(code);
});
/**
 * Connects the MCP server to a stdio transport, which receives messages on
 * stdin and sends messages on stdout.
 *
 * The startup notice is written to stderr: stdout is the transport's message
 * channel, so printing anything else there would corrupt the protocol stream.
 *
 * @returns {Promise<void>} Resolves once the server is connected; rejects if
 *   connecting fails.
 */
async function run() {
    const transport = new stdio_js_1.StdioServerTransport();
    await server.connect(transport);
    console.error('Server started');
}
// Start the server only when this file is executed directly (not when it is
// required as a module). A startup failure is reported on stderr and ends the
// process with a non-zero exit code instead of leaving a floating rejection.
if (require.main === module) {
    run().catch((error) => {
        console.error('Failed to start Cerevox Browser Use MCP server:', error);
        process.exit(1);
    });
}
//# sourceMappingURL=browser-use.js.map