UNPKG

@democratize-quality/mcp-server

Version:

MCP Server for democratizing quality through browser automation and comprehensive API testing capabilities

326 lines (280 loc) 13.7 kB
const CDP = require('chrome-remote-interface'); //const launchChrome = require('chrome-launcher').launch; const fs = require('fs'); const path = require('path'); const { findNodeBySelector, getElementClickCoordinates } = require('../utils/browserHelpers'); // Import helpers const config = require('../config'); // A private in-memory store for our browser instances within the service // Each key will be a unique browserId, value will be { chromeInstance, cdpClient, userDataDir } const activeBrowsers = {}; /** * Ensures a user data directory exists. * @param {string} dirPath - The absolute path to the user data directory. */ function ensureUserDataDir(dirPath) { if (!fs.existsSync(dirPath)) { fs.mkdirSync(dirPath, { recursive: true }); console.log(`[BrowserService] Created user data directory: ${dirPath}`); } else { console.log(`[BrowserService] Using existing user data directory: ${dirPath}`); } } /** * Retrieves a browser instance by ID. * @param {string} browserId - The ID of the browser instance. * @returns {object|null} - The browser instance object or null if not found. */ function getBrowserInstance(browserId) { return activeBrowsers[browserId]; } /** * Launches a new Chrome instance. * @param {boolean} headless - Whether to run Chrome in headless mode. * @param {number} port - The port for remote debugging. * @param {string|null} userDataDir - Path to the user data directory for persistent profiles. * @returns {Promise<object>} - Object containing browserId, port, and resolvedUserDataDir. */ async function launchBrowser(headless, port, userDataDir) { let chrome; let client; const { launch: launchChrome } = await import('chrome-launcher'); let resolvedUserDataDir = null; try { if (userDataDir) { resolvedUserDataDir = path.resolve(process.cwd(), userDataDir); ensureUserDataDir(resolvedUserDataDir); } console.log(`[BrowserService] Launching Chrome (headless: ${headless}, userDataDir: ${resolvedUserDataDir || 'temporary'})...`); const launchOptions = { port: port, userDataDir: resolvedUserDataDir, // Set userDataDir if provided chromeFlags: [ headless ? '--headless=new' : '', '--disable-gpu', '--disable-setuid-sandbox', '--no-sandbox' ].filter(Boolean) }; chrome = await launchChrome(launchOptions); // Generate browserId: profile-name if userDataDir is used, otherwise a unique timestamped ID const browserId = userDataDir ? `profile-${path.basename(resolvedUserDataDir)}` : `browser-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`; if (activeBrowsers[browserId]) { console.warn(`[BrowserService] Warning: Browser ID '${browserId}' already exists. Overwriting.`); // In a real scenario, you might want more sophisticated handling here, // e.g., error if ID exists, or try to attach to existing. // For now, we're assuming a new launch means a fresh start or overwrite. try { // Attempt to clean up old instance if it exists if (activeBrowsers[browserId].cdpClient) activeBrowsers[browserId].cdpClient.close(); if (activeBrowsers[browserId].chromeInstance) await activeBrowsers[browserId].chromeInstance.kill(); } catch (cleanupErr) { console.error(`[BrowserService] Error cleaning up old instance for ${browserId}:`, cleanupErr.message); } } activeBrowsers[browserId] = { chromeInstance: chrome, cdpClient: null, userDataDir: resolvedUserDataDir }; console.log(`[BrowserService] Chrome launched on port ${chrome.port} with ID: ${browserId}`); client = await CDP({ port: chrome.port }); activeBrowsers[browserId].cdpClient = client; const { Page, Runtime, DOM, Network, Security, Input } = client; // Enable Input domain here await Page.enable(); await Runtime.enable(); await DOM.enable(); await Network.enable(); await Security.enable(); //await Input.enable(); // Enable Input domain console.log(`[BrowserService] CDP client connected and domains enabled for ${browserId}.`); return { browserId, port: chrome.port, userDataDir: resolvedUserDataDir }; } catch (error) { console.error(`[BrowserService] Error launching browser:`, error); if (chrome && !client) { // If chrome launched but CDP connection failed try { await chrome.kill(); console.log(`[BrowserService] Partially launched Chrome instance killed due to error.`); } catch (killError) { console.error(`[BrowserService] Error killing partially launched Chrome:`, killError); } } throw error; // Re-throw to be caught by the route handler } } /** * Navigates a specific browser instance to a URL. * @param {string} browserId - The ID of the browser instance. * @param {string} url - The URL to navigate to. * @returns {Promise<void>} */ async function navigateBrowser(browserId, url) { const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { cdpClient } = instance; console.log(`[BrowserService] Browser ${browserId} navigating to: ${url}`); await cdpClient.Page.navigate({ url: url }); await cdpClient.Page.loadEventFired(); // Wait for page to load console.log(`[BrowserService] Browser ${browserId} successfully navigated to ${url}.`); } /** * Takes a screenshot of a specific browser page and can save it to disk. * @param {string} browserId - The ID of the browser instance. * @param {string} [fileName='screenshot.png'] - Optional: The name of the file to save the screenshot as. * @param {boolean} [saveToDisk=true] - Optional: Whether to save the screenshot to disk. * @returns {Promise<string>} - Base64 encoded screenshot data. */ async function takeScreenshot(browserId, fileName = 'screenshot.png', saveToDisk = true) { // Added fileName and saveToDisk params const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { cdpClient } = instance; console.log(`[BrowserService] Taking screenshot for browser ${browserId}...`); const screenshot = await cdpClient.Page.captureScreenshot({ format: 'png', quality: 80 }); if (saveToDisk) { const screenshotBuffer = Buffer.from(screenshot.data, 'base64'); // Ensure the output directory exists if (!fs.existsSync(config.OUTPUT_DIR)) { fs.mkdirSync(config.OUTPUT_DIR, { recursive: true }); } const filePath = path.join(config.OUTPUT_DIR, fileName); fs.writeFileSync(filePath, screenshotBuffer); console.log(`[BrowserService] Screenshot saved to ${filePath}`); } console.log(`[BrowserService] Screenshot captured for browser ${browserId}.`); return screenshot.data; // Always return base64 data to the caller (e.g., AI agent) } /** * Gets the current DOM content of a specific browser page. * @param {string} browserId - The ID of the browser instance. * @returns {Promise<string>} - The outer HTML of the document. */ async function getDomContent(browserId) { const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { cdpClient } = instance; console.log(`[BrowserService] Getting DOM for browser ${browserId}...`); const documentNode = await cdpClient.DOM.getDocument({ depth: -1 }); const outerHTML = await cdpClient.DOM.getOuterHTML({ nodeId: documentNode.root.nodeId }); console.log(`[BrowserService] DOM content retrieved for browser ${browserId}.`); return outerHTML.outerHTML; } /** * Clicks an element identified by a locator. * @param {string} browserId - The ID of the browser instance. * @param {object} locator - { type: 'css'|'xpath', value: 'selector' } * @returns {Promise<object>} - Coordinates of the click. */ async function clickElement(browserId, locator) { const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { cdpClient } = instance; const { Input } = cdpClient; console.log(`[BrowserService] Browser ${browserId}: Attempting to click element with locator:`, locator); const nodeId = await findNodeBySelector(cdpClient, locator.type, locator.value); if (!nodeId) { throw new Error(`Element not found for locator: ${JSON.stringify(locator)}`); } const coords = await getElementClickCoordinates(cdpClient, nodeId); if (!coords) { throw new Error(`Could not determine click coordinates for element: ${JSON.stringify(locator)}`); } await Input.dispatchMouseEvent({ type: 'mousePressed', button: 'left', x: coords.x, y: coords.y, clickCount: 1 }); await Input.dispatchMouseEvent({ type: 'mouseReleased', button: 'left', x: coords.x, y: coords.y, clickCount: 1 }); console.log(`[BrowserService] Browser ${browserId}: Clicked element at x: ${coords.x}, y: ${coords.y}`); return coords; } /** * Types text into an element identified by a locator. * @param {string} browserId - The ID of the browser instance. * @param {object} locator - { type: 'css'|'xpath', value: 'selector' } * @param {string} text - The text to type. * @returns {Promise<void>} */ async function typeIntoElement(browserId, locator, text) { const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { cdpClient } = instance; const { DOM, Input } = cdpClient; console.log(`[BrowserService] Browser ${browserId}: Attempting to type "${text}" into element with locator:`, locator); const nodeId = await findNodeBySelector(cdpClient, locator.type, locator.value); if (!nodeId) { throw new Error(`Element not found for locator: ${JSON.stringify(locator)}`); } await DOM.focus({ nodeId: nodeId }); await new Promise(resolve => setTimeout(resolve, 50)); // Small delay for focus // Clear existing text: Cmd/Ctrl+A then Backspace await Input.dispatchKeyEvent({ type: 'keyDown', text: 'a', modifiers: (process.platform === 'darwin' ? 4 : 2) }); // 4 for Meta (Cmd), 2 for Control await Input.dispatchKeyEvent({ type: 'keyUp', text: 'a', modifiers: (process.platform === 'darwin' ? 4 : 2) }); await Input.dispatchKeyEvent({ type: 'keyDown', key: 'Backspace' }); await Input.dispatchKeyEvent({ type: 'keyUp', key: 'Backspace' }); await new Promise(resolve => setTimeout(resolve, 50)); // Small delay for clear for (const char of text) { await Input.dispatchKeyEvent({ type: 'keyDown', text: char, key: char }); await Input.dispatchKeyEvent({ type: 'keyUp', text: char, key: char }); await new Promise(resolve => setTimeout(resolve, 10)); // Small delay for realism } console.log(`[BrowserService] Browser ${browserId}: Typed "${text}" into element.`); } /** * Closes a specific browser instance. * @param {string} browserId - The ID of the browser instance. * @returns {Promise<void>} */ async function closeBrowser(browserId) { const instance = getBrowserInstance(browserId); if (!instance) throw new Error(`Browser instance with ID '${browserId}' not found.`); const { chromeInstance, cdpClient, userDataDir } = instance; console.log(`[BrowserService] Closing browser ${browserId} (profile: ${userDataDir || 'temporary'})...`); if (cdpClient) { try { cdpClient.close(); console.log(`[BrowserService] CDP client disconnected for ${browserId}.`); } catch (err) { console.warn(`[BrowserService] Error during CDP client close for ${browserId}:`, err.message); } } if (chromeInstance) { try { await chromeInstance.kill(); console.log(`[BrowserService] Chrome instance ${browserId} killed.`); } catch (err) { console.warn(`[BrowserService] Error during Chrome instance kill for ${browserId}:`, err.message); } } delete activeBrowsers[browserId]; // Remove from our store console.log(`[BrowserService] Browser ${browserId} removed from active list.`); } /** * Shuts down all active browser instances. Used for graceful server shutdown. * @returns {Promise<void>} */ async function shutdownAllBrowsers() { const browserIds = Object.keys(activeBrowsers); if (browserIds.length === 0) { console.log('[BrowserService] No active browsers to shut down.'); return; } console.log(`[BrowserService] Shutting down ${browserIds.length} active browser(s)...`); await Promise.all(browserIds.map(id => closeBrowser(id).catch(err => { console.error(`[BrowserService] Failed to gracefully close browser ${id}:`, err.message); // Continue with other shutdowns even if one fails }))); console.log('[BrowserService] All active browsers shut down.'); } module.exports = { launchBrowser, navigateBrowser, getBrowserInstance, takeScreenshot, getDomContent, clickElement, typeIntoElement, closeBrowser, shutdownAllBrowsers // Export for server.js to use };