UNPKG

@claudemind/mcp-webresearch

Version:

MCP server for web research

github.com/Hawstein/mcp-webresearch

1,057 lines (1,053 loc) • 53.5 kB

JavaScript

#!/usr/bin/env node
// Core dependencies for MCP server and protocol handling
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListResourcesRequestSchema,
  ListToolsRequestSchema,
  ReadResourceRequestSchema,
  ListPromptsRequestSchema,
  GetPromptRequestSchema,
  McpError,
  ErrorCode,
} from "@modelcontextprotocol/sdk/types.js";
// Web scraping and content processing dependencies
import { chromium } from 'playwright';
import TurndownService from "turndown";
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';

// Temp directory that holds every screenshot written by this process
const SCREENSHOTS_DIR = fs.mkdtempSync(path.join(os.tmpdir(), 'mcp-screenshots-'));

// Turndown service for converting HTML to Markdown,
// configured with specific formatting preferences
const turndownService = new TurndownService({
  headingStyle: 'atx', // Use # style headings
  hr: '---', // Horizontal rule style
  bulletListMarker: '-', // List item marker
  codeBlockStyle: 'fenced', // Use ``` for code blocks
  emDelimiter: '_', // Italics style
  strongDelimiter: '**', // Bold style
  linkStyle: 'inlined', // Use inline links
});

// Custom Turndown rules for better content extraction
// Remove script and style tags completely
turndownService.addRule('removeScripts', {
  filter: ['script', 'style', 'noscript'],
  replacement: () => ''
});

// Preserve link elements with their href attributes
turndownService.addRule('preserveLinks', {
  filter: 'a',
  replacement: (content, node) => {
    const element = node;
    const href = element.getAttribute('href');
    return href ? `[${content}](${href})` : content;
  }
});

// Preserve image elements with their src and alt attributes
turndownService.addRule('preserveImages', {
  filter: 'img',
  replacement: (content, node) => {
    const element = node;
    const alt = element.getAttribute('alt') || '';
    const src = element.getAttribute('src');
    return src ? `![${alt}](${src})` : '';
  }
});

/**
 * Write a base64-encoded screenshot into SCREENSHOTS_DIR.
 *
 * @param {string} screenshot - Base64-encoded image data.
 * @param {string} title - Page title used to derive the filename.
 * @returns {Promise<string>} Path of the file that was written.
 * @throws {McpError} InvalidRequest when the decoded image exceeds 5MB.
 */
async function saveScreenshot(screenshot, title) {
  const buffer = Buffer.from(screenshot, 'base64');

  // Check size before saving
  const MAX_SIZE = 5 * 1024 * 1024; // 5MB
  if (buffer.length > MAX_SIZE) {
    throw new McpError(ErrorCode.InvalidRequest, `Screenshot too large: ${Math.round(buffer.length / (1024 * 1024))}MB exceeds ${MAX_SIZE / (1024 * 1024)}MB limit`);
  }

  // Titles come from arbitrary web pages: they may be empty, or long enough
  // to exceed the filesystem's filename limit, so cap the length and fall
  // back to a fixed name.
  const MAX_TITLE_LENGTH = 100;
  const safeTitle = String(title ?? '')
    .replace(/[^a-z0-9]/gi, '_')
    .toLowerCase()
    .slice(0, MAX_TITLE_LENGTH) || 'untitled';
  const timestamp = Date.now();
  const filename = `${safeTitle}-${timestamp}.png`;
  const filepath = path.join(SCREENSHOTS_DIR, filename);

  await fs.promises.writeFile(filepath, buffer);
  return filepath;
}

/**
 * Remove the screenshots directory and everything in it.
 * Best-effort: failures are logged to stderr and never thrown.
 *
 * @returns {Promise<void>}
 */
async function cleanupScreenshots() {
  try {
    // One recursive removal instead of unlink-each-then-rmdir, so a single
    // failing file no longer leaves the directory behind.
    await fs.promises.rm(SCREENSHOTS_DIR, { recursive: true, force: true });
  }
  catch (error) {
    console.error('Error cleaning up screenshots:', error);
  }
}

// Available tools for web research functionality
const TOOLS = [
  {
    name: "search_google",
    description: "Search Google for a query",
    inputSchema: {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query" },
      },
      required: ["query"],
    },
  },
  {
    name: "visit_page",
    description: "Visit a webpage and extract its content",
    inputSchema: {
      type: "object",
      properties: {
        url: { type: "string", description: "URL to visit" },
        takeScreenshot: { type: "boolean", description: "Whether to take a screenshot" },
      },
      required: ["url"],
    },
  },
  {
    name: "take_screenshot",
    description: "Take a screenshot of the current page",
    inputSchema: {
      type: "object",
      properties: {}, // No parameters needed
    },
  },
];

// Configure available prompts with their specifications
const PROMPTS = {
  // Agentic research prompt configuration
  "agentic-research": {
    name: "agentic-research",
    description: "Conduct iterative web research on a topic, exploring it thoroughly through multiple steps while maintaining a dialogue with the user",
    arguments: [
      {
        name: "topic", // Topic argument specification
        description: "The topic or question to research", // Description of the argument
        required: true // Topic is mandatory
      }
    ]
  }
};

// Global state management for browser and research session
let browser; // Playwright browser instance
let page; // Current active page
let currentSession; // Current research session data

// Configuration constants for session management
const MAX_RESULTS_PER_SESSION = 100; // Maximum number of results to store per session
const MAX_RETRIES = 3; // Maximum retry attempts for operations
const RETRY_DELAY = 1000; // Delay between retries in milliseconds

/**
 * Run an async operation, retrying with a fixed delay when it throws.
 *
 * @param {() => Promise<any>} operation - Operation to run.
 * @param {number} [retries=MAX_RETRIES] - Total number of attempts.
 * @param {number} [delay=RETRY_DELAY] - Milliseconds to wait between attempts.
 * @returns {Promise<any>} Whatever the first successful attempt returns.
 * @throws The error thrown by the final attempt.
 */
async function withRetry(operation, retries = MAX_RETRIES, delay = RETRY_DELAY) {
  // Always attempt at least once; with retries < 1 the loop would never run
  // and the function would throw `undefined`.
  const attempts = Math.max(1, retries);
  let lastError;
  for (let i = 0; i < attempts; i++) {
    try {
      return await operation();
    }
    catch (error) {
      lastError = error;
      if (i < attempts - 1) {
        console.error(`Attempt ${i + 1} failed, retrying in ${delay}ms:`, error);
        await new Promise(resolve => setTimeout(resolve, delay));
      }
    }
  }
  throw lastError;
}

/**
 * Append a research result to the current session, creating the session if
 * there is none and evicting the oldest result once the session holds
 * MAX_RESULTS_PER_SESSION entries.
 *
 * @param {object} result - Result record to store.
 */
function addResult(result) {
  if (!currentSession) {
    currentSession = {
      query: "Research Session",
      results: [],
      lastUpdated: new Date().toISOString(),
    };
  }
  if (currentSession.results.length >= MAX_RESULTS_PER_SESSION) {
    currentSession.results.shift();
  }
  currentSession.results.push(result);
  currentSession.lastUpdated = new Date().toISOString();
}

/**
 * Specifically handles Google's consent dialog in regions that require it.
 * Best-effort: any failure is logged and swallowed.
 *
 * @param page - Playwright Page object
 */
async function dismissGoogleConsent(page) {
  // Regions that commonly show cookie/consent banners
  const regions = [
    // Europe
    '.google.de', '.google.fr', '.google.co.uk',
    '.google.it', '.google.es', '.google.nl',
    '.google.pl', '.google.ie', '.google.dk',
    '.google.no', '.google.se', '.google.fi',
    '.google.at', '.google.ch', '.google.be',
    '.google.pt', '.google.gr', '.google.com.tr',
    // Asia Pacific
    '.google.co.id', '.google.com.sg', '.google.co.th',
    '.google.com.my', '.google.com.ph', '.google.co.nz',
    '.google.com.vn',
    // Generic domains
    '.google.com', '.google.co'
  ];
  try {
    const currentUrl = page.url();
    // Skip consent check if not in a supported region
    if (!regions.some(domain => currentUrl.includes(domain))) {
      return;
    }

    // Quick check for consent dialog existence
    const hasConsent = await page.$('form:has(button[aria-label]), div[aria-modal="true"], ' +
      // Common dialog containers
      'div[role="dialog"], div[role="alertdialog"], ' +
      // Common cookie/consent specific elements
      'div[class*="consent"], div[id*="consent"], ' +
      'div[class*="cookie"], div[id*="cookie"], ' +
      // Common modal/popup classes
      'div[class*="modal"]:has(button), div[class*="popup"]:has(button), ' +
      // Common banner patterns
      'div[class*="banner"]:has(button), div[id*="banner"]:has(button)').then(Boolean);
    if (!hasConsent) {
      return;
    }

    // Handle the consent dialog using common consent button patterns
    await page.evaluate(() => {
      const consentPatterns = {
        // Common accept button text patterns across languages
        text: [
          // English
          'accept all', 'agree', 'consent',
          // German
          'alle akzeptieren', 'ich stimme zu', 'zustimmen',
          // French
          'tout accepter', 'j\'accepte',
          // Spanish
          'aceptar todo', 'acepto',
          // Italian
          'accetta tutto', 'accetto',
          // Portuguese
          'aceitar tudo', 'concordo',
          // Dutch
          'alles accepteren', 'akkoord',
          // Polish
          'zaakceptuj wszystko', 'zgadzam się',
          // Swedish
          'godkänn alla', 'godkänn',
          // Danish
          'accepter alle', 'accepter',
          // Norwegian
          'godta alle', 'godta',
          // Finnish
          'hyväksy kaikki', 'hyväksy',
          // Indonesian
          'terima semua', 'setuju', 'saya setuju',
          // Malay
          'terima semua', 'setuju',
          // Thai
          'ยอมรับทั้งหมด', 'ยอมรับ',
          // Vietnamese
          'chấp nhận tất cả', 'đồng ý',
          // Filipino/Tagalog
          'tanggapin lahat', 'sumang-ayon',
          // Japanese
          'すべて同意する', '同意する',
          // Korean
          '모두 동의', '동의'
        ],
        // Common aria-label patterns
        ariaLabels: [
          'consent', 'accept', 'agree',
          'cookie', 'privacy', 'terms',
          'persetujuan', 'setuju', // Indonesian
          'ยอมรับ', // Thai
          'đồng ý', // Vietnamese
          '同意' // Japanese/Chinese
        ]
      };

      // Finds the accept button by text or aria-label
      const findAcceptButton = () => {
        const buttons = Array.from(document.querySelectorAll('button'));
        return buttons.find(button => {
          const text = button.textContent?.toLowerCase() || '';
          const label = button.getAttribute('aria-label')?.toLowerCase() || '';
          const hasMatchingText = consentPatterns.text.some(pattern => text.includes(pattern));
          const hasMatchingLabel = consentPatterns.ariaLabels.some(pattern => label.includes(pattern));
          return hasMatchingText || hasMatchingLabel;
        });
      };

      const acceptButton = findAcceptButton();
      if (acceptButton) {
        acceptButton.click();
      }
    });
  }
  catch (error) {
    // This server talks to its client over stdio, so stdout carries protocol
    // messages; diagnostics must go to stderr like the rest of the file.
    console.error('Consent handling failed:', error);
  }
}

// Safe page navigation with error handling and bot
/**
 * Safe page navigation with error handling and bot detection.
 *
 * Navigates `page` to `url`, then rejects the result when there is no
 * response, the HTTP status is 400 or higher, a known bot-protection element
 * or title is present, or the body holds fewer than 10 words.
 *
 * @param page - Playwright Page object
 * @param {string} url - URL to load.
 * @returns {Promise<void>}
 * @throws {Error} `Navigation to <url> failed: <reason>`; the underlying
 *   error is attached as `cause`.
 */
async function safePageNavigation(page, url) {
  try {
    // Step 1: Set cookies to bypass consent banner
    await page.context().addCookies([{
      name: 'CONSENT',
      value: 'YES+',
      domain: '.google.com',
      path: '/'
    }]);

    // Step 2: Initial navigation
    const response = await page.goto(url, {
      waitUntil: 'domcontentloaded',
      timeout: 15000
    });

    // Step 3: Basic response validation
    if (!response) {
      throw new Error('Navigation failed: no response received');
    }
    const status = response.status();
    if (status >= 400) {
      throw new Error(`HTTP ${status}: ${response.statusText()}`);
    }

    // Step 4: Wait for network to become idle or timeout
    await Promise.race([
      page.waitForLoadState('networkidle', { timeout: 5000 })
        .catch(() => { }),
      // Fallback timeout in case networkidle never occurs
      new Promise(resolve => setTimeout(resolve, 5000))
    ]);

    // Step 5: Security and content validation
    const validation = await page.evaluate(() => {
      const botProtectionExists = [
        '#challenge-running', // Cloudflare
        '#cf-challenge-running', // Cloudflare
        '#px-captcha', // PerimeterX
        '#ddos-protection', // Various
        '#waf-challenge-html' // Various WAFs
      ].some(selector => document.querySelector(selector));

      // Check for suspicious page titles
      const suspiciousTitle = [
        'security check',
        'ddos protection',
        'please wait',
        'just a moment',
        'attention required'
      ].some(phrase => document.title.toLowerCase().includes(phrase));

      // Count words in the page content
      const bodyText = document.body.innerText || '';
      const words = bodyText.trim().split(/\s+/).length;

      return {
        wordCount: words,
        botProtection: botProtectionExists,
        suspiciousTitle,
        title: document.title
      };
    });

    if (validation.botProtection) {
      throw new Error('Bot protection detected');
    }
    if (validation.suspiciousTitle) {
      throw new Error(`Suspicious page title detected: "${validation.title}"`);
    }
    if (validation.wordCount < 10) {
      throw new Error('Page contains insufficient content');
    }
  }
  catch (error) {
    // Keep the original error reachable for debugging instead of flattening
    // it to a message string only.
    throw new Error(`Navigation to ${url} failed: ${describeError(error)}`, { cause: error });
  }
}

/**
 * Take a PNG screenshot of the current viewport, shrinking the viewport until
 * the image is at most 5MB.
 *
 * @param page - Playwright Page object
 * @returns {Promise<string>} Base64-encoded PNG data.
 * @throws {McpError} InvalidRequest when even the minimum viewport yields an
 *   image over 5MB.
 */
async function takeScreenshotWithSizeLimit(page) {
  const MAX_SIZE = 5 * 1024 * 1024;
  const MAX_DIMENSION = 1920;
  const MIN_DIMENSION = 800;
  const MAX_ATTEMPTS = 3;

  await page.setViewportSize({ width: 1600, height: 900 });
  let buffer = await page.screenshot({ type: 'png', fullPage: false });

  let attempts = 0;
  while (buffer.length > MAX_SIZE && attempts < MAX_ATTEMPTS) {
    const viewport = page.viewportSize();
    if (!viewport) {
      // Previously `continue`, which skipped the `attempts++` below and spun
      // forever. Fall through to the minimum-size attempt instead.
      break;
    }

    // Calculate new dimensions, clamped to the allowed range
    const scaleFactor = Math.pow(0.75, attempts + 1);
    let newWidth = Math.round(viewport.width * scaleFactor);
    let newHeight = Math.round(viewport.height * scaleFactor);
    newWidth = Math.max(MIN_DIMENSION, Math.min(MAX_DIMENSION, newWidth));
    newHeight = Math.max(MIN_DIMENSION, Math.min(MAX_DIMENSION, newHeight));

    await page.setViewportSize({ width: newWidth, height: newHeight });
    buffer = await page.screenshot({ type: 'png', fullPage: false });
    attempts++;
  }

  // Final attempt with minimum settings
  if (buffer.length > MAX_SIZE) {
    await page.setViewportSize({ width: MIN_DIMENSION, height: MIN_DIMENSION });
    buffer = await page.screenshot({ type: 'png', fullPage: false });
    if (buffer.length > MAX_SIZE) {
      throw new McpError(ErrorCode.InvalidRequest, `Failed to reduce screenshot to under 5MB even with minimum settings`);
    }
  }

  return buffer.toString('base64');
}

// Initialize MCP server with basic configuration
const server = new Server({
  name: "webresearch", // Server name identifier
  version: "0.1.7", // Server version number
}, {
  capabilities: {
    tools: {}, // Available tool configurations
    resources: {}, // Resource handling capabilities
    prompts: {} // Prompt processing capabilities
  },
});

// Register handler for tool listing requests
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: TOOLS // Return list of available research tools
}));

// Register handler for resource listing requests
server.setRequestHandler(ListResourcesRequestSchema, async () => {
  // Return empty list if no active session
  if (!currentSession) {
    return { resources: [] };
  }

  const resources = [
    // Session summary resource
    {
      uri: "research://current/summary",
      name: "Current Research Session Summary",
      description: "Summary of the current research session including queries and results",
      mimeType: "application/json"
    },
    // One screenshot resource per stored result that has a screenshot;
    // the URI carries the result's position in the session
    ...currentSession.results
      .map((r, i) => r.screenshotPath ? {
        uri: `research://screenshots/${i}`,
        name: `Screenshot of ${r.title}`,
        description: `Screenshot taken from ${r.url}`,
        mimeType: "image/png"
      } : undefined)
      .filter((r) => r !== undefined)
  ];

  return { resources };
});

// Register handler for resource content requests
server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
  const uri = request.params.uri.toString();

  // Handle session summary requests for research data
  if (uri === "research://current/summary") {
    if (!currentSession) {
      throw new McpError(ErrorCode.InvalidRequest, "No active research session");
    }
    return {
      contents: [{
        uri,
        mimeType: "application/json",
        text: JSON.stringify({
          query: currentSession.query,
          resultCount: currentSession.results.length,
          lastUpdated: currentSession.lastUpdated,
          results: currentSession.results.map(r => ({
            title: r.title,
            url: r.url,
            timestamp: r.timestamp,
            screenshotPath: r.screenshotPath
          }))
        }, null, 2)
      }]
    };
  }

  // Handle screenshot requests
  if (uri.startsWith("research://screenshots/")) {
    const index = Number.parseInt(uri.split("/").pop() || "", 10);

    if (!currentSession) {
      throw new McpError(ErrorCode.InvalidRequest, "No active research session");
    }
    if (Number.isNaN(index) || index < 0 || index >= currentSession.results.length) {
      throw new McpError(ErrorCode.InvalidRequest, `Screenshot index out of bounds: ${index}`);
    }

    const result = currentSession.results[index];
    if (!result?.screenshotPath) {
      throw new McpError(ErrorCode.InvalidRequest, `No screenshot available at index: ${index}`);
    }

    try {
      // Read the binary data and return it base64-encoded
      const screenshotData = await fs.promises.readFile(result.screenshotPath);
      const base64Data = screenshotData.toString('base64');
      return {
        contents: [{
          uri,
          mimeType: "image/png",
          blob: base64Data
        }]
      };
    }
    catch (error) {
      const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
      throw new McpError(ErrorCode.InternalError, `Failed to read screenshot: ${errorMessage}`);
    }
  }

  // Handle unknown resource types
  throw new McpError(ErrorCode.InvalidRequest, `Unknown resource: ${uri}`);
});

// Initialize MCP server connection using stdio transport
const transport = new StdioServerTransport();
server.connect(transport).catch((error) => {
  console.error("Failed to start server:", error);
  process.exit(1);
});

/**
 * Convert page content to clean, readable markdown.
 *
 * Content is taken from `selector` when given; otherwise from the first
 * matching well-known content container; otherwise from a copy of <body>
 * with navigation, sidebars, ads and similar elements removed.
 *
 * @param page - Playwright Page object to extract from
 * @param {string} [selector] - Optional CSS selector to target specific content.
 * @returns {Promise<string>} Markdown text; '' when nothing was found; the raw
 *   HTML when the markdown conversion throws.
 */
async function extractContentAsMarkdown(page, selector) {
  // Step 1: Execute content extraction in browser context
  const html = await page.evaluate((sel) => {
    // Handle case where specific selector is provided
    if (sel) {
      const element = document.querySelector(sel);
      return element ? element.outerHTML : '';
    }

    // Step 2: Try standard content containers first, in priority order
    const contentSelectors = [
      'main', // HTML5 semantic main content
      'article', // HTML5 semantic article content
      '[role="main"]', // ARIA main content role
      '#content', // Common content ID
      '.content', // Common content class
      '.main', // Alternative main class
      '.post', // Blog post content
      '.article', // Article content container
    ];
    for (const contentSelector of contentSelectors) {
      const element = document.querySelector(contentSelector);
      if (element) {
        return element.outerHTML;
      }
    }

    // Step 3: Fallback to cleaning full body content.
    // Work on a clone: removing nodes from the live document would also strip
    // them from any screenshot taken after extraction.
    const body = document.body.cloneNode(true);
    const elementsToRemove = [
      // Navigation elements
      'header', // Page header
      'footer', // Page footer
      'nav', // Navigation sections
      '[role="navigation"]', // ARIA navigation elements
      // Sidebars and complementary content
      'aside', // Sidebar content
      '.sidebar', // Sidebar by class
      '[role="complementary"]', // ARIA complementary content
      // Navigation-related elements
      '.nav', // Navigation classes
      '.menu', // Menu elements
      // Page structure elements
      '.header', // Header classes
      '.footer', // Footer classes
      // Advertising and notices
      '.advertisement', // Advertisement containers
      '.ads', // Ad containers
      '.cookie-notice', // Cookie consent notices
    ];
    elementsToRemove.forEach(unwanted => {
      body.querySelectorAll(unwanted).forEach(el => el.remove());
    });
    return body.outerHTML;
  }, selector);

  // Step 4: Handle empty content case
  if (!html) {
    return '';
  }

  try {
    // Step 5: Convert HTML to Markdown
    const markdown = turndownService.turndown(html);

    // Step 6: Clean up and format markdown. Blank-line collapsing runs last
    // because the two removals before it can themselves leave blank lines.
    return markdown
      .replace(/^- $/gm, '') // Remove empty list items
      .replace(/^\s+$/gm, '') // Remove whitespace-only lines
      .replace(/\n{3,}/g, '\n\n') // Replace excessive newlines with double
      .trim(); // Remove leading/trailing whitespace
  }
  catch (error) {
    // Log conversion errors and return original HTML as fallback
    console.error('Error converting HTML to Markdown:', error);
    return html;
  }
}

/**
 * Check that a string parses as a URL with the http: or https: protocol.
 *
 * @param {string} urlString - Candidate URL.
 * @returns {boolean} true only for parseable http/https URLs.
 */
function isValidUrl(urlString) {
  try {
    const url = new URL(urlString);
    // Only allow HTTP and HTTPS protocols for security
    return url.protocol === 'http:' || url.protocol === 'https:';
  }
  catch {
    return false;
  }
}

// Human-readable text for any thrown value, not only Error instances
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

// Build a tool result that reports a failure to the client
function toolError(text) {
  return {
    content: [{ type: "text", text }],
    isError: true
  };
}

// Tell clients the resource list changed; a failed notification is logged
// rather than left as an unhandled promise rejection
function notifyResourceListChanged() {
  Promise.resolve(server.notification({ method: "notifications/resources/list_changed" }))
    .catch((error) => {
      console.error('Failed to send resource list notification:', error);
    });
}

// Tool request handler for executing research operations
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  // Initialize browser for tool operations
  const page = await ensureBrowser();

  switch (request.params.name) {
    // Handle Google search operations
    case "search_google": {
      // `arguments` is optional in a tool call; validate before use so a
      // malformed request yields a tool error, not a TypeError
      const { query } = request.params.arguments ?? {};
      if (typeof query !== 'string' || query.trim() === '') {
        return toolError('Failed to perform search: "query" must be a non-empty string');
      }

      try {
        // Execute search with retry mechanism
        const results = await withRetry(async () => {
          // Step 1: Navigate to Google search page
          await safePageNavigation(page, 'https://www.google.com');
          await dismissGoogleConsent(page);

          // Step 2: Find and interact with search input
          await withRetry(async () => {
            // Wait for any search input element to appear
            await Promise.race([
              page.waitForSelector('input[name="q"]', { timeout: 5000 }),
              page.waitForSelector('textarea[name="q"]', { timeout: 5000 }),
              page.waitForSelector('input[type="text"]', { timeout: 5000 })
            ]).catch(() => {
              throw new Error('Search input not found - no matching selectors');
            });

            // Find the actual search input element
            const searchInput = await page.$('input[name="q"]') ||
              await page.$('textarea[name="q"]') ||
              await page.$('input[type="text"]');
            if (!searchInput) {
              throw new Error('Search input element not found after waiting');
            }

            // Step 3: Enter search query
            await searchInput.click({ clickCount: 3 }); // Select all existing text
            await searchInput.press('Backspace'); // Clear selected text
            await searchInput.type(query); // Type new query
          }, 3, 2000); // Allow 3 retries with 2s delay

          // Step 4: Submit search and wait for results
          await withRetry(async () => {
            await Promise.all([
              page.keyboard.press('Enter'),
              page.waitForLoadState('networkidle', { timeout: 15000 }),
              page.waitForLoadState('domcontentloaded'),
              // Wait for any of the possible result containers
              Promise.race([
                page.waitForSelector('div.g', { timeout: 10000 }).catch(() => null),
                page.waitForSelector('div[jscontroller][jsdata]', { timeout: 10000 }).catch(() => null),
                page.waitForSelector('div[class*="MjjYud"]', { timeout: 10000 }).catch(() => null),
                page.waitForSelector('div.rc', { timeout: 10000 }).catch(() => null),
                page.waitForSelector('div[data-sokoban-container]', { timeout: 10000 }).catch(() => null)
              ])
            ]);
            // Add a small delay to ensure dynamic content is loaded
            await page.waitForTimeout(2000);
          });

          // Step 5: Extract search results (with inner retry)
          const searchResults = await withRetry(async () => {
            const evaluatedResults = await page.evaluate(() => {
              // Possible selectors for search result containers, in priority order
              const selectors = [
                'div.MjjYud', // Common container
                'div.g', // Traditional container
                'div.hlcw0c', // Another possible container
                'div.kvH3mc', // Container often seen with featured snippets/PAA
                'div[jscontroller][jsdata][jsaction*="click"]', // Modern dynamic results container
              ];

              // Use the first selector in the list that matches anything
              let elements = null;
              for (const selector of selectors) {
                const found = document.querySelectorAll(selector);
                if (found.length > 0) {
                  elements = found;
                  break;
                }
              }

              // Fallback if specific containers fail
              if (!elements || elements.length === 0) {
                elements = document.querySelectorAll('div[data-hveid]'); // General attribute often on result blocks
                if (!elements || elements.length === 0) {
                  throw new Error('No search result containers found with known selectors');
                }
              }

              // Extract data from each result element
              return Array.from(elements).map((el) => {
                // Link element: prefer the specific jsname when available
                const linkEl = el.querySelector('a[jsname="UWckNb"]') || el.querySelector('a[href]');
                // Title element (usually h3 within the link)
                const titleEl = el.querySelector('h3');
                // Snippet element - this is the most likely to change
                let snippetEl = el.querySelector('div.VwiC3b') || // Old common class
                  el.querySelector('div.Ap5OSd') || // Another potential snippet class
                  el.querySelector('div[data-sncf="1"]') || // Attribute-based
                  el.querySelector('span.aCOpRe'); // Snippet often within spans

                // Fallback snippet search: first candidate block with meaningful text
                if (!snippetEl) {
                  const potentialSnippets = el.querySelectorAll('div[role="textbox"], div[data-content-feature="1"], div.mu8Lbd, div.w1C3Le');
                  for (const p of potentialSnippets) {
                    if (p.textContent && p.textContent.trim().length > 15) {
                      snippetEl = p;
                      break;
                    }
                  }
                }

                // Skip results missing core elements (title and a usable link)
                if (!titleEl || !linkEl || !linkEl.getAttribute('href') || linkEl.getAttribute('href') === '#') {
                  return null;
                }

                // Get URL, ensuring it's absolute
                let url = linkEl.getAttribute('href') || '';
                if (url.startsWith('/url?q=')) {
                  // Google redirect link: the real target is the `q` parameter
                  const urlParams = new URLSearchParams(url.split('?')[1]);
                  url = urlParams.get('q') || url;
                }
                else if (url.startsWith('/')) {
                  url = `https://www.google.com${url}`;
                }

                return {
                  title: titleEl.textContent || '',
                  url: url,
                  // Use snippet text, or empty string if snippet element not found
                  snippet: snippetEl ? snippetEl.textContent || '' : '',
                };
              }).filter((result) => result !== null &&
                !!result.url &&
                !!result.title &&
                !(result.url.startsWith('https://www.google.com/search?q=')) // Filter out "Related searches" etc.
              );
            });

            const filteredResults = evaluatedResults.filter((r) => r !== null);

            // Verify we found valid results after filtering
            if (!filteredResults || filteredResults.length === 0) {
              const pageText = await page.textContent('body');
              if (pageText && pageText.length > 100) {
                throw new Error(`No valid search results found using known structures. Page content might have changed. Raw text length: ${pageText.length}`);
              }
              else {
                throw new Error('No valid search results found, and page seems empty or blocked.');
              }
            }
            return filteredResults;
          });

          // Step 6: Store results in session
          searchResults.forEach((result) => {
            addResult({
              url: result.url,
              title: result.title,
              content: result.snippet,
              timestamp: new Date().toISOString(),
            });
          });

          return searchResults;
        });

        // Step 7: Return formatted results
        return {
          content: [{
            type: "text",
            text: JSON.stringify(results, null, 2) // Pretty-print JSON results
          }]
        };
      }
      catch (error) {
        return toolError(`Failed to perform search: ${describeError(error)}`);
      }
    }

    // Handle webpage visit and content extraction
    case "visit_page": {
      const { url, takeScreenshot } = request.params.arguments ?? {};

      // Step 1: Validate URL format and security
      if (!isValidUrl(url)) {
        return toolError(`Invalid URL: ${url}. Only http and https protocols are supported.`);
      }

      try {
        // Step 2: Visit page and extract content with retry mechanism
        const result = await withRetry(async () => {
          await safePageNavigation(page, url);
          const title = await page.title();

          // Step 3: Extract and process page content
          const content = await withRetry(async () => {
            const extractedContent = await extractContentAsMarkdown(page);
            if (!extractedContent) {
              throw new Error('Failed to extract content');
            }
            return extractedContent;
          });

          // Step 4: Create result object with page data
          const pageResult = {
            url, // Original URL
            title, // Page title
            content, // Markdown content
            timestamp: new Date().toISOString(), // Capture time
          };

          // Step 5: Take screenshot if requested
          if (takeScreenshot) {
            const screenshot = await takeScreenshotWithSizeLimit(page);
            pageResult.screenshotPath = await saveScreenshot(screenshot, title);
          }

          // Step 6: Store result in session
          addResult(pageResult);

          // The index is read after storing: addResult may evict the oldest
          // entry, so the length seen beforehand can point past the end.
          // NOTE: screenshot URIs are positional and shift on later evictions.
          let screenshotUri;
          if (pageResult.screenshotPath) {
            screenshotUri = `research://screenshots/${currentSession.results.length - 1}`;
            // Notify only once the resource is actually listable
            notifyResourceListChanged();
          }

          return { pageResult, screenshotUri };
        });

        // Step 7: Return formatted result with screenshot URI if taken
        return {
          content: [{
            type: "text",
            text: JSON.stringify({
              url: result.pageResult.url,
              title: result.pageResult.title,
              content: result.pageResult.content,
              timestamp: result.pageResult.timestamp,
              screenshot: result.screenshotUri ? `View screenshot via *MCP Resources* (Paperclip icon) @ URI: ${result.screenshotUri}` : undefined
            }, null, 2)
          }]
        };
      }
      catch (error) {
        return toolError(`Failed to visit page: ${describeError(error)}`);
      }
    }

    // Handle standalone screenshot requests
    case "take_screenshot": {
      try {
        // Step 1: Capture screenshot with retry mechanism
        const screenshot = await withRetry(async () => {
          return await takeScreenshotWithSizeLimit(page);
        });

        // Step 2: Initialize session if needed
        if (!currentSession) {
          currentSession = {
            query: "Screenshot Session", // Session identifier
            results: [], // Empty results array
            lastUpdated: new Date().toISOString(), // Current timestamp
          };
        }

        // Step 3: Get current page information
        const pageUrl = page.url();
        const pageTitle = await page.title();

        // Step 4: Save screenshot to disk
        const screenshotPath = await saveScreenshot(screenshot, pageTitle || 'untitled');

        // Step 5: Create and store screenshot result
        addResult({
          url: pageUrl,
          title: pageTitle || "Untitled Page", // Fallback title if none available
          content: "Screenshot taken", // Simple content description
          timestamp: new Date().toISOString(), // Capture time
          screenshotPath // Path to screenshot file
        });
        // Read the index after storing; see the eviction note in visit_page
        const resultIndex = currentSession.results.length - 1;

        // Step 6: Notify clients about new screenshot resource
        notifyResourceListChanged();

        // Step 7: Return success message with resource URI
        const resourceUri = `research://screenshots/${resultIndex}`;
        return {
          content: [{
            type: "text",
            text: `Screenshot taken successfully. You can view it via *MCP Resources* (Paperclip icon) @ URI: ${resourceUri}`
          }]
        };
      }
      catch (error) {
        return toolError(`Failed to take screenshot: ${describeError(error)}`);
      }
    }

    // Handle unknown tool requests
    default:
      throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${request.params.name}`);
  }
});

// Register handler for prompt listing requests
server.setRequestHandler(ListPromptsRequestSchema, async () => {
  return { prompts: Object.values(PROMPTS) };
});

// Register handler for prompt retrieval and execution
server.setRequestHandler(GetPromptRequestSchema, async (request) => {
  const promptName = request.params.name;
  // Own-property lookup so names like "constructor" are reported as not found
  const prompt = Object.prototype.hasOwnProperty.call(PROMPTS, promptName)
    ? PROMPTS[promptName]
    : undefined;

  // Handle unknown prompt requests
  if (!prompt) {
    throw new McpError(ErrorCode.InvalidRequest, `Prompt not found: ${promptName}`);
  }

  // Handle agentic research prompt
  if (promptName === "agentic-research") {
    const args = request.params.arguments;
    const topic = args?.topic || ""; // Use empty string if no topic provided

    return {
      messages: [
        // Initial assistant message establishing role
        {
          role: "assistant",
          content: {
            type: "text",
            text: "I am ready to help you with your research. I will conduct thorough web research, explore topics deeply, and maintain a dialogue with you throughout the process."
          }
        },
        // Detailed research instructions for the user
        {
          role: "user",
          content: {
            type: "text",
            text: `I'd like to research this topic: <topic>${topic}</topic>

Please help me explore it deeply, like you're a thoughtful, highly-trained research assistant.

General instructions:
1. Start by proposing your research approach -- namely, formulate what initial query you will use to search the web. Propose a relatively broad search to understand the topic landscape. At the same time, make your queries optimized for returning high-quality results based on what you know about constructing Google search queries.
2. Next, get my input on whether you should proceed with that query or if you should refine it.
3. Once you have an approved query, perform the search.
4. Prioritize high quality, authoritative sources when they are available and relevant to the topic. Avoid low quality or spammy sources.
5. Retrieve information that is relevant to the topic at hand.
6. Iteratively refine your research direction based on what you find.
7. Keep me informed of what you find and let *me* guide the direction of the research interactively.
8. If you run into a dead end while researching, do a Google search for the topic and attempt to find a URL for a relevant page. Then, explore that page in depth.
9. Only conclude when my research goals are met.
10. **Always cite your sources**, providing URLs to the sources you used in a citation block at the end of your response.

You can use these tools:
- search_google: Search for information
- visit_page: Visit and extract content from web pages

Do *NOT* use the following tools:
- Anything related to knowledge graphs or memory, unless explicitly instructed to do so by the user.`
          }
        }
      ]
    };
  }

  // Handle unsupported prompt types
  throw new McpError(ErrorCode.InvalidRequest, "Prompt implementation not found");
});

// NOTE(review): the definition below runs past the end of the reviewed chunk; its visible text is kept verbatim.
// Ensures browser is running, and creates a new page if needed async function ensureBrowser() { // Launch browser if not already running if (!browser) { browser = await chromium.launch({ headless: true, // Run in headless mode for autom