scraperis-mcp
Version:
Model Context Protocol (MCP) integration for Scraper.is - A web scraping tool for AI assistants
256 lines (255 loc) • 11.3 kB
JavaScript
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListResourcesRequestSchema, ListToolsRequestSchema, ReadResourceRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import fetch from 'node-fetch';
import { ScraperAPI } from '../lib/scraper-api.js';
import { sendLoggingMessage } from '../utils/index.js';
/**
 * MCP server that exposes the Scraper.is API as a `scrape` tool and serves
 * the screenshots produced by that tool as MCP resources.
 */
export class ScraperMCPServer {
    /** URI prefix under which screenshot URLs are exposed as MCP resources. */
    static #SCREENSHOT_URI_PREFIX = 'scraperis_screenshot://';
    /** Output formats advertised by, and accepted for, the `scrape` tool. */
    static #SCRAPE_FORMATS = Object.freeze(['markdown', 'html', 'screenshot', 'json', 'quick']);

    server;
    scraperApi;
    screenshots;
    tools;

    /**
     * Creates a new ScraperMCPServer and registers its request handlers.
     * @param apiKey The API key for Scraper.is
     * @param apiBase The base URL for the Scraper.is API
     * @param version The server version
     */
    constructor(apiKey, apiBase = 'https://scraper.is/api', version = '0.1.0') {
        this.scraperApi = new ScraperAPI(apiKey, apiBase);
        // Maps a page URL (or the screenshot URL when no page URL is known)
        // to the URL of the screenshot taken for it.
        this.screenshots = new Map();
        // Initialize MCP server
        this.server = new Server({
            name: 'scraperis-mcp',
            version,
        }, {
            capabilities: {
                tools: {},
                resources: {},
                prompts: {},
                logging: {}
            },
        });
        // Define tools
        this.tools = [
            {
                description: 'Scrape a single webpage with advanced options for content extraction. \n' +
                    'Always returns both markdown content and visual screenshot for rich context. \n' +
                    'Supports various formats including markdown, HTML, screenshots, JSON, and quick. \n' +
                    'The prompt should include the website URL and what data you want to extract. \n' +
                    "For example: 'Get me the top 10 products from producthunt.com' or \n" +
                    "'Extract all article titles and authors from techcrunch.com/news'",
                name: 'scrape',
                type: 'function',
                inputSchema: {
                    type: 'object',
                    properties: {
                        prompt: {
                            type: 'string',
                            description: 'The prompt describing what to scrape, including the URL'
                        },
                        format: {
                            type: 'string',
                            enum: [...ScraperMCPServer.#SCRAPE_FORMATS],
                            description: 'The format to return the content in'
                        }
                    },
                    required: ['prompt', 'format']
                },
            },
            // Uncomment to enable screenshot tool
            // {
            //     description: 'Take a screenshot of a webpage',
            //     name: 'screenshot',
            //     type: 'function',
            //     inputSchema: {
            //         type: 'object',
            //         properties: {
            //             url: {
            //                 type: 'string',
            //                 description: 'The URL to take a screenshot of'
            //             }
            //         },
            //         required: ['url']
            //     },
            // }
        ];
        // Set up request handlers
        this.setupRequestHandlers();
    }

    /**
     * Registers the handlers for listing tools, listing/reading screenshot
     * resources, and calling tools on the underlying MCP server.
     */
    setupRequestHandlers() {
        this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
            tools: this.tools,
        }));
        // The scrape tool emits `notifications/resources/list_changed`, so the
        // server must also be able to answer the listing request that follows.
        this.server.setRequestHandler(ListResourcesRequestSchema, async () => ({
            resources: this.#listScreenshotResources(),
        }));
        this.server.setRequestHandler(ReadResourceRequestSchema, (request) => this.#readScreenshotResource(request));
        this.server.setRequestHandler(CallToolRequestSchema, (request) => this.#callTool(request));
    }

    /**
     * Forwards a log message to the MCP server via `sendLoggingMessage`.
     * @param level The log level ('error' or 'info')
     * @param message The message to log
     */
    log(level, message) {
        sendLoggingMessage(this.server, level, message);
    }

    /**
     * Connects the MCP server to a stdio transport.
     * On failure the error is logged and the process exits with status 1.
     */
    async start() {
        try {
            console.error('Initializing Scraperis MCP Server...');
            const transport = new StdioServerTransport();
            await this.server.connect(transport);
            this.log('info', 'Scraperis MCP Server connected to stdio');
            this.log('info', 'Scraperis MCP Server initialized successfully');
            this.log('info', `Configuration: API URL: ${this.scraperApi['apiBase']}`);
        }
        catch (error) {
            this.log('error', `Fatal error running server: ${ScraperMCPServer.#describeError(error)}`);
            console.error('Fatal error running server:', error);
            process.exit(1);
        }
    }

    /**
     * Extracts a printable message from any thrown value.
     * @param error The caught value
     * @returns {string} `error.message` for Error instances, otherwise `String(error)`
     */
    static #describeError(error) {
        return error instanceof Error ? error.message : String(error);
    }

    /**
     * Tells whether a string parses as an absolute http(s) URL.
     * @param value Candidate URL string
     * @returns {boolean}
     */
    static #isHttpUrl(value) {
        try {
            const { protocol } = new URL(value);
            return protocol === 'http:' || protocol === 'https:';
        }
        catch {
            return false;
        }
    }

    /**
     * Builds a tool result holding a single text item.
     * @param text The text payload
     * @param isError Whether the result reports a failure
     */
    static #textResult(text, isError = false) {
        return {
            content: [{ type: 'text', text }],
            isError,
        };
    }

    /**
     * Describes every recorded screenshot as an MCP resource.
     * @returns {Array<{uri: string, name: string, mimeType: string}>}
     */
    #listScreenshotResources() {
        return Array.from(this.screenshots, ([pageUrl, screenshotUrl]) => ({
            uri: `${ScraperMCPServer.#SCREENSHOT_URI_PREFIX}${screenshotUrl}`,
            name: `Screenshot of ${pageUrl}`,
            mimeType: 'image/png',
        }));
    }

    /**
     * Handles a resources/read request for a `scraperis_screenshot://<url>` URI
     * by downloading the image and returning it base64-encoded.
     * @param request The read-resource request; `request.params.uri` is the resource URI
     * @returns {Promise<object>} A result whose `contents` holds one blob entry
     * @throws {Error} 'Resource not found' when the URI does not carry the
     *   screenshot prefix followed by an http(s) URL; 'Failed to fetch
     *   screenshot: …' when the download fails or returns a non-2xx status
     */
    async #readScreenshotResource(request) {
        this.log('info', `ReadResourceRequestSchema: ${JSON.stringify(request.params)}`);
        const prefix = ScraperMCPServer.#SCREENSHOT_URI_PREFIX;
        const resourceUri = request.params.uri;
        const screenshotURL = typeof resourceUri === 'string' && resourceUri.startsWith(prefix)
            ? resourceUri.slice(prefix.length)
            : '';
        this.log('info', `screenshotURL: ${screenshotURL}`);
        // The URI is supplied by the client: only fetch plain http(s) targets.
        if (!ScraperMCPServer.#isHttpUrl(screenshotURL)) {
            this.log('error', 'Resource not found: No valid screenshot URL');
            throw new Error('Resource not found');
        }
        try {
            const response = await fetch(screenshotURL);
            if (!response.ok) {
                // Without this check an HTTP error page would be returned as image data.
                throw new Error(`HTTP ${response.status} ${response.statusText}`);
            }
            const imageBytes = await response.arrayBuffer();
            return {
                contents: [
                    {
                        uri: resourceUri,
                        mimeType: 'image/png',
                        // Binary resource payloads travel as base64 text.
                        blob: Buffer.from(imageBytes).toString('base64'),
                    }
                ],
            };
        }
        catch (error) {
            const reason = ScraperMCPServer.#describeError(error);
            this.log('error', `Error fetching screenshot: ${reason}`);
            throw new Error(`Failed to fetch screenshot: ${reason}`, { cause: error });
        }
    }

    /**
     * Handles a tools/call request by dispatching to the named tool.
     * Never rejects: any failure is reported as a text result with `isError: true`.
     * @param request The call-tool request
     * @returns {Promise<object>} The tool result
     */
    async #callTool(request) {
        try {
            const { name } = request.params;
            const args = request.params.arguments ?? {};
            this.log('info', `Received request for tool: ${name}`);
            const progressToken = request.params._meta?.progressToken;
            if (name === 'scrape') {
                return await this.#runScrape(args, progressToken);
            }
            // Handle screenshot tool (if enabled)
            if (name === 'screenshot') {
                return await this.#runScreenshot(args);
            }
            throw new Error(`Unknown tool: ${name}`);
        }
        catch (error) {
            this.log('error', `Error in tool operation: ${error}`);
            return ScraperMCPServer.#textResult(`Error: ${ScraperMCPServer.#describeError(error)}`, true);
        }
    }

    /**
     * Runs the `scrape` tool and shapes the API response for the requested format.
     * @param args Tool arguments; expects `prompt` (string) and `format`
     * @param progressToken Progress token from the request metadata, if any
     * @returns {Promise<object>} The tool result
     * @throws {Error} When the arguments are invalid or the scraper API call fails
     */
    async #runScrape(args, progressToken) {
        const { prompt, format } = args;
        // Validate before calling the API so a bad request never costs a scrape.
        if (typeof prompt !== 'string' || prompt.trim() === '') {
            throw new Error('Invalid arguments for tool "scrape": "prompt" must be a non-empty string');
        }
        if (format !== undefined && !ScraperMCPServer.#SCRAPE_FORMATS.includes(format)) {
            throw new Error(`Invalid arguments for tool "scrape": "format" must be one of ${ScraperMCPServer.#SCRAPE_FORMATS.join(', ')}`);
        }
        // A progress token may legitimately be 0, so test for presence, not truthiness.
        const onProgress = progressToken != null
            ? async (progress) => {
                await this.server.notification({
                    method: 'notifications/progress',
                    params: {
                        progress,
                        total: 100,
                        progressToken,
                    },
                });
            }
            : undefined;
        // Call the scraper API
        const handlerData = await this.scraperApi.scrape(prompt, format, onProgress);
        this.log('info', `Scrape completed for prompt: "${prompt.substring(0, 50)}${prompt.length > 50 ? '...' : ''}"`);
        if (format === 'markdown' && handlerData.markdown) {
            return ScraperMCPServer.#textResult(handlerData.markdown);
        }
        if (format === 'screenshot' && handlerData.screenshot && handlerData.screenshot.url) {
            return this.#publishScreenshot(handlerData);
        }
        if (format === 'json' && handlerData.data) {
            return ScraperMCPServer.#textResult('JSON Data:\n```json\n' + JSON.stringify(handlerData.data, null, 2) + '\n```');
        }
        // Default response
        return ScraperMCPServer.#textResult(JSON.stringify(handlerData));
    }

    /**
     * Records a freshly taken screenshot, announces the changed resource list,
     * and returns a text result pointing at the screenshot's resource URI.
     * @param handlerData Scrape response carrying `screenshot.url` and optionally `url`
     * @returns {Promise<object>} The tool result
     */
    async #publishScreenshot(handlerData) {
        const screenshotUrl = handlerData.screenshot.url;
        this.log('info', `Screenshot URL: ${screenshotUrl}`);
        // Fall back to the screenshot URL as key so results without a page URL
        // do not overwrite each other under one shared empty key.
        this.screenshots.set(handlerData.url || screenshotUrl, screenshotUrl);
        try {
            await this.server.notification({
                method: 'notifications/resources/list_changed',
            });
        }
        catch (error) {
            // The screenshot itself succeeded; a failed announcement must not fail the tool call.
            this.log('error', `Failed to announce resource list change: ${ScraperMCPServer.#describeError(error)}`);
        }
        const resourceUri = `${ScraperMCPServer.#SCREENSHOT_URI_PREFIX}${screenshotUrl}`;
        this.log('info', `Screenshot available at: ${screenshotUrl}`);
        return ScraperMCPServer.#textResult(`Screenshot taken successfully. You can view it via *MCP Resources* (Paperclip icon) @ URI: ${resourceUri}`);
    }

    /**
     * Runs the `screenshot` tool (only reachable when that tool is enabled).
     * @param args Tool arguments; expects `url`
     * @returns {Promise<object>} The tool result with the API response serialized as JSON
     * @throws Rethrows any error raised by the scraper API after logging it
     */
    async #runScreenshot(args) {
        const { url } = args;
        try {
            const handlerData = await this.scraperApi.screenshot(url);
            this.log('info', `Screenshot taken successfully for URL: ${url}`);
            return ScraperMCPServer.#textResult(JSON.stringify(handlerData));
        }
        catch (error) {
            this.log('error', `Error taking screenshot for URL ${url}: ${ScraperMCPServer.#describeError(error)}`);
            throw error;
        }
    }
}