UNPKG

mcp-omnisearch

Version:

MCP server for integrating Omnisearch with LLMs

84 lines (83 loc) 3.65 kB
// Firecrawl Crawl Provider Implementation
import { ErrorType, ProviderError, } from '../../common/types.js';
import { is_api_key_valid } from '../../common/utils.js';
import { config, FIRECRAWL_API_KEY } from '../../config/env.js';

/**
 * Crawl provider that talks to the Firecrawl HTTP API.
 *
 * Issues a single POST to `api_url` and flattens the pages found in the
 * response into one combined content string plus a per-page listing.
 */
export class FirecrawlCrawlProviderImpl {
    /**
     * @throws {Error} When `is_api_key_valid` rejects FIRECRAWL_API_KEY.
     */
    constructor() {
        this.name = 'firecrawl';
        this.description = 'Crawl a website with Firecrawl, exploring multiple pages from a starting URL. Best for comprehensive content extraction from entire websites or sections.';
        this.api_url = 'https://api.firecrawl.dev/v1/crawl';
        if (!is_api_key_valid(FIRECRAWL_API_KEY, 'firecrawl')) {
            throw new Error('Invalid Firecrawl API key');
        }
    }

    /**
     * Crawl a site starting from `url`.
     *
     * @param {string} url - Starting URL, sent to the API as-is.
     * @param {{ max_depth?: number, format?: string }} [options] - Optional
     *   overrides; falsy values fall back to depth 2 and 'markdown'.
     * @returns {Promise<object>} Object with `content` (page contents joined
     *   by blank lines), `pages` (url/content/title per page), `metadata`
     *   and `source_provider`.
     * @throws {ProviderError} RATE_LIMIT on HTTP 429; API_ERROR on any other
     *   non-OK status, on an unsuccessful payload, or on timeout;
     *   PROVIDER_ERROR for any other failure.
     */
    async crawl_url(url, options) {
        // Declared outside the try so the finally block can always clear it.
        let timeout_id;
        try {
            const max_depth = options?.max_depth || 2;
            const format = options?.format || 'markdown';

            // Abort the request if it runs longer than the configured timeout.
            const controller = new AbortController();
            timeout_id = setTimeout(() => controller.abort(), config.firecrawl.crawl.timeout);

            // Prepare request body
            const request_body = {
                url,
                maxDepth: max_depth,
                formats: [format],
                blockAds: true,
            };

            // Call Firecrawl API
            const response = await fetch(this.api_url, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${FIRECRAWL_API_KEY}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(request_body),
                signal: controller.signal,
            });

            // Handle response
            if (!response.ok) {
                if (response.status === 429) {
                    throw new ProviderError(ErrorType.RATE_LIMIT, 'Firecrawl rate limit exceeded', this.name);
                }
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${response.status} ${response.statusText}`, this.name);
            }

            // The abort timer is still armed here, so a stalled body read is
            // also bounded by the timeout.
            const data = await response.json();
            if (!data.success) {
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${data.error || 'Unknown error'}`, this.name);
            }

            // Process response
            // NOTE(review): this expects the payload to carry `data.pages` as an
            // array of { url, content, title } — confirm against the Firecrawl
            // crawl endpoint's documented response shape.
            const pages = data.data.pages || [];
            const content = pages
                .map((page) => page.content)
                .join('\n\n');

            return {
                content,
                pages: pages.map((page) => ({
                    url: page.url,
                    content: page.content,
                    title: page.title,
                })),
                metadata: {
                    root_url: url,
                    pages_crawled: pages.length,
                    max_depth,
                    format,
                    timestamp: new Date().toISOString(),
                },
                source_provider: this.name,
            };
        }
        catch (error) {
            // Errors already classified above (rate limit, API error) must keep
            // their type instead of being re-wrapped as a generic provider error.
            if (error instanceof ProviderError) {
                throw error;
            }
            if (error instanceof Error && error.name === 'AbortError') {
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl crawl request timed out after ${config.firecrawl.crawl.timeout}ms`, this.name);
            }
            throw new ProviderError(ErrorType.PROVIDER_ERROR, `Error crawling URL: ${error instanceof Error ? error.message : String(error)}`, this.name, error);
        }
        finally {
            // Always release the timer, including when fetch itself rejects;
            // clearTimeout(undefined) is a no-op if setup failed earlier.
            clearTimeout(timeout_id);
        }
    }
}