UNPKG

mcp-omnisearch

Version:

MCP server for integrating Omnisearch with LLMs

93 lines (92 loc) 5.15 kB
import { ErrorType, ProviderError, } from '../../../common/types.js';
import { is_valid_url, retry_with_backoff, validate_api_key, } from '../../../common/utils.js';
import { config } from '../../../config/env.js';

/**
 * Build the ProviderError that corresponds to a non-OK HTTP response.
 *
 * Statuses 400, 401, 403, 429 and 500 have dedicated error types and
 * messages; every other status is reported as an API error carrying the
 * response's status text.
 *
 * @param {Response} response - The non-OK fetch response.
 * @param {string} provider_name - Provider name attached to the error.
 * @returns {ProviderError} The error to throw for this response.
 */
function http_error_for(response, provider_name) {
    // Built per call so ErrorType is only read once a failure has occurred.
    const known_failures = new Map([
        [400, [ErrorType.INVALID_INPUT, 'Invalid request parameters']],
        [401, [ErrorType.API_ERROR, 'Invalid API key']],
        [403, [ErrorType.API_ERROR, 'API key does not have access to this endpoint']],
        [429, [ErrorType.RATE_LIMIT, 'Rate limit exceeded']],
        [500, [ErrorType.PROVIDER_ERROR, 'Firecrawl API internal error']],
    ]);
    const [error_type, message] = known_failures.get(response.status)
        ?? [ErrorType.API_ERROR, `Unexpected error: ${response.statusText}`];
    return new ProviderError(error_type, message, provider_name);
}

/**
 * Provider that asks the configured Firecrawl map endpoint for the URLs of a
 * site and returns them as a markdown list. No page content is extracted.
 */
export class FirecrawlMapProvider {
    constructor() {
        this.name = 'firecrawl_map';
        this.description = 'Fast URL collection from websites for comprehensive site mapping using Firecrawl. Efficiently discovers all accessible URLs within a domain without extracting content. Best for site auditing, URL discovery, and preparing for targeted content extraction.';
    }

    /**
     * Map a website starting from a single URL.
     *
     * @param {string|string[]} url - Starting URL. When an array is given,
     *   only its first element is used.
     * @param {string} [extract_depth='basic'] - 'advanced' requests up to 200
     *   URLs; any other value requests up to 50.
     * @returns {Promise<object>} Whatever `retry_with_backoff` yields for the
     *   map request — presumably the object built below (`content`,
     *   `raw_contents`, `metadata`, `source_provider`); confirm against
     *   `retry_with_backoff`.
     * @throws {ProviderError} INVALID_INPUT when `is_valid_url` rejects the
     *   starting URL; other ProviderError types for HTTP, API and transport
     *   failures.
     */
    async process_content(url, extract_depth = 'basic') {
        // The map operation has exactly one starting point.
        const target_url = Array.isArray(url) ? url[0] : url;

        if (!is_valid_url(target_url)) {
            throw new ProviderError(ErrorType.INVALID_INPUT, `Invalid URL provided: ${target_url}`, this.name);
        }

        const run_map = async () => {
            // Validated outside the try block so its error is never re-wrapped.
            const api_key = validate_api_key(config.processing.firecrawl_map.api_key, this.name);

            try {
                // Advanced depth widens the URL budget.
                const url_limit = extract_depth === 'advanced' ? 200 : 50;
                const request_body = JSON.stringify({
                    url: target_url,
                    limit: url_limit,
                    ignoreSitemap: false, // keep the sitemap in play for better coverage
                    includeSubdomains: false, // stay on the same domain
                });

                const map_response = await fetch(config.processing.firecrawl_map.base_url, {
                    method: 'POST',
                    headers: {
                        'Authorization': `Bearer ${api_key}`,
                        'Content-Type': 'application/json',
                    },
                    body: request_body,
                    signal: AbortSignal.timeout(config.processing.firecrawl_map.timeout),
                });

                if (!map_response.ok) {
                    throw http_error_for(map_response, this.name);
                }

                const map_data = (await map_response.json());

                // A 2xx response can still carry a failure in its payload.
                if (!map_data.success || map_data.error) {
                    throw new ProviderError(ErrorType.PROVIDER_ERROR, `Error mapping website: ${map_data.error || 'Unknown error'}`, this.name);
                }

                // An empty or missing link list is reported as a failure.
                if (!map_data.links || map_data.links.length === 0) {
                    throw new ProviderError(ErrorType.PROVIDER_ERROR, 'No URLs discovered during mapping', this.name);
                }

                // Markdown document: heading, count line, then one bullet per link.
                const site_map_text = [
                    `# Site Map for ${target_url}`,
                    '',
                    `Found ${map_data.links.length} URLs:`,
                    '',
                    map_data.links.map((link) => `- ${link}`).join('\n'),
                ].join('\n');

                // The whole listing is exposed as one raw entry keyed by the start URL.
                const raw_contents = [{
                        url: target_url,
                        content: site_map_text,
                    }];

                const metadata = {
                    title: `Site Map for ${target_url}`,
                    word_count: map_data.links.length, // URLs are counted as "words"
                    urls_processed: 1, // only the starting URL was submitted
                    successful_extractions: 1,
                    extract_depth,
                };

                return {
                    content: site_map_text,
                    raw_contents,
                    metadata,
                    source_provider: this.name,
                };
            }
            catch (error) {
                // Errors that are already ProviderErrors pass through untouched.
                if (error instanceof ProviderError) {
                    throw error;
                }
                const reason = error instanceof Error ? error.message : 'Unknown error';
                throw new ProviderError(ErrorType.API_ERROR, `Failed to map website: ${reason}`, this.name);
            }
        };

        return retry_with_backoff(run_map);
    }
}