UNPKG

mcp-omnisearch

Version:

MCP server for integrating Omnisearch with LLMs

94 lines (93 loc) 4.13 kB
// Firecrawl Extract Provider Implementation
import { ErrorType, ProviderError } from '../../common/types.js';
import { is_api_key_valid } from '../../common/utils.js';
import { config, FIRECRAWL_API_KEY } from '../../config/env.js';

/**
 * Provider that asks the Firecrawl scrape endpoint to turn a web page into
 * structured JSON, guided by a natural-language prompt.
 */
export class FirecrawlExtractProviderImpl {
    /**
     * @throws {Error} When `is_api_key_valid` rejects the configured
     *   `FIRECRAWL_API_KEY`.
     */
    constructor() {
        this.name = 'firecrawl';
        this.description =
            'Extract structured data from webpages with Firecrawl, using AI to convert web content into structured JSON. Best for extracting specific information from pages.';
        this.api_url = 'https://api.firecrawl.dev/v1/scrape';
        if (!is_api_key_valid(FIRECRAWL_API_KEY, 'firecrawl')) {
            throw new Error('Invalid Firecrawl API key');
        }
    }

    /**
     * Extract structured data from a single page.
     *
     * @param {string} url - Page to scrape.
     * @param {string} prompt - Instruction sent as `jsonOptions.prompt`.
     * @returns {Promise<{data: *, metadata: {url: string, prompt: string, timestamp: string}, source_provider: string}>}
     *   The extracted payload plus request metadata. `data` is `{}` when the
     *   response carries neither `json` nor `llm_extraction`, and
     *   `{ raw: <string> }` when `json` is a string that is not valid JSON.
     * @throws {ProviderError} `RATE_LIMIT` on HTTP 429; `API_ERROR` on any
     *   other non-OK status, on `success: false`, on a missing `data` object,
     *   or on timeout; `PROVIDER_ERROR` for any other failure (original error
     *   attached as the fourth constructor argument).
     */
    async extract_data(url, prompt) {
        // Declared outside the try so `finally` can always clear the timer.
        let timeout_id;
        try {
            const timeout = config.firecrawl.extract.timeout;
            const controller = new AbortController();
            timeout_id = setTimeout(() => controller.abort(), timeout);

            // Request JSON-format extraction driven by the caller's prompt.
            const request_body = {
                url,
                formats: ['json'],
                blockAds: true,
                jsonOptions: { prompt },
            };

            const response = await fetch(this.api_url, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${FIRECRAWL_API_KEY}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(request_body),
                signal: controller.signal,
            });

            if (!response.ok) {
                if (response.status === 429) {
                    throw new ProviderError(
                        ErrorType.RATE_LIMIT,
                        'Firecrawl rate limit exceeded',
                        this.name,
                    );
                }
                throw new ProviderError(
                    ErrorType.API_ERROR,
                    `Firecrawl API error: ${response.status} ${response.statusText}`,
                    this.name,
                );
            }

            // The abort timer is still armed here, so a stalled body read is
            // reported as a timeout as well.
            const data = await response.json();
            if (!data.success) {
                throw new ProviderError(
                    ErrorType.API_ERROR,
                    `Firecrawl API error: ${data.error || 'Unknown error'}`,
                    this.name,
                );
            }

            const payload = data.data;
            if (payload == null) {
                // Fail with a clear message instead of a TypeError from
                // dereferencing a missing `data` object.
                throw new ProviderError(
                    ErrorType.API_ERROR,
                    'Firecrawl API error: response contained no data',
                    this.name,
                );
            }

            let extracted_data = {};
            if (payload.json) {
                if (typeof payload.json === 'object') {
                    // Already structured; use as-is.
                    extracted_data = payload.json;
                } else {
                    try {
                        extracted_data = JSON.parse(payload.json);
                    } catch {
                        // Not valid JSON: hand back the raw value rather
                        // than failing the whole extraction.
                        extracted_data = { raw: payload.json };
                    }
                }
            } else if (payload.llm_extraction) {
                // Alternative field the response may carry the result in.
                extracted_data = payload.llm_extraction;
            }

            return {
                data: extracted_data,
                metadata: {
                    url: payload.metadata?.sourceURL || url,
                    prompt,
                    timestamp: new Date().toISOString(),
                },
                source_provider: this.name,
            };
        } catch (error) {
            // Errors raised deliberately above already carry the right
            // ErrorType (RATE_LIMIT / API_ERROR); re-wrapping them would
            // hide that from callers.
            if (error instanceof ProviderError) {
                throw error;
            }
            if (error instanceof Error && error.name === 'AbortError') {
                throw new ProviderError(
                    ErrorType.API_ERROR,
                    `Firecrawl extract request timed out after ${config.firecrawl.extract.timeout}ms`,
                    this.name,
                );
            }
            throw new ProviderError(
                ErrorType.PROVIDER_ERROR,
                `Error extracting data: ${error instanceof Error ? error.message : String(error)}`,
                this.name,
                error,
            );
        } finally {
            // Runs on success and on every failure path, so the abort timer
            // never outlives the request.
            clearTimeout(timeout_id);
        }
    }
}