UNPKG

mcp-omnisearch

Version:

MCP server for integrating Omnisearch with LLMs

95 lines (94 loc) 4.85 kB
import { ErrorType, ProviderError, } from '../../../common/types.js';
import { is_valid_url, retry_with_backoff, validate_api_key, } from '../../../common/utils.js';
import { config } from '../../../config/env.js';

/**
 * Translate a non-OK HTTP response into the ProviderError to throw.
 *
 * @param {{ status: number, statusText: string }} response - The non-OK fetch response.
 * @param {string} provider_name - Provider name attached to the error.
 * @returns {ProviderError} The error matching the response status.
 */
const error_for_status = (response, provider_name) => {
  switch (response.status) {
    case 400:
      return new ProviderError(ErrorType.INVALID_INPUT, 'Invalid request parameters', provider_name);
    case 401:
      return new ProviderError(ErrorType.API_ERROR, 'Invalid API key', provider_name);
    case 403:
      return new ProviderError(ErrorType.API_ERROR, 'API key does not have access to this endpoint', provider_name);
    case 429:
      return new ProviderError(ErrorType.RATE_LIMIT, 'Rate limit exceeded', provider_name);
    case 500:
      return new ProviderError(ErrorType.PROVIDER_ERROR, 'Tavily Extract API internal error', provider_name);
    default:
      return new ProviderError(ErrorType.API_ERROR, `Unexpected error: ${response.statusText}`, provider_name);
  }
};

/**
 * Provider that extracts web page content by POSTing URLs to the
 * `/extract` endpoint under `config.processing.tavily_extract.base_url`.
 */
export class TavilyExtractProvider {
  constructor() {
    this.name = 'tavily_extract';
    this.description = 'Extract web page content from single or multiple URLs using Tavily Extract. Efficiently converts web content into clean, processable text with configurable extraction depth and optional image extraction. Returns both combined and individual URL content. Best for content analysis, data collection, and research.';
  }

  /**
   * Extract the content of one or more URLs.
   *
   * @param {string|string[]} url - A single URL or an array of URLs. Every
   *   entry must pass `is_valid_url`.
   * @param {string} [extract_depth='basic'] - Forwarded unchanged as
   *   `extract_depth` in the request body and echoed in the result metadata.
   * @returns {Promise<object>} Object with `content` (all extracted texts
   *   joined by blank lines), `raw_contents` (`{ url, content }` per result),
   *   `metadata` (`word_count`, `failed_urls`, `urls_processed`,
   *   `successful_extractions`, `extract_depth`) and `source_provider`.
   * @throws {ProviderError} INVALID_INPUT for an empty or invalid URL list or
   *   an HTTP 400; RATE_LIMIT for HTTP 429; PROVIDER_ERROR for HTTP 500 or
   *   when no results come back; API_ERROR for other HTTP failures and for
   *   any non-ProviderError raised during the request (network, timeout,
   *   JSON parsing).
   */
  async process_content(url, extract_depth = 'basic') {
    const urls = Array.isArray(url) ? url : [url];

    // Reject an empty list locally instead of sending `urls: []` to the API.
    if (urls.length === 0) {
      throw new ProviderError(ErrorType.INVALID_INPUT, 'No URLs provided', this.name);
    }

    // Validate all URLs before any network traffic happens.
    for (const u of urls) {
      if (!is_valid_url(u)) {
        throw new ProviderError(ErrorType.INVALID_INPUT, `Invalid URL provided: ${u}`, this.name);
      }
    }

    const extract_request = async () => {
      const api_key = validate_api_key(config.processing.tavily_extract.api_key, this.name);
      try {
        const response = await fetch(`${config.processing.tavily_extract.base_url}/extract`, {
          method: 'POST',
          headers: {
            Authorization: `Bearer ${api_key}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            urls,
            include_images: false,
            extract_depth,
          }),
          signal: AbortSignal.timeout(config.processing.tavily_extract.timeout),
        });

        if (!response.ok) {
          throw error_for_status(response, this.name);
        }

        const data = await response.json();

        // Tolerate a response that omits either list: a missing list is
        // treated as empty rather than crashing on `.length`.
        const results = Array.isArray(data?.results) ? data.results : [];
        const failed_results = Array.isArray(data?.failed_results) ? data.failed_results : [];

        if (results.length === 0) {
          throw new ProviderError(ErrorType.PROVIDER_ERROR, 'No content extracted from URL', this.name);
        }

        // One entry per successfully extracted URL.
        const raw_contents = results.map((result) => ({
          url: result.url,
          content: result.raw_content,
        }));

        // Combine all results into a single content string.
        const combined_content = raw_contents
          .map((result) => result.content)
          .join('\n\n');

        // Whitespace-delimited word count over the combined text.
        const word_count = combined_content
          .split(/\s+/)
          .filter(Boolean).length;

        // Only expose failed URLs when there are any.
        const failed_urls = failed_results.length > 0 ? failed_results : undefined;

        return {
          content: combined_content,
          raw_contents,
          metadata: {
            word_count,
            failed_urls,
            urls_processed: urls.length,
            successful_extractions: results.length,
            extract_depth,
          },
          source_provider: this.name,
        };
      } catch (error) {
        // Errors already classified above pass through untouched.
        if (error instanceof ProviderError) {
          throw error;
        }
        throw new ProviderError(ErrorType.API_ERROR, `Failed to extract content: ${error instanceof Error ? error.message : 'Unknown error'}`, this.name);
      }
    };

    // NOTE(review): retry policy lives in retry_with_backoff (not visible here).
    return retry_with_backoff(extract_request);
  }
}