// mcp-omnisearch
// Version:
// MCP server for integrating Omnisearch with LLMs
// 84 lines (83 loc) • 3.65 kB
// JavaScript
// Firecrawl Crawl Provider Implementation
import { ErrorType, ProviderError, } from '../../common/types.js';
import { is_api_key_valid } from '../../common/utils.js';
import { config, FIRECRAWL_API_KEY } from '../../config/env.js';
/**
 * Crawl provider that POSTs a crawl request to the Firecrawl v1 crawl
 * endpoint and flattens the returned pages into a single result object.
 */
export class FirecrawlCrawlProviderImpl {
    /**
     * Sets the provider's name, description and endpoint, then checks the
     * configured API key.
     *
     * @throws {Error} If `is_api_key_valid` rejects `FIRECRAWL_API_KEY`.
     */
    constructor() {
        this.name = 'firecrawl';
        this.description = 'Crawl a website with Firecrawl, exploring multiple pages from a starting URL. Best for comprehensive content extraction from entire websites or sections.';
        this.api_url = 'https://api.firecrawl.dev/v1/crawl';
        if (!is_api_key_valid(FIRECRAWL_API_KEY, 'firecrawl')) {
            throw new Error('Invalid Firecrawl API key');
        }
    }

    /**
     * Crawl a site starting at `url`.
     *
     * @param {string} url - Starting URL, sent as `url` in the request body.
     * @param {{ max_depth?: number, format?: string }} [options] - Optional
     *   settings. `max_depth` defaults to 2 and is sent as `maxDepth`;
     *   `format` defaults to 'markdown' and is sent as the single entry of
     *   `formats`.
     * @returns {Promise<object>} Object with `content` (all page contents
     *   joined by a blank line), `pages` (`url`/`content`/`title` per page),
     *   `metadata` (`root_url`, `pages_crawled`, `max_depth`, `format`,
     *   `timestamp`) and `source_provider`.
     * @throws {ProviderError} `RATE_LIMIT` on HTTP 429; `API_ERROR` on any
     *   other non-OK status, on a `success: false` payload, or when the
     *   request is aborted by the timeout; `PROVIDER_ERROR` for anything else.
     */
    async crawl_url(url, options) {
        // Declared outside `try` so `finally` can always release the timer.
        let timeout_id;
        let timeout;
        try {
            // `??` rather than `||`: an explicit depth of 0 must not be
            // replaced by the default.
            const max_depth = options?.max_depth ?? 2;
            // `||` on purpose: an empty string is not a usable format.
            const format = options?.format || 'markdown';

            // Abort the request once the configured timeout elapses.
            timeout = config.firecrawl.crawl.timeout;
            const controller = new AbortController();
            timeout_id = setTimeout(() => controller.abort(), timeout);

            const request_body = {
                url,
                maxDepth: max_depth,
                formats: [format],
                blockAds: true,
            };

            const response = await fetch(this.api_url, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${FIRECRAWL_API_KEY}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(request_body),
                signal: controller.signal,
            });

            if (!response.ok) {
                if (response.status === 429) {
                    throw new ProviderError(ErrorType.RATE_LIMIT, 'Firecrawl rate limit exceeded', this.name);
                }
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${response.status} ${response.statusText}`, this.name);
            }

            const data = await response.json();
            if (!data.success) {
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${data.error || 'Unknown error'}`, this.name);
            }

            // NOTE(review): assumes the payload carries the crawled pages at
            // `data.data.pages` — confirm against the Firecrawl API reference.
            const pages = data.data.pages || [];
            const content = pages
                .map((page) => page.content)
                .join('\n\n');

            return {
                content,
                pages: pages.map((page) => ({
                    url: page.url,
                    content: page.content,
                    title: page.title,
                })),
                metadata: {
                    root_url: url,
                    pages_crawled: pages.length,
                    max_depth,
                    format,
                    timestamp: new Date().toISOString(),
                },
                source_provider: this.name,
            };
        }
        catch (error) {
            // Errors raised above are already classified; re-wrapping them
            // would downgrade e.g. RATE_LIMIT to PROVIDER_ERROR.
            if (error instanceof ProviderError) {
                throw error;
            }
            if (error instanceof Error && error.name === 'AbortError') {
                throw new ProviderError(ErrorType.API_ERROR, `Firecrawl crawl request timed out after ${timeout}ms`, this.name);
            }
            throw new ProviderError(ErrorType.PROVIDER_ERROR, `Error crawling URL: ${error instanceof Error ? error.message : String(error)}`, this.name, error);
        }
        finally {
            // Runs on success and on failure, so a rejected fetch does not
            // leave the abort timer pending.
            clearTimeout(timeout_id);
        }
    }
}