mcp-omnisearch
Version:
MCP server for integrating Omnisearch with LLMs
94 lines (93 loc) • 4.13 kB
JavaScript
// Firecrawl Extract Provider Implementation
import { ErrorType, ProviderError, } from '../../common/types.js';
import { is_api_key_valid } from '../../common/utils.js';
import { config, FIRECRAWL_API_KEY } from '../../config/env.js';
/**
 * Provider that asks the Firecrawl scrape endpoint to turn a web page into
 * structured JSON, guided by a natural-language prompt.
 */
export class FirecrawlExtractProviderImpl {
  /**
   * @throws {Error} When `is_api_key_valid` rejects the configured Firecrawl API key.
   */
  constructor() {
    this.name = 'firecrawl';
    this.description = 'Extract structured data from webpages with Firecrawl, using AI to convert web content into structured JSON. Best for extracting specific information from pages.';
    this.api_url = 'https://api.firecrawl.dev/v1/scrape';
    if (!is_api_key_valid(FIRECRAWL_API_KEY, 'firecrawl')) {
      throw new Error('Invalid Firecrawl API key');
    }
  }

  /**
   * POST `url` and `prompt` to the Firecrawl scrape endpoint and return the
   * structured data found in the response.
   *
   * The request, including reading the response body, is aborted once
   * `config.firecrawl.extract.timeout` milliseconds have elapsed.
   *
   * @param {string} url - Page to extract data from.
   * @param {string} prompt - Instruction sent as `jsonOptions.prompt`.
   * @returns {Promise<{data: any, metadata: {url: string, prompt: string, timestamp: string}, source_provider: string}>}
   *   `data` is `data.json` from the response (parsed when it is a JSON
   *   string, wrapped as `{ raw }` when it cannot be parsed), otherwise
   *   `data.llm_extraction`, otherwise `{}`.
   * @throws {ProviderError} `RATE_LIMIT` on HTTP 429; `API_ERROR` on any other
   *   non-OK status, on a `success: false` payload, or on timeout;
   *   `PROVIDER_ERROR` (carrying the original error) for anything else.
   */
  async extract_data(url, prompt) {
    // Declared outside the try so `finally` can always disarm the timer.
    let timeout_id;
    try {
      const timeout = config.firecrawl.extract.timeout;
      const controller = new AbortController();
      timeout_id = setTimeout(() => controller.abort(), timeout);

      // Request body asking Firecrawl for prompt-guided JSON extraction
      const request_body = {
        url,
        formats: ['json'],
        blockAds: true,
        jsonOptions: {
          prompt,
        },
      };

      const response = await fetch(this.api_url, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${FIRECRAWL_API_KEY}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(request_body),
        signal: controller.signal,
      });

      if (!response.ok) {
        if (response.status === 429) {
          throw new ProviderError(ErrorType.RATE_LIMIT, 'Firecrawl rate limit exceeded', this.name);
        }
        throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${response.status} ${response.statusText}`, this.name);
      }

      // The abort timer is still armed here, so a stalled body read also times out.
      const data = await response.json();
      if (!data.success) {
        throw new ProviderError(ErrorType.API_ERROR, `Firecrawl API error: ${data.error || 'Unknown error'}`, this.name);
      }

      let extracted_data = {};
      const json_payload = data.data.json;
      if (json_payload) {
        if (typeof json_payload === 'object') {
          // Already structured; use as-is
          extracted_data = json_payload;
        }
        else {
          try {
            extracted_data = JSON.parse(json_payload);
          }
          catch {
            // Not valid JSON: hand the raw value back instead of failing the call
            extracted_data = { raw: json_payload };
          }
        }
      }
      else if (data.data.llm_extraction) {
        // Fall back to the `llm_extraction` field when no `json` field is present
        extracted_data = data.data.llm_extraction;
      }

      return {
        data: extracted_data,
        metadata: {
          url: data.data.metadata?.sourceURL || url,
          prompt,
          timestamp: new Date().toISOString(),
        },
        source_provider: this.name,
      };
    }
    catch (error) {
      // Errors raised above are already classified (RATE_LIMIT / API_ERROR);
      // re-wrapping them would replace their type with PROVIDER_ERROR.
      if (error instanceof ProviderError) {
        throw error;
      }
      if (error instanceof Error && error.name === 'AbortError') {
        throw new ProviderError(ErrorType.API_ERROR, `Firecrawl extract request timed out after ${config.firecrawl.extract.timeout}ms`, this.name);
      }
      throw new ProviderError(ErrorType.PROVIDER_ERROR, `Error extracting data: ${error instanceof Error ? error.message : String(error)}`, this.name, error);
    }
    finally {
      // Runs on success and on every failure path, so the timer never outlives the call.
      clearTimeout(timeout_id);
    }
  }
}