UNPKG

@lobehub/chat

Version:

Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.

98 lines (83 loc) 2.34 kB
import { CrawlImpl, CrawlSuccessResult } from '../type';
import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

/**
 * Shape this module assumes for the `metadata` object of a scrape response.
 * Only `description` and `title` are read below.
 */
interface FirecrawlMetadata {
  description: string;
  keywords: string;
  language: string;
  ogDescription?: string;
  ogImage?: string;
  ogLocaleAlternate?: string[];
  ogSiteName?: string;
  ogTitle?: string;
  ogUrl?: string;
  robots: string;
  statusCode: number;
  sourceURL: string;
  title: string;
}

/** Scraped page payload; only `markdown` is requested by this module. */
interface FirecrawlResults {
  html?: string;
  markdown?: string;
  metadata: FirecrawlMetadata;
}

/**
 * Top-level JSON body of a scrape response as this module expects it.
 * `data` is typed as optional so a body without it is handled explicitly
 * instead of by catching a TypeError.
 */
interface FirecrawlResponse {
  success: boolean;
  data?: FirecrawlResults;
}

/** Markdown shorter than this many characters is treated as "no usable content". */
const MIN_CONTENT_LENGTH = 100;

/**
 * Crawl a page by POSTing it to the `/scrape` endpoint of a Firecrawl-compatible API.
 *
 * Configuration is read from the environment:
 * - `FIRECRAWL_API_KEY` — sent as a `Bearer` token; when unset, an empty
 *   `Authorization` header is sent.
 * - `FIRECRAWL_URL` — API base URL; falls back to `https://api.firecrawl.dev/v1`
 *   when unset or empty.
 *
 * @param url - The page URL to scrape.
 * @returns The page markdown plus basic metadata, or `undefined` when the
 * response carries no markdown, the markdown is shorter than
 * {@link MIN_CONTENT_LENGTH} characters, or the response body cannot be
 * processed (that error is logged, not thrown).
 * @throws NetworkConnectionError when the request rejects with the message `fetch failed`.
 * @throws TimeoutError when the request rejects with a `TimeoutError` (re-thrown as-is).
 * @throws PageNotFoundError when the API answers with HTTP 404.
 * @throws Error for any other non-OK HTTP status; any other request failure is re-thrown unchanged.
 */
export const firecrawl: CrawlImpl = async (url) => {
  // Get API key from environment variable
  const apiKey = process.env.FIRECRAWL_API_KEY;
  // `||` (not `??`) on purpose: an empty FIRECRAWL_URL must also fall back to the default.
  const baseUrl = process.env.FIRECRAWL_URL || 'https://api.firecrawl.dev/v1';

  let res: Response;

  try {
    res = await withTimeout(
      fetch(`${baseUrl}/scrape`, {
        body: JSON.stringify({
          formats: ['markdown'], // ["markdown", "html"]
          url,
        }),
        headers: {
          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        method: 'POST',
      }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
    // Read `message` without assuming `e` is an Error: a nullish rejection value
    // must be re-thrown as-is rather than crash here with a TypeError.
    const message = (e as { message?: unknown } | null | undefined)?.message;

    if (message === 'fetch failed') {
      throw new NetworkConnectionError();
    }

    // Timeouts propagate unchanged so callers can tell them apart.
    if (e instanceof TimeoutError) {
      throw e;
    }

    throw e;
  }

  if (!res.ok) {
    if (res.status === 404) {
      throw new PageNotFoundError(res.statusText);
    }

    throw new Error(`Firecrawl request failed with status ${res.status}: ${res.statusText}`);
  }

  try {
    const payload = (await res.json()) as FirecrawlResponse | null;
    const result = payload?.data;
    const markdown = result?.markdown;

    // Check if content is missing, empty or too short
    if (!result || !markdown || markdown.length < MIN_CONTENT_LENGTH) {
      return;
    }

    return {
      content: markdown,
      contentType: 'text',
      description: result.metadata.description,
      length: markdown.length,
      siteName: new URL(url).hostname,
      title: result.metadata.title,
      url: url,
    } satisfies CrawlSuccessResult;
  } catch (error) {
    // Best effort: a body that cannot be parsed or mapped yields `undefined`
    // instead of failing the crawl.
    console.error(error);
  }

  return;
};