UNPKG

@langchain/community

Version:
1 lines 5.64 kB
{"version":3,"file":"firecrawl.cjs","names":["BaseDocumentLoader","FirecrawlApp","Document"],"sources":["../../../src/document_loaders/web/firecrawl.ts"],"sourcesContent":["import FirecrawlApp from \"@mendable/firecrawl-js\";\nimport { Document, type DocumentInterface } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * Interface representing the parameters for the Firecrawl loader. It\n * includes properties such as the URL to scrape or crawl and the API key.\n */\ninterface FirecrawlLoaderParameters {\n /**\n * URL to scrape or crawl\n */\n url: string;\n\n /**\n * API key for Firecrawl. If not provided, the default value is the value of the FIRECRAWL_API_KEY environment variable.\n */\n apiKey?: string;\n\n /**\n * API URL for Firecrawl.\n */\n apiUrl?: string;\n /**\n * Mode of operation. Can be \"crawl\", \"scrape\", or \"map\". If not provided, the default value is \"crawl\".\n */\n mode?: \"crawl\" | \"scrape\" | \"map\";\n params?: Record<string, unknown>;\n}\n\ninterface FirecrawlDocument {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n metadata?: Record<string, unknown>;\n}\n\n/**\n * Class representing a document loader for loading data from\n * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.\n * @example\n * ```typescript\n * const loader = new FireCrawlLoader({\n * url: \"{url}\",\n * apiKey: \"{apiKey}\",\n * mode: \"crawl\"\n * });\n * const docs = await loader.load();\n * ```\n */\nexport class FireCrawlLoader extends BaseDocumentLoader {\n private apiKey: string;\n\n private apiUrl?: string;\n\n private url: string;\n\n private mode: \"crawl\" | \"scrape\" | \"map\";\n\n private params?: Record<string, unknown>;\n\n constructor(loaderParams: FirecrawlLoaderParameters) {\n super();\n const {\n apiKey = getEnvironmentVariable(\"FIRECRAWL_API_KEY\"),\n apiUrl,\n url,\n mode = \"crawl\",\n params,\n } = loaderParams;\n if (!apiKey) {\n throw new Error(\n \"Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.\"\n );\n }\n\n this.apiKey = apiKey;\n this.apiUrl = apiUrl;\n this.url = url;\n this.mode = mode;\n this.params = params;\n }\n\n /**\n * Loads data from Firecrawl.\n * @returns An array of Documents representing the retrieved data.\n * @throws An error if the data could not be loaded.\n */\n public async load(): Promise<DocumentInterface[]> {\n const params: ConstructorParameters<typeof FirecrawlApp>[0] = {\n apiKey: this.apiKey,\n };\n if (this.apiUrl !== undefined) {\n params.apiUrl = this.apiUrl;\n }\n const app = new FirecrawlApp(params);\n let firecrawlDocs: FirecrawlDocument[];\n\n if (this.mode === \"scrape\") {\n // oxlint-disable-next-line typescript/no-explicit-any\n const response = await app.scrapeUrl(this.url, this.params as any);\n if (!response.success) {\n throw new Error(\n `Firecrawl: Failed to scrape URL. Error: ${response.error}`\n );\n }\n firecrawlDocs = [response] as FirecrawlDocument[];\n } else if (this.mode === \"crawl\") {\n const response = await app.crawlUrl(this.url, this.params);\n if (!response.success) {\n throw new Error(\n `Firecrawl: Failed to crawl URL. Error: ${response.error}`\n );\n }\n firecrawlDocs = response.data as FirecrawlDocument[];\n } else if (this.mode === \"map\") {\n const response = await app.mapUrl(this.url, this.params);\n if (!response.success) {\n throw new Error(\n `Firecrawl: Failed to map URL. Error: ${response.error}`\n );\n }\n firecrawlDocs = response.links as FirecrawlDocument[];\n\n return firecrawlDocs.map(\n (doc) =>\n new Document({\n pageContent: JSON.stringify(doc),\n })\n );\n } else {\n throw new Error(\n `Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`\n );\n }\n\n return firecrawlDocs.map(\n (doc) =>\n new Document({\n pageContent: doc.markdown || doc.html || doc.rawHtml || \"\",\n metadata: doc.metadata || {},\n })\n );\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAmDA,IAAa,kBAAb,cAAqCA,sCAAAA,mBAAmB;CACtD;CAEA;CAEA;CAEA;CAEA;CAEA,YAAY,cAAyC;AACnD,SAAO;EACP,MAAM,EACJ,UAAA,GAAA,0BAAA,wBAAgC,oBAAoB,EACpD,QACA,KACA,OAAO,SACP,WACE;AACJ,MAAI,CAAC,OACH,OAAM,IAAI,MACR,6GACD;AAGH,OAAK,SAAS;AACd,OAAK,SAAS;AACd,OAAK,MAAM;AACX,OAAK,OAAO;AACZ,OAAK,SAAS;;;;;;;CAQhB,MAAa,OAAqC;EAChD,MAAM,SAAwD,EAC5D,QAAQ,KAAK,QACd;AACD,MAAI,KAAK,WAAW,KAAA,EAClB,QAAO,SAAS,KAAK;EAEvB,MAAM,MAAM,IAAIC,uBAAAA,QAAa,OAAO;EACpC,IAAI;AAEJ,MAAI,KAAK,SAAS,UAAU;GAE1B,MAAM,WAAW,MAAM,IAAI,UAAU,KAAK,KAAK,KAAK,OAAc;AAClE,OAAI,CAAC,SAAS,QACZ,OAAM,IAAI,MACR,2CAA2C,SAAS,QACrD;AAEH,mBAAgB,CAAC,SAAS;aACjB,KAAK,SAAS,SAAS;GAChC,MAAM,WAAW,MAAM,IAAI,SAAS,KAAK,KAAK,KAAK,OAAO;AAC1D,OAAI,CAAC,SAAS,QACZ,OAAM,IAAI,MACR,0CAA0C,SAAS,QACpD;AAEH,mBAAgB,SAAS;aAChB,KAAK,SAAS,OAAO;GAC9B,MAAM,WAAW,MAAM,IAAI,OAAO,KAAK,KAAK,KAAK,OAAO;AACxD,OAAI,CAAC,SAAS,QACZ,OAAM,IAAI,MACR,wCAAwC,SAAS,QAClD;AAEH,mBAAgB,SAAS;AAEzB,UAAO,cAAc,KAClB,QACC,IAAIC,0BAAAA,SAAS,EACX,aAAa,KAAK,UAAU,IAAI,EACjC,CAAC,CACL;QAED,OAAM,IAAI,MACR,sBAAsB,KAAK,KAAK,uCACjC;AAGH,SAAO,cAAc,KAClB,QACC,IAAIA,0BAAAA,SAAS;GACX,aAAa,IAAI,YAAY,IAAI,QAAQ,IAAI,WAAW;GACxD,UAAU,IAAI,YAAY,EAAE;GAC7B,CAAC,CACL"}