UNPKG

@langchain/community

Version:
1 lines 2.67 kB
{"version":3,"file":"html.cjs","names":["BaseDocumentLoader","AsyncCaller","Document"],"sources":["../../../src/document_loaders/web/html.ts"],"sourcesContent":["import {\n AsyncCaller,\n AsyncCallerParams,\n} from \"@langchain/core/utils/async_caller\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport { Document } from \"@langchain/core/documents\";\nimport type { DocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * Represents the parameters for configuring WebBaseLoaders. It extends the\n * AsyncCallerParams interface and adds additional parameters specific to\n * web-based loaders.\n */\nexport interface WebBaseLoaderParams extends AsyncCallerParams {\n /**\n * The timeout in milliseconds for the fetch request. Defaults to 10s.\n */\n timeout?: number;\n\n /**\n * The text decoder to use to decode the response. Defaults to UTF-8.\n */\n textDecoder?: TextDecoder;\n /**\n * The headers to use in the fetch request.\n */\n headers?: HeadersInit;\n}\n\nexport interface WebBaseLoader extends DocumentLoader {\n timeout: number;\n\n caller: AsyncCaller;\n\n textDecoder?: TextDecoder;\n\n headers?: HeadersInit;\n}\n\nexport class HTMLWebBaseLoader\n extends BaseDocumentLoader\n implements WebBaseLoader\n{\n timeout: number;\n\n caller: AsyncCaller;\n\n textDecoder?: TextDecoder;\n\n headers?: HeadersInit;\n\n constructor(\n public webPath: string,\n fields?: WebBaseLoaderParams\n ) {\n super();\n const { timeout, textDecoder, headers, ...rest } = fields ?? {};\n this.timeout = timeout ?? 10000;\n this.caller = new AsyncCaller(rest);\n this.textDecoder = textDecoder;\n this.headers = headers;\n }\n\n async load(): Promise<Document[]> {\n const response = await this.caller.call(fetch, this.webPath, {\n signal: this.timeout ? AbortSignal.timeout(this.timeout) : undefined,\n headers: this.headers,\n });\n\n const html =\n this.textDecoder?.decode(await response.arrayBuffer()) ??\n (await response.text());\n\n return [new Document({ pageContent: html })];\n }\n}\n"],"mappings":";;;;;;;AAuCA,IAAa,oBAAb,cACUA,sCAAAA,mBAEV;CACE;CAEA;CAEA;CAEA;CAEA,YACE,SACA,QACA;AACA,SAAO;AAHA,OAAA,UAAA;EAIP,MAAM,EAAE,SAAS,aAAa,SAAS,GAAG,SAAS,UAAU,EAAE;AAC/D,OAAK,UAAU,WAAW;AAC1B,OAAK,SAAS,IAAIC,mCAAAA,YAAY,KAAK;AACnC,OAAK,cAAc;AACnB,OAAK,UAAU;;CAGjB,MAAM,OAA4B;EAChC,MAAM,WAAW,MAAM,KAAK,OAAO,KAAK,OAAO,KAAK,SAAS;GAC3D,QAAQ,KAAK,UAAU,YAAY,QAAQ,KAAK,QAAQ,GAAG,KAAA;GAC3D,SAAS,KAAK;GACf,CAAC;AAMF,SAAO,CAAC,IAAIC,0BAAAA,SAAS,EAAE,aAHrB,KAAK,aAAa,OAAO,MAAM,SAAS,aAAa,CAAC,IACrD,MAAM,SAAS,MAAM,EAEkB,CAAC,CAAC"}