UNPKG

@langchain/community

Version:
1 lines 1.59 kB
{"version":3,"file":"imsdb.cjs","names":["CheerioWebBaseLoader","Document"],"sources":["../../../src/document_loaders/web/imsdb.ts"],"sourcesContent":["import { Document } from \"@langchain/core/documents\";\nimport { CheerioWebBaseLoader } from \"./cheerio.js\";\n\n/**\n * A class that extends the CheerioWebBaseLoader class. It represents a\n * loader for loading web pages from the IMSDB (Internet Movie Script\n * Database) website.\n */\nexport class IMSDBLoader extends CheerioWebBaseLoader {\n constructor(public webPath: string) {\n super(webPath);\n }\n\n /**\n * An asynchronous method that loads the web page using the scrape()\n * method inherited from the base class. It selects the element with the\n * class 'scrtext' using the $ function provided by Cheerio and extracts\n * the text content. It creates a Document instance with the text content\n * as the page content and the source as metadata. It returns an array\n * containing the Document instance.\n * @returns An array containing a Document instance.\n */\n public async load(): Promise<Document[]> {\n const $ = await this.scrape();\n const text = $(\"td[class='scrtext']\").text().trim();\n const metadata = { source: this.webPath };\n return [new Document({ pageContent: text, metadata })];\n }\n}\n"],"mappings":";;;;;;;;;;;AAQA,IAAa,cAAb,cAAiCA,qCAAAA,qBAAqB;CACpD,YAAY,SAAwB;AAClC,QAAM,QAAQ;AADG,OAAA,UAAA;;;;;;;;;;;CAanB,MAAa,OAA4B;AAIvC,SAAO,CAAC,IAAIC,0BAAAA,SAAS;GAAE,cAHb,MAAM,KAAK,QAAQ,EACd,sBAAsB,CAAC,MAAM,CAAC,MAAM;GAET,UADzB,EAAE,QAAQ,KAAK,SAAS;GACW,CAAC,CAAC"}