@langchain/community
Version:
Third-party integrations for LangChain.js
1 lines • 1.73 kB
Source Map (JSON)
{"version":3,"file":"college_confidential.cjs","names":["CheerioWebBaseLoader","Document"],"sources":["../../../src/document_loaders/web/college_confidential.ts"],"sourcesContent":["import { Document } from \"@langchain/core/documents\";\nimport { CheerioWebBaseLoader } from \"./cheerio.js\";\n\n/**\n * A document loader specifically designed for loading documents from the\n * College Confidential website. It extends the CheerioWebBaseLoader.\n * @example\n * ```typescript\n * const loader = new CollegeConfidentialLoader(\"https:exampleurl.com\");\n * const docs = await loader.load();\n * console.log({ docs });\n * ```\n */\nexport class CollegeConfidentialLoader extends CheerioWebBaseLoader {\n constructor(webPath: string) {\n super(webPath);\n }\n\n /**\n * Overrides the base load() method to extract the text content from the\n * loaded document using a specific selector for the College Confidential\n * website. It creates a Document instance with the extracted text and\n * metadata, and returns an array containing the Document instance.\n * @returns An array containing a Document instance with the extracted text and metadata from the loaded College Confidential web document.\n */\n public async load(): Promise<Document[]> {\n const $ = await this.scrape();\n const text = $(\"main[class='skin-handler']\").text();\n const metadata = { source: this.webPath };\n return [new Document({ pageContent: text, metadata })];\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;AAaA,IAAa,4BAAb,cAA+CA,qCAAAA,qBAAqB;CAClE,YAAY,SAAiB;AAC3B,QAAM,QAAQ;;;;;;;;;CAUhB,MAAa,OAA4B;AAIvC,SAAO,CAAC,IAAIC,0BAAAA,SAAS;GAAE,cAHb,MAAM,KAAK,QAAQ,EACd,6BAA6B,CAAC,MAAM;GAET,UADzB,EAAE,QAAQ,KAAK,SAAS;GACW,CAAC,CAAC"}