UNPKG

@langchain/community

Version:
31 lines (30 loc) 1.12 kB
import type { Options } from "mozilla-readability";
import { MappingDocumentTransformer, Document } from "@langchain/core/documents";
/**
 * A transformer that uses the Mozilla Readability library to extract the
 * main content from a web page.
 *
 * This is an ambient declaration only: it describes the public shape of the
 * class (a `MappingDocumentTransformer` subclass configured with
 * `mozilla-readability` `Options`); the implementation lives in the
 * accompanying JavaScript module.
 *
 * @example
 * ```typescript
 * const loader = new CheerioWebBaseLoader("https://example.com/article");
 * const docs = await loader.load();
 *
 * // NOTE(review): confirm the option name below against the
 * // RecursiveCharacterTextSplitter parameters; its size option appears to be
 * // `chunkSize` rather than `maxCharacterCount`.
 * const splitter = new RecursiveCharacterTextSplitter({
 *   maxCharacterCount: 5000,
 * });
 * const transformer = new MozillaReadabilityTransformer();
 *
 * // The sequence processes the loaded documents through the splitter and then the transformer.
 * const sequence = splitter.pipe(transformer);
 *
 * // Invoke the sequence to transform the documents into a more readable format.
 * const newDocuments = await sequence.invoke(docs);
 *
 * console.log(newDocuments);
 * ```
 */
export declare class MozillaReadabilityTransformer extends MappingDocumentTransformer {
    /**
     * Readability configuration held by this transformer, typed as `Options`
     * from `mozilla-readability`. Visible to subclasses only (`protected`).
     * Presumably the value supplied to the constructor; the assignment is not
     * visible in this declaration file.
     */
    protected options: Options;
    /**
     * Returns a string name for this class.
     * NOTE(review): presumably the identifier LangChain uses when serializing
     * the class; confirm against the implementation.
     *
     * @returns The class name as a string.
     */
    static lc_name(): string;
    /**
     * Creates a new transformer.
     *
     * @param options - Optional `mozilla-readability` `Options`. May be omitted;
     * the behavior when omitted is defined by the implementation and is not
     * shown in this declaration.
     */
    constructor(options?: Options);
    /**
     * Asynchronously maps one input `Document` to one output `Document`.
     * Per the class description this is where the main content is extracted
     * with Readability, but the body is not part of this declaration, so
     * which fields of the document are read or rewritten should be confirmed
     * against the implementation.
     *
     * @param document - The document to transform.
     * @returns A promise that resolves to the transformed document.
     */
    _transformDocument(document: Document): Promise<Document>;
}