UNPKG

@langchain/community

Version:
40 lines (39 loc) 1.49 kB
import { CheerioWebBaseLoader, CheerioWebBaseLoaderParams } from "./cheerio.js";
import { Document, DocumentInterface } from "@langchain/core/documents";

//#region src/document_loaders/web/sitemap.d.ts
/**
 * Options accepted when constructing a SitemapLoader.
 *
 * Adds sitemap-specific settings on top of everything that
 * CheerioWebBaseLoaderParams already allows.
 */
interface SitemapLoaderParams extends CheerioWebBaseLoaderParams {
  /**
   * URL filters: only URLs that match at least one entry will be loaded.
   *
   * WARNING: every entry is interpreted as a regular expression, so escape
   * special characters in plain strings if needed.
   */
  filterUrls?: Array<string | RegExp>;
  /**
   * The size to chunk the sitemap URLs into for scraping.
   * @default 300
   */
  chunkSize?: number;
}

/**
 * One entry of a parsed sitemap. Only `loc` is required.
 *
 * NOTE(review): the field names look like the child tags of a sitemap `<url>`
 * element (loc / changefreq / lastmod / priority) — confirm against the parser.
 * All values are typed as strings here, including `priority`.
 */
type SiteMapElement = {
  loc: string;
  changefreq?: string;
  lastmod?: string;
  priority?: string;
};

/**
 * Ambient declaration of the sitemap document loader.
 *
 * Extends CheerioWebBaseLoader and is declared as implementing
 * SitemapLoaderParams. Note that the class exposes `allowUrlPatterns`
 * rather than a `filterUrls` member; since `filterUrls` is optional on the
 * interface, the `implements` clause is still satisfied.
 */
declare class SitemapLoader extends CheerioWebBaseLoader implements SitemapLoaderParams {
  /** The path/URL string handed to the constructor. */
  webPath: string;
  /**
   * Patterns used when checking URLs, or `undefined` when none are set.
   * NOTE(review): presumably populated from `params.filterUrls` — verify in the implementation.
   */
  allowUrlPatterns: Array<string | RegExp> | undefined;
  /** Chunk size for scraping sitemap URLs; always a number on the instance. */
  chunkSize: number;
  /**
   * @param webPath - Path/URL string for this loader.
   * @param params - Optional loader settings (see SitemapLoaderParams).
   */
  constructor(webPath: string, params?: SitemapLoaderParams);
  /**
   * Evaluates a single URL and returns a boolean.
   * NOTE(review): presumably tests `url` against `allowUrlPatterns`; the meaning
   * of `true` vs `false` is not visible from this declaration — confirm before relying on it.
   */
  _checkUrlPatterns(url: string): boolean;
  /** Resolves to the list of sitemap entries. */
  parseSitemap(): Promise<SiteMapElement[]>;
  /**
   * Resolves to documents produced for the given sitemap entries.
   * @param elements - Sitemap entries to load.
   */
  _loadSitemapUrls(elements: SiteMapElement[]): Promise<DocumentInterface[]>;
  /** Resolves to the loaded documents. */
  load(): Promise<Document[]>;
}
//#endregion
export { SitemapLoader, SitemapLoaderParams };
//# sourceMappingURL=sitemap.d.ts.map