UNPKG

html-content-processor

Version:

A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.

43 lines (42 loc) 1.19 kB
/**
 * Filter plugin dedicated to the Baidu home page and search-engine pages.
 * It targets the noise found on search-engine landing pages, such as large
 * amounts of style code and search suggestions.
 */

/** Logging surface that a plugin may receive through its context. */
interface Logger {
    /** Receives an informational message. */
    info: (message: string) => void;
    /** Receives a debug message followed by any number of extra values. */
    debug: (message: string, ...args: Array<any>) => void;
}

/** Argument handed to a plugin's `apply` callback. */
interface PluginContext {
    /** DOM document supplied to the plugin. */
    document: Document;
    /** Untyped options bag; its shape is not declared in this file. */
    options: any;
    /** Optional logger; plugins must tolerate its absence. */
    logger?: Logger;
}

/** Shape of a filter plugin object. */
interface FilterPlugin {
    /** Plugin name. */
    name: string;
    /** Human-readable description of the plugin. */
    description: string;
    /** Entry point; receives the plugin context and returns nothing. */
    apply: (context: PluginContext) => void;
    /**
     * Optional predicate over an element.
     * NOTE(review): presumably reports whether the element is worth keeping —
     * confirm against the implementation.
     */
    hasValuableContent?: (element: Element) => boolean;
    /**
     * Optional document-level hook.
     * NOTE(review): by its name it handles the Baidu search form — confirm.
     */
    processBaiduSearchForm?: (document: Document) => void;
    /**
     * Optional document-level hook.
     * NOTE(review): by its name it drops empty containers — confirm.
     */
    removeEmptyContainers?: (document: Document) => void;
}

/** The Baidu filter plugin instance exported by this module. */
export declare const baiduFilterPlugin: FilterPlugin;

/** `filter` section of the object returned by `createBaiduConfig`. */
type BaiduFilterSettings = {
    threshold: number;
    strategy: "dynamic";
    ratio: number;
    removeElements: Array<string>;
    keepElements: Array<string>;
    plugins: Array<FilterPlugin>;
};

/** `converter` section of the object returned by `createBaiduConfig`. */
type BaiduConverterSettings = {
    ignoreImages: boolean;
    ignoreLinks: boolean;
    citations: boolean;
    format: "github";
};

/**
 * Creates the processor configuration dedicated to the Baidu home page.
 *
 * @returns An object with a `filter` section and a `converter` section.
 */
export declare function createBaiduConfig(): {
    filter: BaiduFilterSettings;
    converter: BaiduConverterSettings;
};

export {};