html-content-processor
Version:
A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.
75 lines (74 loc) • 2.39 kB
TypeScript
/**
* HTML to text conversion utility, used for converting HTML to Markdown.
*/
import { Html2TextOptions } from './types';
/**
* Converter that turns an HTML string into Markdown text.
*
* This is a type declaration only: no method bodies are present here, and
* private members are listed by name without signatures. Descriptions of
* private members below are therefore taken from the accompanying docs and
* cannot be checked against code in this file.
*/
export declare class CustomHtml2Text {
/** Conversion options held by the instance (the field's type is not shown in this declaration). */
private options;
/** Base URL held by the instance (the field's type is not shown in this declaration). */
private baseUrl;
/**
* Creates an HTML to text converter instance.
* Both arguments are optional; the defaults applied when they are omitted
* are not visible in this declaration.
* @param baseUrl Base URL for resolving relative links.
* @param options Conversion options.
*/
constructor(baseUrl?: string, options?: Html2TextOptions);
/**
* Updates conversion parameters.
* NOTE(review): whether the given options are merged into the existing ones
* or replace them entirely cannot be determined from this declaration.
* @param options Options to update.
*/
updateParams(options: Html2TextOptions): void;
/**
* Processes HTML and converts it to Markdown.
* The method is asynchronous: callers must await the returned promise.
* @param html HTML string.
* @returns Promise resolving to the converted Markdown.
*/
handle(html: string): Promise<string>;
/**
* Cleans the document by removing unwanted elements.
* NOTE(review): which elements count as "unwanted" is decided by the
* implementation and is not visible here.
* @param doc Document object.
*/
private cleanDocument;
/**
* Converts a DOM element to Markdown.
* The parameters below are documented from the implementation; the signature
* itself is omitted from this declaration because the member is private.
* @param element DOM element.
* @param _level Current heading level (unused in this simplified version, but kept for potential future use).
* @returns Converted Markdown text.
*/
private domToMarkdown;
/**
* Gets the text content of an element, trimmed.
* @param element DOM element.
* @returns Trimmed text content.
*/
private getTextContent;
/**
* Processes the children of an element.
* @param element Parent element.
* @returns Processed Markdown for children.
*/
private processChildren;
/**
* Processes a list element (ul or ol) and converts it to Markdown.
* @param element List element.
* @param marker List marker ('*' for ul, '1.' for ol).
* @returns Processed Markdown for list.
*/
private processList;
/**
* Processes a blockquote element and converts it to Markdown.
* @param element Blockquote element.
* @returns Processed Markdown for blockquote.
*/
private processBlockquote;
/**
* Processes a table element and converts it to Markdown.
* @param table Table element.
* @returns Processed Markdown for table.
*/
private processTable;
/**
* Resolves a URL against the base URL.
* NOTE(review): presumably uses the instance's `baseUrl` field; the behaviour
* when no base URL was supplied is not visible in this declaration.
* @param url URL to resolve.
* @returns Absolute URL.
*/
private resolveUrl;
}