defuddle

Version:

Extract article content and metadata from web pages.

38 lines (37 loc) • 1.39 kB

TypeScript

/**
 * Move all child nodes from source to target.
 * Clears target first, then moves each child node from source.
 *
 * @param source - Node whose children are moved.
 * @param target - Node that receives the children; its existing content is
 *   cleared before the move.
 */
export declare function transferContent(source: Node, target: Node): void;

/**
 * Read an element's inner HTML.
 *
 * @param el - Any object exposing an `innerHTML` string. The parameter is
 *   typed structurally, so it is not restricted to DOM `Element` instances.
 * @returns The element's inner HTML as a string.
 */
export declare function serializeHTML(el: { innerHTML: string; }): string;

/**
 * Decode HTML entities in a string (e.g. `&amp;` → `&`).
 * Uses a <textarea> element which is safe for entity decoding.
 *
 * @param doc - Document passed alongside the text; presumably the document
 *   used to create the <textarea> (implementation not visible in this
 *   declaration file — confirm).
 * @param text - String that may contain HTML entities.
 * @returns The string with entities decoded.
 */
export declare function decodeHTMLEntities(doc: Document, text: string): string;

/**
 * Escape HTML special characters in a string.
 *
 * NOTE(review): the exact set of characters escaped is not visible from this
 * declaration — check the implementation before relying on it for attribute
 * contexts.
 *
 * @param text - Raw text to escape.
 * @returns The escaped string.
 */
export declare function escapeHtml(text: string): string;

/**
 * Check if a URL uses a dangerous protocol (javascript:, data:text/html).
 * Strips whitespace and control characters before checking.
 *
 * @param url - URL string to inspect.
 * @returns Whether the URL uses one of the dangerous protocols.
 */
export declare function isDangerousUrl(url: string): boolean;

/**
 * Check if an element belongs directly to an ancestor table,
 * not to an intervening nested TABLE.
 *
 * @param el - Node to test.
 * @param ancestor - The table the node is expected to belong to.
 * @returns Whether `el` belongs to `ancestor` with no nested TABLE in between.
 */
export declare function isDirectTableChild(el: Node, ancestor: Node): boolean;

/**
 * Parse an HTML string into a DocumentFragment.
 * Uses a <template> element when available (safer: no script execution,
 * no resource loading). Falls back to a <div> for environments that
 * don't support template.content (e.g. some server-side DOM libraries).
 *
 * @param doc - Document passed alongside the markup; presumably the document
 *   used to create the <template> / <div> (implementation not visible in
 *   this declaration file — confirm).
 * @param html - HTML markup to parse.
 * @returns A DocumentFragment holding the parsed nodes.
 */
export declare function parseHTML(doc: Document, html: string): DocumentFragment;