defuddle
Version:
Extract article content and metadata from web pages.
45 lines (44 loc) • 1.59 kB
TypeScript
/**
* Standardized comment HTML construction.
*
* Used by Reddit, Hacker News, GitHub, and other extractors to produce
* consistent comment markup.
*
* Metadata format (in markdown): **author** · date · score
* - date is linked if a url is provided
* - score is omitted if not provided
*/
/**
* Data describing a single comment, as accepted by buildComment and
* (in list form) by buildCommentTree.
*/
export interface CommentData {
/** Comment author name; appears first in the metadata line. */
author: string;
/** Display date (e.g. "2025-01-15"); linked when `url` is provided. */
date: string;
/**
* Comment body HTML.
* NOTE(review): presumably inserted as-is rather than escaped — confirm against the implementation.
*/
content: string;
/**
* Nesting depth (0 = top-level). Omit for flat lists.
* buildCommentTree uses this value to derive the reply hierarchy.
*/
depth?: number;
/** Score text (e.g. "42 points"). Left out of the metadata line when not provided. */
score?: string;
/** Permalink URL for the comment. When provided, the date is wrapped in a link. */
url?: string;
}
/**
* Build the full content HTML for a post with optional comments section.
*
* @param site - Site identifier for wrapper class (e.g. "reddit", "hackernews", "github")
* @param postContent - The main post body HTML
* @param comments - Pre-built comments HTML string (from buildCommentTree).
*   NOTE(review): presumably an empty string omits the comments section — confirm against the implementation.
* @returns The assembled HTML as a string.
*/
export declare function buildContentHtml(site: string, postContent: string, comments: string): string;
/**
* Build a nested comment tree from a flat list of comments with depth.
* Uses <blockquote> elements to represent reply hierarchy.
*
* @param comments - Flat list of comments; each entry's `depth` gives its nesting level.
*   NOTE(review): presumably replies are expected to directly follow their parent in the list — confirm against the implementation.
* @returns The comments rendered as an HTML string, suitable as the `comments` argument of buildContentHtml.
*/
export declare function buildCommentTree(comments: CommentData[]): string;
/**
* Build a single comment div with metadata and content.
*
* Metadata order: author · date · score
* - date is wrapped in a link if url is provided
* - score is omitted if not provided
*
* @param comment - The comment to render (author, date, content, and optional score/url).
* @returns The comment markup as an HTML string.
*/
export declare function buildComment(comment: CommentData): string;