defuddle

Version:

Extract article content and metadata from web pages.

26 lines (25 loc) • 924 B

TypeScript

export interface ContentScore { score: number; element: Element; } export declare class ContentScorer { private doc; private debug; constructor(doc: Document, debug?: boolean); static scoreElement(element: Element): number; static findBestElement(elements: Element[], minScore?: number): Element | null; /** * Scores blocks based on their content and structure * and removes those that are likely not content */ static scoreAndRemove(doc: Document, debug?: boolean): void; /** * Determines if an element is likely to be content based on its structure and attributes. */ private static isLikelyContent; /** * Scores a block element based on various criteria to determine if it's likely not content. * Returns a negative score if the element is likely not content, a positive score if it is. */ private static scoreNonContentBlock; }