defuddle
Version:
Extract article content and metadata from web pages.
37 lines (36 loc) • 1.05 kB
TypeScript
import { DefuddleOptions, DefuddleResponse } from './types';
/**
 * Public type surface of the `Defuddle` class.
 *
 * Per the package description, Defuddle extracts article content and
 * metadata from web pages. This is an ambient declaration (`declare class`):
 * it contains no implementation, and private members are listed by name
 * only, without parameter or return types.
 */
export declare class Defuddle {
// Private instance state. Types are not given in this declaration.
// NOTE(review): presumably `doc` and `options` hold the constructor
// arguments of the same names — confirm against the implementation.
private readonly doc;
private options;
private debug;
/**
 * Create a new Defuddle instance
 * @param doc - The document to parse
 * @param options - Options for parsing; may be omitted
 */
constructor(doc: Document, options?: DefuddleOptions);
/**
 * Parse the document and extract its main content
 * @returns The extraction result, typed as `DefuddleResponse` (declared in `./types`)
 */
parse(): DefuddleResponse;
/**
 * Internal parse method that does the actual work
 */
private parseInternal;
// The remaining members are private helpers and are not part of the public
// API. Their signatures and behavior are not visible from this declaration;
// consult the implementation before relying on what any name suggests.
private countWords;
private _log;
private _evaluateMediaQueries;
private applyMobileStyles;
private removeHiddenElements;
private removeBySelector;
private findSmallImages;
private removeSmallImages;
private getElementIdentifier;
private findMainContent;
private findTableBasedContent;
private findContentByScoring;
private getElementSelector;
private getComputedStyle;
private _extractSchemaOrgData;
private _decodeHTMLEntities;
}