UNPKG

@akira108sys/html-rewriter-readability

Version:

A library that extracts readable content with the Mozilla/Readability algorithm, using Cloudflare HTMLRewriter.

57 lines (56 loc) 1.57 kB
/**
 * Attribute bag for a single HTML element.
 *
 * Keys are arbitrary strings and every value is a string.
 */
export interface ElementAttributes {
  [key: string]: string;
}

/**
 * Per-element record gathered during Phase 1.
 */
export interface ElementInfo {
  /** Numeric identifier of this element. */
  id: number;
  /** Identifier of the parent element, or `null` when there is none. */
  parentId: number | null;
  /** Tag name of the element. */
  tagName: string;
  /** Attributes captured for the element. */
  attributes: ElementAttributes;
  /** Optional list of text pieces (presumably accumulated while streaming — confirm against the producer). */
  textChunks?: string[];
  /** Optional text content (presumably the joined `textChunks` — confirm against the producer). */
  finalTextContent?: string;
  /** Visibility flag; per its name it is derived from attributes only. */
  isVisibleBasedOnAttrs: boolean;
  /** Role string for the element, or `null` (presumably the `role` attribute — confirm). */
  role: string | null;
  /** Flag marking the element as a likely data table. */
  isDataTableLikely: boolean;
  /** Flag marking the element as a code block. */
  isCodeBlock: boolean;
  /** Optional scoring state attached to the element. */
  readability?: {
    /** Numeric content score. */
    contentScore: number;
  };
}

/**
 * Document-level metadata gathered during Phase 1.
 *
 * Every field is optional.
 */
export interface Metadata {
  title?: string;
  byline?: string;
  excerpt?: string;
  siteName?: string;
  publishedTime?: string;
  lang?: string;
  dir?: string;
  /** Untyped (`any`) payload; nothing here constrains its shape, so validate before use. */
  jsonLd?: any;
}

/**
 * Formatting settings consumed in Phase 4.
 */
export interface FormattingOptions {
  /** Debug flag; required here, unlike `ReadabilityOptions.debug`. */
  debug: boolean;
  /** Optional pattern for allowed videos (what it is matched against is not visible here). */
  allowedVideoRegex?: RegExp;
}

/**
 * Phase 4 settings handed to the Handler.
 */
export interface Phase4HandlerOptions {
  /** Base URI; the only required member. */
  baseURI: URL;
  keepClasses?: boolean;
  classesToPreserve?: string[];
  formattingOptions?: FormattingOptions;
}

/**
 * Settings accepted by the HtmlRewriterReadability constructor.
 *
 * Every field is optional; default values are not defined in this file.
 * NOTE(review): the names look like Mozilla Readability's options — confirm
 * the semantics against the implementation.
 */
export interface ReadabilityOptions {
  debug?: boolean;
  maxElemsToParse?: number;
  nbTopCandidates?: number;
  charThreshold?: number;
  classesToPreserve?: string[];
  keepClasses?: boolean;
  allowedVideoRegex?: RegExp;
  linkDensityModifier?: number;
}

/**
 * Zero-argument function that yields the next unique element ID.
 */
export type NextElementIdGetter = () => number;