@akira108sys/html-rewriter-readability
Version:
A library that extracts readable content by applying the Mozilla Readability algorithm on top of Cloudflare's HTMLRewriter.
57 lines (56 loc) • 1.57 kB
TypeScript
/**
 * Attribute bag for a single HTML element.
 *
 * Any string may serve as a key, and every stored value is a string.
 */
export interface ElementAttributes {
  [attributeName: string]: string;
}
/**
 * Per-element record gathered during Phase 1.
 *
 * The shape is all this declaration fixes; how and when each field is
 * populated is decided by the collecting code, which is not visible here.
 */
export interface ElementInfo {
  // --- identity ---------------------------------------------------------

  /** Numeric identifier of this element. */
  id: number;
  /**
   * Identifier of the parent element, or `null`.
   * NOTE(review): presumably `null` means "no recorded parent" — confirm.
   */
  parentId: number | null;
  /** Tag name of the element (casing is not specified by this type). */
  tagName: string;
  /** Attributes of the element as string key/value pairs. */
  attributes: ElementAttributes;
  /**
   * Role of the element, or `null`.
   * NOTE(review): presumably taken from the `role` attribute — confirm.
   */
  role: string | null;

  // --- text -------------------------------------------------------------

  /**
   * Optional list of text pieces associated with the element.
   * NOTE(review): presumably fragments in arrival order — confirm.
   */
  textChunks?: string[];
  /**
   * Optional text content of the element.
   * NOTE(review): presumably the assembled form of `textChunks` — confirm.
   */
  finalTextContent?: string;

  // --- classification flags ---------------------------------------------

  /** Visibility verdict that, per its name, is derived from attributes. */
  isVisibleBasedOnAttrs: boolean;
  /** Whether the element is considered likely to be a data table. */
  isDataTableLikely: boolean;
  /** Whether the element is considered a code block. */
  isCodeBlock: boolean;

  // --- scoring ----------------------------------------------------------

  /** Optional scoring data; when present it always carries a score. */
  readability?: {
    /** Numeric content score assigned to the element. */
    contentScore: number;
  };
}
/**
 * Document-level metadata gathered during Phase 1.
 *
 * Every field is optional, so an empty object is a valid value.
 */
export interface Metadata {
  /** Title, when one was found. */
  title?: string;
  /** Byline (author credit), when one was found. */
  byline?: string;
  /** Short excerpt or description, when one was found. */
  excerpt?: string;
  /** Name of the publishing site, when one was found. */
  siteName?: string;
  /**
   * Publication time as a string.
   * NOTE(review): the format is not constrained by this type — confirm.
   */
  publishedTime?: string;
  /** Language value as a string (format not constrained by this type). */
  lang?: string;
  /** Text direction value as a string (not constrained by this type). */
  dir?: string;
  /**
   * JSON-LD related data.
   * Typed as `any`, so the compiler performs no checks on it; consumers
   * must validate its shape themselves before relying on any property.
   */
  jsonLd?: any;
}
/**
 * Formatting settings consumed by Phase 4.
 */
export interface FormattingOptions {
  /** Debug switch; required. What it enables is up to the consumer. */
  debug: boolean;
  /**
   * Optional pattern relating to allowed videos.
   * NOTE(review): what the pattern is matched against is not visible
   * here — confirm against the Phase 4 formatter.
   */
  allowedVideoRegex?: RegExp;
}
/**
 * Options handed to the Phase 4 handler.
 *
 * Only `baseURI` is mandatory; the remaining members may be omitted.
 */
export interface Phase4HandlerOptions {
  /**
   * Base URI of the document, as a `URL` object.
   * NOTE(review): presumably used to resolve relative URLs — confirm.
   */
  baseURI: URL;
  /** Optional nested formatting settings. */
  formattingOptions?: FormattingOptions;
  /** Optional flag relating to whether class names are kept. */
  keepClasses?: boolean;
  /** Optional list of class names to preserve. */
  classesToPreserve?: string[];
}
/**
 * Options accepted by the `HtmlRewriterReadability` constructor.
 *
 * Every member is optional. Defaults are applied by the constructor and are
 * not visible in this declaration.
 * NOTE(review): the member names appear to mirror the options of Mozilla's
 * Readability.js — confirm that the semantics match as well.
 */
export interface ReadabilityOptions {
  /** Debug switch. */
  debug?: boolean;

  // --- parsing and scoring limits -----------------------------------------

  /** Numeric limit relating to how many elements are parsed. */
  maxElemsToParse?: number;
  /** Number of top candidates to consider. */
  nbTopCandidates?: number;
  /** Character-count threshold. */
  charThreshold?: number;
  /** Numeric modifier applied to link density. */
  linkDensityModifier?: number;

  // --- class handling -----------------------------------------------------

  /** Flag relating to whether class names are kept. */
  keepClasses?: boolean;
  /** List of class names to preserve. */
  classesToPreserve?: string[];

  // --- embedded media -----------------------------------------------------

  /** Pattern relating to allowed videos. */
  allowedVideoRegex?: RegExp;
}
/**
 * Signature of a zero-argument callback that yields a numeric element ID.
 *
 * Each call is meant to hand out the next unique ID. Uniqueness is the
 * implementer's responsibility; the type only fixes the call shape.
 */
export type NextElementIdGetter = () => number;