UNPKG

article-parser

Version:

Extracts the main article from a given URL

67 lines (55 loc) 1.57 kB
// Type definitions

import {IOptions as SanitizeOptions} from "sanitize-html";

/**
 * A pair of optional DOM hooks associated with a list of patterns.
 *
 * NOTE(review): the patterns are presumably matched against the article URL
 * to decide when the hooks apply — confirm against the implementation.
 */
export interface Transformation {
  /** Regular expressions this transformation is registered for. */
  patterns: Array<RegExp>;
  /** Optional hook that receives a document and returns a document. */
  pre?: (document: Document) => Document;
  /** Optional hook that receives a document and returns a document. */
  post?: (document: Document) => Document;
}

/**
 * Extracts article data from the given input.
 *
 * @param input url or html
 * @returns a promise resolving to the extracted article data
 */
export declare function extract(input: string): Promise<ArticleData>;

/**
 * Registers the given transformations.
 *
 * @param transformations transformations to add
 * @returns a number (presumably a count of added transformations — TODO confirm)
 */
export declare function addTransformations(transformations: Array<Transformation>): number;

/**
 * Removes transformations identified by the given patterns.
 *
 * @param options patterns selecting the transformations to remove
 * @returns a number (presumably a count of removed transformations — TODO confirm)
 */
export declare function removeTransformations(options: Array<RegExp>): number;

/**
 * Sets the parser options.
 *
 * @param options parser options to apply
 */
export declare function setParserOptions(options: ParserOptions): void;

/**
 * Sets the options passed to sanitize-html.
 *
 * @param options sanitize-html options to apply
 */
export declare function setSanitizeHtmlOptions(options: SanitizeOptions): void;

/**
 * @returns the current parser options
 */
export declare function getParserOptions(): ParserOptions;

/**
 * @returns the current sanitize-html options
 */
export declare function getSanitizeHtmlOptions(): SanitizeOptions;

/** Tunable settings of the article parser. */
export interface ParserOptions {
  /**
   * For estimating "time to read".
   * Default: 300
   */
  wordsPerMinute: number;
  /**
   * To find the best url from list
   */
  urlsCompareAlgorithm: 'levenshtein' | 'cosine' | 'diceCoefficient' | 'jaccardIndex' | 'lcs' | 'mlcs';
  /**
   * Min num of chars required for description
   * Default: 40
   */
  descriptionLengthThreshold: number;
  /**
   * Max num of chars generated for description
   * Default: 156
   */
  descriptionTruncateLen: number;
  /**
   * Min num of chars required for content
   * Default: 200
   */
  contentLengthThreshold: number;
}

/**
 * Result of an extraction. Every field is optional.
 */
export interface ArticleData {
  url?: string;
  links?: string[];
  title?: string;
  description?: string;
  image?: string;
  author?: string;
  content?: string;
  source?: string;
  published?: string;
  /** Presumably the estimated "time to read"; unit not visible here — TODO confirm. */
  ttr?: number;
}