UNPKG

defuddle

Version:

Extract article content and metadata from web pages.

53 lines (52 loc) 1.22 kB
/**
 * Metadata fields reported for a parsed page.
 *
 * NOTE(review): the formats and units of individual fields (e.g. the unit of
 * `parseTime`, the date format of `published`) are not established by this
 * file — confirm against the code that populates them.
 */
export interface DefuddleMetadata {
  title: string;
  description: string;
  domain: string;
  favicon: string;
  image: string;
  parseTime: number;
  published: string;
  author: string;
  site: string;
  // NOTE(review): deliberately left as `any`; narrowing it would break callers
  // that read properties from it directly. The shape is not visible here.
  schemaOrgData: any;
  wordCount: number;
}

/**
 * Parse result: every metadata field plus the extracted content.
 */
export interface DefuddleResponse extends DefuddleMetadata {
  content: string;
  // Optional; presumably identifies which extractor produced `content` — confirm.
  extractorType?: string;
}

/**
 * Optional settings accepted when parsing a page. Every field may be omitted.
 */
export interface DefuddleOptions {
  /** Enable debug logging. */
  debug?: boolean;

  /** URL of the page being parsed. */
  url?: string;

  /** Convert output to markdown. */
  markdown?: boolean;

  /**
   * When true, elements matching exact selectors (ads, social buttons, etc.)
   * are removed. Defaults to true.
   */
  removeExactSelectors?: boolean;

  /**
   * When true, elements matching partial selectors (ads, social buttons, etc.)
   * are removed. Defaults to true.
   */
  removePartialSelectors?: boolean;
}

/**
 * String-to-string dictionary of extractor variables.
 */
export interface ExtractorVariables {
  [key: string]: string;
}

/**
 * Content fields that may be supplied individually; all of them are optional.
 *
 * NOTE(review): presumably the output shape of a site-specific extractor —
 * confirm against the extractor implementations.
 */
export interface ExtractedContent {
  title?: string;
  author?: string;
  published?: string;
  content?: string;
  contentHtml?: string;
  variables?: ExtractorVariables;
}