defuddle

Version:

Extract article content and metadata from web pages.

64 lines (63 loc) • 1.49 kB

TypeScript

export interface DefuddleMetadata { title: string; description: string; domain: string; favicon: string; image: string; parseTime: number; published: string; author: string; site: string; schemaOrgData: any; wordCount: number; } export interface MetaTagItem { name?: string | null; property?: string | null; content: string | null; } export interface DefuddleResponse extends DefuddleMetadata { content: string; contentMarkdown?: string; extractorType?: string; metaTags?: MetaTagItem[]; } export interface DefuddleOptions { /** * Enable debug logging */ debug?: boolean; /** * URL of the page being parsed */ url?: string; /** * Convert output to Markdown */ markdown?: boolean; /** * Include Markdown in the response */ separateMarkdown?: boolean; /** * Whether to remove elements matching exact selectors like ads, social buttons, etc. * Defaults to true. */ removeExactSelectors?: boolean; /** * Whether to remove elements matching partial selectors like ads, social buttons, etc. * Defaults to true. */ removePartialSelectors?: boolean; } export interface ExtractorVariables { [key: string]: string; } export interface ExtractedContent { title?: string; author?: string; published?: string; content?: string; contentHtml?: string; variables?: ExtractorVariables; }