defuddle
Version:
Extract article content and metadata from web pages.
18 lines (17 loc) • 606 B
TypeScript
import { DefuddleMetadata } from './types';
/**
 * Ambient declaration of `MetadataExtractor`, a class that exposes only
 * static members (no constructor or instance members are declared here).
 *
 * The public surface consists of two static methods: `extract` and
 * `extractSchemaOrgData`. All other members are private statics whose
 * signatures are not part of this declaration.
 */
export declare class MetadataExtractor {
/**
 * Produces a `DefuddleMetadata` value for a document.
 *
 * @param doc - The DOM `Document` to read from.
 * @param schemaOrgData - Untyped (`any`) schema.org data. Presumably the
 *   value returned by `extractSchemaOrgData` for the same document —
 *   TODO confirm against the implementation.
 * @returns The metadata, shaped as declared by `DefuddleMetadata` in `./types`.
 */
static extract(doc: Document, schemaOrgData: any): DefuddleMetadata;
// Private static helpers. Only their names are declared here (no parameter
// or return types), so their exact contracts cannot be determined from this
// file. The names suggest one lookup per metadata field (author, site,
// title, description, image, favicon, published date) — verify against the
// implementation before relying on that.
private static getAuthor;
private static getSite;
private static getTitle;
// NOTE(review): presumably post-processes the value found by getTitle — confirm.
private static cleanTitle;
private static getDescription;
private static getImage;
private static getFavicon;
private static getPublished;
// Lower-level private helpers; by name these look like shared utilities
// (meta-tag lookup, <time> element lookup, HTML entity decoding, schema.org
// property access) used by the getters above — TODO confirm.
private static getMetaContent;
private static getTimeElement;
private static decodeHTMLEntities;
private static getSchemaProperty;
/**
 * Reads schema.org data from a document.
 *
 * @param doc - The DOM `Document` to read from.
 * @returns An untyped (`any`) value; its shape is not described by this
 *   declaration, so callers should narrow or validate it before use.
 */
static extractSchemaOrgData(doc: Document): any;
}