defuddle
Version:
Extract article content and metadata from web pages.
26 lines (25 loc) • 884 B
TypeScript
import { DefuddleMetadata, MetaTagItem } from './types';
/**
* Ambient declaration of the MetadataExtractor class.
*
* Only type information is given here; the implementation lives elsewhere.
* Every member is static, and `extract` is the only member that is not
* private, so it is the sole entry point visible to callers.
*
* NOTE(review): private members are declared without signatures, so their
* parameters, return types and behavior cannot be confirmed from this
* declaration. The notes on them below are inferred from their names only.
*/
export declare class MetadataExtractor {
/**
* Public entry point; returns a DefuddleMetadata value for the given inputs.
*
* @param doc - The DOM Document passed in by the caller.
* @param schemaOrgData - Typed `any`; the expected shape is not visible here
*   (presumably parsed schema.org data - TODO confirm against the caller).
* @param metaTags - Array of MetaTagItem entries (type declared in './types').
* @returns A DefuddleMetadata object (type declared in './types').
*/
static extract(doc: Document, schemaOrgData: any, metaTags: MetaTagItem[]): DefuddleMetadata;
// Private static helpers. Presumably each one resolves a single metadata
// field for `extract` - verify against the implementation.
private static getAuthor;
private static extractByline;
private static getSiteName;
private static getSite;
private static getTitle;
private static cleanTitle;
private static getDescription;
private static getImage;
private static getLanguage;
/**
* Normalize language codes to BCP 47 format (e.g. en_US -> en-US)
*/
private static normalizeLangCode;
private static getFavicon;
private static getPublished;
// Presumably lookups over the `metaTags` input (single value vs. multiple
// values) - TODO confirm.
private static getMetaContent;
private static getMetaContents;
private static getTimeElement;
// Read-only static member; its type is not shown here. Presumably a
// month lookup table used when parsing dates - TODO confirm.
private static readonly MONTH_MAP;
private static parseDateText;
// Presumably reads a property out of `schemaOrgData` - TODO confirm.
private static getSchemaProperty;
}