@devmehq/open-graph-extractor
Version:
Fast, lightweight Open Graph, Twitter Card, and structured data extractor for Node.js with caching and validation
291 lines (290 loc) • 8.61 kB
TypeScript
/**
 * Allowed string values for the `ogType` field of `IOGResult`.
 *
 * Dotted entries (e.g. `"video.movie"`) are namespaced variants of a
 * top-level category.
 */
export type OGType =
  | "article"
  | "book"
  | "books.author"
  | "books.book"
  | "books.genre"
  | "business.business"
  | "fitness.course"
  | "music.album"
  | "music.playlist"
  | "music.radio_station"
  | "music.song"
  | "place"
  | "product"
  | "product.group"
  | "product.item"
  | "profile"
  | "restaurant.menu"
  | "restaurant.menu_item"
  | "restaurant.menu_section"
  | "restaurant.restaurant"
  | "video.episode"
  | "video.movie"
  | "video.other"
  | "video.tv_show"
  | "website";
/** Allowed string values for the `twitterCard` field of `IOGResult`. */
export type TwitterCardType =
  | "summary"
  | "summary_large_image"
  | "app"
  | "player";
/**
 * Image format identifiers, used by `IThumbnail.format` and
 * `IImageMetadata.type`. Both `"jpeg"` and `"jpg"` are listed as distinct
 * members.
 */
export type ImageFormat =
  | "jpeg"
  | "jpg"
  | "png"
  | "gif"
  | "webp"
  | "avif"
  | "svg"
  | "bmp"
  | "ico";
/**
 * Selector for the cache backend, used by `ICacheOptions.storage`.
 *
 * Note: this alias shares its name with the global `CacheStorage` interface
 * from the DOM lib; inside this module the local alias takes precedence.
 */
export type CacheStorage =
  | "memory"
  | "redis"
  | "custom";
/** Coarse confidence bucket, used by `IExtractionResult.confidenceLevel`. */
export type ConfidenceLevel =
  | "high"
  | "medium"
  | "low";
/** Severity label attached to an `IError` through its `severity` field. */
export type ErrorSeverity =
  | "critical"
  | "error"
  | "warning"
  | "info";
/**
 * Structured-data payload carried by `IExtractionResult.structuredData`.
 *
 * Every field is typed `any`, so consumers get no compile-time checking on
 * the contents and should narrow values before relying on their shape.
 */
export interface IStructuredData {
  /** Array of JSON-LD entries; the element shape is not typed here. */
  jsonLD: any[];
  /** Untyped value; by its name, schema.org data — confirm against the extractor. */
  schemaOrg: any;
  /** Untyped value; by its name, HTML microdata — confirm against the extractor. */
  microdata: any;
  /** Untyped value; by its name, RDFa data — confirm against the extractor. */
  rdfa: any;
  /** Untyped value; by its name, Dublin Core data — confirm against the extractor. */
  dublinCore: any;
}
/**
 * Asynchronous key/value contract for a cache backend. An implementation can
 * be supplied through `ICacheOptions.customStorage`.
 *
 * All operations return promises; stored values are `unknown`, so callers
 * must narrow whatever `get` resolves to.
 */
export interface ICacheStorage {
  /** Resolves to the value stored under `key`. Miss behaviour is implementation-defined. */
  get(key: string): Promise<unknown>;

  /**
   * Stores `value` under `key`.
   * @param ttl Optional time-to-live; the unit is not specified in this file.
   */
  set(key: string, value: unknown, ttl?: number): Promise<void>;

  /** Removes the entry stored under `key`. */
  delete(key: string): Promise<void>;

  /** Removes all entries. */
  clear(): Promise<void>;

  /** Resolves to whether an entry exists for `key`. */
  has(key: string): Promise<boolean>;
}
/**
 * Cache configuration, accepted through `IExtractOpenGraphOptions.cache`.
 */
export interface ICacheOptions {
  /** Switches caching on or off. */
  enabled: boolean;
  /** Time-to-live for cached entries; the unit is not specified in this file. */
  ttl: number;
  /** Which backend to use. */
  storage: CacheStorage;
  /** Upper bound on cache size; whether it counts entries or bytes is not specified here. */
  maxSize: number;
  /** Optional function deriving a cache key from a URL. */
  keyGenerator?: (url: string) => string;
  /** Optional user-supplied backend; presumably paired with `storage: "custom"` — confirm. */
  customStorage?: ICacheStorage;
}
/**
 * A reported problem. Instances appear in `IExtractionResult.errors` and
 * `IValidationResult.errors`.
 */
export interface IError {
  /** Identifier for the kind of problem. */
  code: string;
  /** Description of the problem. */
  message: string;
  /** Severity label. */
  severity: ErrorSeverity;
  /** Optional name of the field the problem relates to. */
  field?: string;
  /** Optional hint for resolving the problem. */
  suggestion?: string;
  /** `Date` attached to the error; presumably when it was recorded — confirm. */
  timestamp: Date;
}
/**
 * A reported warning. Instances appear in `IExtractionResult.warnings` and
 * `IValidationResult.warnings`. It has the same fields as `IError` minus
 * `severity` and `timestamp`.
 */
export interface IWarning {
  /** Identifier for the kind of warning. */
  code: string;
  /** Description of the warning. */
  message: string;
  /** Optional name of the field the warning relates to. */
  field?: string;
  /** Optional hint for resolving the warning. */
  suggestion?: string;
}
/**
 * Outcome of validating an `IOGResult`; this is the return type of
 * `ValidatorFunction` and the optional `IExtractionResult.validation`.
 */
export interface IValidationResult {
  /** Overall pass/fail flag. */
  valid: boolean;
  /** Errors found during validation. */
  errors: IError[];
  /** Warnings found during validation. */
  warnings: IWarning[];
  /** Numeric score; its range is not specified in this file. */
  score: number;
  /** Free-text recommendations. */
  recommendations: string[];
}
/**
 * Full result of an extraction run; this is what an `ExtractorFunction`
 * resolves to.
 */
export interface IExtractionResult {
  /** Extracted metadata fields. */
  data: IOGResult;
  /** Structured-data payload. */
  structuredData: IStructuredData;
  /** Errors reported for this run. */
  errors: IError[];
  /** Warnings reported for this run. */
  warnings: IWarning[];
  /** Numeric confidence; its scale is not specified in this file. */
  confidence: number;
  /** Coarse bucket for the confidence. */
  confidenceLevel: ConfidenceLevel;
  /** Names of fallbacks used; `IMetrics` declares a field of the same name. */
  fallbacksUsed: string[];
  /** Counters and timings for this run. */
  metrics: IMetrics;
  /** Validation outcome; presumably present when `validateData` is requested — confirm. */
  validation?: IValidationResult;
  /** Social score; presumably present when `generateScore` is requested — confirm. */
  socialScore?: ISocialScore;
}
/**
 * Counters and timings describing one extraction run; carried by
 * `IExtractionResult.metrics`. Time units are not specified in this file.
 */
export interface IMetrics {
  /** Time spent extracting. */
  extractionTime: number;
  /** Size of the HTML input; the unit (bytes vs. characters) is not specified here. */
  htmlSize: number;
  /** Count of meta tags found. */
  metaTagsFound: number;
  /** Count of structured-data items found. */
  structuredDataFound: number;
  /** Count of images found. */
  imagesFound: number;
  /** Count of videos found. */
  videosFound: number;
  /** Names of fallbacks used; `IExtractionResult` declares a field of the same name. */
  fallbacksUsed: string[];
  /** Per-phase timing breakdown. */
  performance: IPerformanceMetrics;
}
/**
 * Per-phase timing breakdown, carried by `IMetrics.performance`.
 * All values are numbers; the time unit is not specified in this file.
 */
export interface IPerformanceMetrics {
  /** Time spent parsing HTML. */
  htmlParseTime: number;
  /** Time spent extracting meta tags. */
  metaExtractionTime: number;
  /** Time spent extracting structured data. */
  structuredDataExtractionTime: number;
  /** Time spent validating. */
  validationTime: number;
  /** Total time; whether it equals the sum of the phases is not specified here. */
  totalTime: number;
}
/**
 * Description of a single video; one of the shapes accepted by
 * `IOGResult.ogVideo`. Only `url` is required.
 */
export interface IVideoMetadata {
  /** Video URL. */
  url: string;
  /** Secure variant of the URL (presumably HTTPS — confirm). */
  secureUrl?: string;
  /** Type string; presumably a MIME type — confirm. */
  type?: string;
  /** Width, accepted as either a string or a number. */
  width?: string | number;
  /** Height, accepted as either a string or a number. */
  height?: string | number;
  /** Duration; the unit is not specified in this file. */
  duration?: number;
  /** Thumbnails for the video. */
  thumbnails?: IThumbnail[];
  /** Chapter markers. */
  chapters?: IChapter[];
  /** Caption/text tracks. */
  captions?: ICaption[];
  /** URL for embedding the video. */
  embedUrl?: string;
  /** Upload date as a string; the format is not specified here. */
  uploadDate?: string;
  /** View count. */
  views?: number;
  /** Like count. */
  likes?: number;
}
/** A thumbnail image, listed in `IVideoMetadata.thumbnails`. */
export interface IThumbnail {
  /** Thumbnail URL. */
  url: string;
  /** Width as a number; the unit is not specified in this file. */
  width?: number;
  /** Height as a number; the unit is not specified in this file. */
  height?: number;
  /** Image format. */
  format?: ImageFormat;
}
/** A chapter marker, listed in `IVideoMetadata.chapters`. */
export interface IChapter {
  /** Chapter title. */
  title: string;
  /** Start offset; the unit is not specified in this file. */
  startTime: number;
  /** Optional end offset, in the same unspecified unit. */
  endTime?: number;
  /** Optional thumbnail, given as a string (presumably a URL — confirm). */
  thumbnail?: string;
}
/** A caption/text track, listed in `IVideoMetadata.captions`. */
export interface ICaption {
  /** Language of the track; the tag format is not specified in this file. */
  language: string;
  /** URL of the track. */
  url: string;
  /** Track kind, restricted to the five literals below. */
  kind:
    | "subtitles"
    | "captions"
    | "descriptions"
    | "chapters"
    | "metadata";
}
/**
 * Description of a single image; one of the shapes accepted by
 * `IOGResult.ogImage` and `IOGResult.twitterImage`. Only `url` is required.
 */
export interface IImageMetadata {
  /** Image URL. */
  url: string;
  /** Secure variant of the URL (presumably HTTPS — confirm). */
  secureUrl?: string;
  /** Image format. */
  type?: ImageFormat;
  /** Width, accepted as either a string or a number. */
  width?: string | number;
  /** Height, accepted as either a string or a number. */
  height?: string | number;
  /** Alternative text. */
  alt?: string;
  /** Caption text. */
  caption?: string;
  /** Parsed `srcset` candidates. */
  srcset?: ISrcSetImage[];
  /** Flag marking the image as lazy-loaded. */
  isLazyLoaded?: boolean;
  /** Flag marking the image as responsive. */
  isResponsive?: boolean;
  /** Dominant colour as a string; the notation is not specified in this file. */
  dominantColor?: string;
  /** Aspect ratio as a number; presumably width / height — confirm. */
  aspectRatio?: number;
}
/** One candidate from an image `srcset`, listed in `IImageMetadata.srcset`. */
export interface ISrcSetImage {
  /** Candidate URL. */
  url: string;
  /** Numeric width of the candidate. */
  width: number;
  /** Descriptor string for the candidate (presumably e.g. `"2x"` or `"640w"` — confirm). */
  descriptor: string;
}
/** Options for processing a list of URLs in bulk. Only `urls` is required. */
export interface IBulkOptions {
  /** URLs to process. */
  urls: string[];
  /** Optional concurrency setting; the default is not specified in this file. */
  concurrency?: number;
  /** Optional rate limit. */
  rateLimit?: IRateLimit;
  /** Optional progress callback receiving the completed count, the total, and a URL. */
  onProgress?: (completed: number, total: number, url: string) => void;
  /** Optional error callback receiving the URL and the `Error`. */
  onError?: (url: string, error: Error) => void;
  /** Optional flag; by its name, keeps processing after a failure — confirm. */
  continueOnError?: boolean;
}
/** Rate-limit setting, accepted through `IBulkOptions.rateLimit`. */
export interface IRateLimit {
  /** Number of requests; presumably the maximum allowed per window — confirm. */
  requests: number;
  /** Window size; the time unit is not specified in this file. */
  window: number;
}
/**
 * Aggregated score with a per-area breakdown; this is the return type of
 * `ScorerFunction` and the optional `IExtractionResult.socialScore`.
 */
export interface ISocialScore {
  /** Overall score; its range is not specified in this file. */
  overall: number;
  /** Breakdown for the Open Graph area. */
  openGraph: IScoreDetails;
  /** Breakdown for the Twitter area. */
  twitter: IScoreDetails;
  /** Breakdown for the schema area. */
  schema: IScoreDetails;
  /** Breakdown for the SEO area. */
  seo: IScoreDetails;
  /** Free-text recommendations. */
  recommendations: string[];
  /** Names of missing required items. */
  missingRequired: string[];
  /** Names of missing recommended items. */
  missingRecommended: string[];
}
/** Per-area score breakdown used by the area fields of `ISocialScore`. */
export interface IScoreDetails {
  /** Score for this area; its range is not specified in this file. */
  score: number;
  /** Names of items that are present. */
  present: string[];
  /** Names of items that are missing. */
  missing: string[];
  /** Issues found, as strings. */
  issues: string[];
}
/**
 * Security-related switches, accepted through
 * `IExtractOpenGraphOptions.security`. Every field is optional; defaults are
 * not specified in this file.
 */
export interface ISecurityOptions {
  /** Toggle for HTML sanitisation. */
  sanitizeHtml?: boolean;
  /** Toggle for PII detection. */
  detectPII?: boolean;
  /** Toggle for PII masking. */
  maskPII?: boolean;
  /** Toggle for URL validation. */
  validateUrls?: boolean;
  /** Redirect limit. */
  maxRedirects?: number;
  /** Timeout value; the unit is not specified in this file. */
  timeout?: number;
  /** Domain allow-list. */
  allowedDomains?: string[];
  /** Domain block-list. */
  blockedDomains?: string[];
}
/**
 * Options accepted by an `ExtractorFunction`. Every field is optional;
 * defaults are not specified in this file.
 */
export interface IExtractOpenGraphOptions {
  /**
   * Extra meta tags to extract. Each entry names a `property`, a target
   * `fieldName`, and whether `multiple` values are expected (semantics
   * inferred from the field names — confirm against the extractor).
   */
  customMetaTags?: Array<{
    multiple: boolean;
    property: string;
    fieldName: string;
  }>;
  /** Toggle; by its name, returns all media rather than a single item — confirm. */
  allMedia?: boolean;
  /** Toggle; by its name, restricts output to Open Graph data — confirm. */
  onlyGetOpenGraphInfo?: boolean;
  /** Toggle for an `ogImage` fallback. */
  ogImageFallback?: boolean;
  /** Cache configuration. */
  cache?: ICacheOptions;
  /** Security configuration. */
  security?: ISecurityOptions;
  /** Toggle for structured-data extraction. */
  extractStructuredData?: boolean;
  /** Toggle for data validation. */
  validateData?: boolean;
  /** Toggle for score generation. */
  generateScore?: boolean;
  /** Toggle for article-content extraction. */
  extractArticleContent?: boolean;
  /** Toggle for language detection. */
  detectLanguage?: boolean;
  /** Toggle for URL normalisation. */
  normalizeUrls?: boolean;
}
/**
 * Flat bag of extracted metadata; carried by `IExtractionResult.data` and
 * consumed by `ValidatorFunction` / `ScorerFunction`.
 *
 * Every declared field is optional. Several fields accept either a single
 * value or an array. The section headings below follow the field-name
 * prefixes; the exact source tag for each field is not specified in this file.
 */
export interface IOGResult {
  // --- `og*`: general Open Graph fields ---
  ogTitle?: string;
  ogType?: OGType;
  ogUrl?: string;
  ogDescription?: string;
  ogSiteName?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogLogo?: string;
  ogDate?: string;

  // --- `og*` media: rich metadata objects or plain strings, single or array ---
  ogImage?: IImageMetadata | IImageMetadata[] | string | string[];
  ogVideo?: IVideoMetadata | IVideoMetadata[] | string | string[];
  ogAudio?: string | string[];
  ogAudioSecureURL?: string;
  ogAudioType?: string;

  // --- `twitter*` fields ---
  twitterCard?: TwitterCardType;
  twitterSite?: string;
  twitterSiteId?: string;
  twitterCreator?: string;
  twitterCreatorId?: string;
  twitterTitle?: string;
  twitterDescription?: string;
  twitterImage?: IImageMetadata | IImageMetadata[] | string | string[];
  twitterImageAlt?: string | string[];
  twitterPlayer?: string | string[];
  twitterPlayerWidth?: string | string[];
  twitterPlayerHeight?: string | string[];
  twitterPlayerStream?: string | string[];

  // --- `al*` fields (presumably App Links metadata — confirm) ---
  alAndroidAppName?: string;
  alAndroidPackage?: string;
  alAndroidUrl?: string;
  alIosAppName?: string;
  alIosAppStoreId?: string;
  alIosUrl?: string;
  alWebUrl?: string;
  /** Typed as a string, not a boolean. */
  alWebShouldFallback?: string;

  // --- `article*` fields (timestamps are strings; format not specified here) ---
  articlePublishedTime?: string;
  articleModifiedTime?: string;
  articleExpirationTime?: string;
  articleAuthor?: string | string[];
  articleSection?: string;
  articleTag?: string | string[];
  articlePublisher?: string;

  // --- `ogProduct*` fields (price amount is a string, not a number) ---
  ogProductRetailerItemId?: string;
  ogProductPriceAmount?: string;
  ogProductPriceCurrency?: string;
  ogProductAvailability?: string;
  ogProductCondition?: string;

  // --- `music*` fields ---
  musicSong?: string | string[];
  musicSongUrl?: string | string[];
  musicMusician?: string | string[];
  musicAlbum?: string | string[];
  musicReleaseDate?: string;
  /** Numeric duration; the unit is not specified in this file. */
  musicDuration?: number;

  // --- `book*` fields ---
  bookAuthor?: string | string[];
  bookIsbn?: string;
  bookReleaseDate?: string;
  bookTag?: string | string[];

  // --- `profile*` fields ---
  profileFirstName?: string;
  profileLastName?: string;
  profileUsername?: string;
  profileGender?: string;

  // --- `place*` fields (coordinates are strings, not numbers) ---
  placeLocationLatitude?: string;
  placeLocationLongitude?: string;

  // --- `restaurant*` fields ---
  restaurantMenu?: string;
  restaurantSection?: string;
  restaurantVariationPriceAmount?: string;
  restaurantVariationPriceCurrency?: string;

  // --- `dc*` fields (presumably Dublin Core elements — confirm) ---
  dcTitle?: string;
  dcCreator?: string;
  dcDescription?: string;
  dcPublisher?: string;
  dcDate?: string;
  dcType?: string;
  dcFormat?: string;
  dcIdentifier?: string;
  dcSource?: string;
  dcLanguage?: string;
  dcRelation?: string;
  dcCoverage?: string;
  dcRights?: string;

  // --- General page-level fields ---
  favicon?: string;
  charset?: string;
  author?: string;
  keywords?: string[];
  robots?: string;
  viewport?: string;
  generator?: string;
  applicationName?: string;
  themeColor?: string;
  canonical?: string;
  ampUrl?: string;
  manifest?: string;
  maskIcon?: string;
  appleTouchIcon?: string;

  // --- Content-derived fields ---
  articleContent?: string;
  /** Numeric reading time; the unit is not specified in this file. */
  readingTime?: number;
  wordCount?: number;
  language?: string;
  textDirection?: "ltr" | "rtl";

  /**
   * Open index signature: any other string key is allowed, with an untyped
   * value. Note that this also lets misspelled property names type-check.
   */
  [key: string]: any;
}
/**
 * Signature of an extractor: takes HTML as a string or `Buffer`, plus
 * optional settings, and resolves to the full extraction result.
 */
export type ExtractorFunction = (
  html: string | Buffer,
  options?: IExtractOpenGraphOptions,
) => Promise<IExtractionResult>;
/** Signature of a synchronous validator: maps extracted data to a validation result. */
export type ValidatorFunction = (
  data: IOGResult,
) => IValidationResult;
/** Signature of a synchronous scorer: maps extracted data to a social score. */
export type ScorerFunction = (
  data: IOGResult,
) => ISocialScore;