UNPKG

@devmehq/open-graph-extractor

Version:

Fast, lightweight Open Graph, Twitter Card, and structured data extractor for Node.js with caching and validation

291 lines (290 loc) 8.61 kB
/**
 * String-literal union of the values accepted for `IOGResult.ogType`.
 */
export type OGType =
  | "article"
  | "book"
  | "books.author"
  | "books.book"
  | "books.genre"
  | "business.business"
  | "fitness.course"
  | "music.album"
  | "music.playlist"
  | "music.radio_station"
  | "music.song"
  | "place"
  | "product"
  | "product.group"
  | "product.item"
  | "profile"
  | "restaurant.menu"
  | "restaurant.menu_item"
  | "restaurant.menu_section"
  | "restaurant.restaurant"
  | "video.episode"
  | "video.movie"
  | "video.other"
  | "video.tv_show"
  | "website";

/** String-literal union of the values accepted for `IOGResult.twitterCard`. */
export type TwitterCardType =
  | "summary"
  | "summary_large_image"
  | "app"
  | "player";

/** Image format tags used by `IThumbnail.format` and `IImageMetadata.type`. */
export type ImageFormat =
  | "jpeg"
  | "jpg"
  | "png"
  | "gif"
  | "webp"
  | "avif"
  | "svg"
  | "bmp"
  | "ico";

/** Backend selector used by `ICacheOptions.storage`. */
export type CacheStorage = "memory" | "redis" | "custom";

/** Bucketed confidence label used by `IExtractionResult.confidenceLevel`. */
export type ConfidenceLevel = "high" | "medium" | "low";

/** Severity tag carried by every `IError`. */
export type ErrorSeverity = "critical" | "error" | "warning" | "info";

/**
 * Container for the structured-data part of an extraction result.
 *
 * Every slot is typed `any` (or `any[]`), so consumers get no compile-time
 * checking on the contents and must narrow the values themselves.
 */
export interface IStructuredData {
  jsonLD: any[];
  schemaOrg: any;
  microdata: any;
  rdfa: any;
  dublinCore: any;
}

/**
 * Asynchronous key/value store contract; every method returns a Promise.
 *
 * Referenced by `ICacheOptions.customStorage`.
 */
export interface ICacheStorage {
  get(key: string): Promise<unknown>;
  // NOTE(review): the unit of `ttl` is not visible in this file — confirm.
  set(key: string, value: unknown, ttl?: number): Promise<void>;
  delete(key: string): Promise<void>;
  clear(): Promise<void>;
  has(key: string): Promise<boolean>;
}

/**
 * Cache configuration accepted through `IExtractOpenGraphOptions.cache`.
 *
 * The first four members are required; the key generator and the custom
 * storage are optional.
 */
export interface ICacheOptions {
  enabled: boolean;
  // NOTE(review): unit of `ttl` and meaning of `maxSize` are not shown here — confirm.
  ttl: number;
  storage: CacheStorage;
  maxSize: number;
  /** Maps a URL string to a string (by its name, the cache key). */
  keyGenerator?: (url: string) => string;
  // Presumably consulted when `storage` is "custom" — verify against the implementation.
  customStorage?: ICacheStorage;
}

/** Error record: code, message, severity and timestamp are required. */
export interface IError {
  code: string;
  message: string;
  severity: ErrorSeverity;
  field?: string;
  suggestion?: string;
  timestamp: Date;
}

/**
 * Warning record. Same members as `IError` minus `severity` and `timestamp`.
 */
export interface IWarning {
  code: string;
  message: string;
  field?: string;
  suggestion?: string;
}

/** Return shape of a `ValidatorFunction`. */
export interface IValidationResult {
  valid: boolean;
  errors: IError[];
  warnings: IWarning[];
  // NOTE(review): the numeric range of `score` is not defined in this file.
  score: number;
  recommendations: string[];
}

/**
 * Value an `ExtractorFunction` resolves to.
 *
 * `validation` and `socialScore` are the only optional members.
 */
export interface IExtractionResult {
  data: IOGResult;
  structuredData: IStructuredData;
  errors: IError[];
  warnings: IWarning[];
  confidence: number;
  confidenceLevel: ConfidenceLevel;
  fallbacksUsed: string[];
  metrics: IMetrics;
  validation?: IValidationResult;
  socialScore?: ISocialScore;
}

/**
 * Numeric counters plus a nested timing breakdown for one extraction.
 *
 * Carries its own `fallbacksUsed` list in addition to the one declared on
 * `IExtractionResult`.
 */
export interface IMetrics {
  extractionTime: number;
  htmlSize: number;
  metaTagsFound: number;
  structuredDataFound: number;
  imagesFound: number;
  videosFound: number;
  fallbacksUsed: string[];
  performance: IPerformanceMetrics;
}

/**
 * Per-phase timing numbers nested under `IMetrics.performance`.
 *
 * NOTE(review): the time unit is not stated anywhere in this file — confirm.
 */
export interface IPerformanceMetrics {
  htmlParseTime: number;
  metaExtractionTime: number;
  structuredDataExtractionTime: number;
  validationTime: number;
  totalTime: number;
}

/** Object form of a video entry; only `url` is required. */
export interface IVideoMetadata {
  url: string;
  secureUrl?: string;
  type?: string;
  // Dimensions accept either a string or a number.
  width?: string | number;
  height?: string | number;
  duration?: number;
  thumbnails?: IThumbnail[];
  chapters?: IChapter[];
  captions?: ICaption[];
  embedUrl?: string;
  uploadDate?: string;
  views?: number;
  likes?: number;
}

/** Element type of `IVideoMetadata.thumbnails`. */
export interface IThumbnail {
  url: string;
  width?: number;
  height?: number;
  format?: ImageFormat;
}

/** Element type of `IVideoMetadata.chapters`. */
export interface IChapter {
  title: string;
  startTime: number;
  endTime?: number;
  thumbnail?: string;
}

/** Element type of `IVideoMetadata.captions`. */
export interface ICaption {
  language: string;
  url: string;
  kind: "subtitles" | "captions" | "descriptions" | "chapters" | "metadata";
}

/** Object form of an image entry; only `url` is required. */
export interface IImageMetadata {
  url: string;
  secureUrl?: string;
  type?: ImageFormat;
  // Dimensions accept either a string or a number.
  width?: string | number;
  height?: string | number;
  alt?: string;
  caption?: string;
  srcset?: ISrcSetImage[];
  isLazyLoaded?: boolean;
  isResponsive?: boolean;
  dominantColor?: string;
  aspectRatio?: number;
}

/** Element type of `IImageMetadata.srcset`; all three members are required. */
export interface ISrcSetImage {
  url: string;
  width: number;
  descriptor: string;
}

/**
 * Options for processing a list of URLs.
 *
 * Only `urls` is required. No function in this file consumes this type, so
 * the behavior behind each option has to be checked against the implementation.
 */
export interface IBulkOptions {
  urls: string[];
  concurrency?: number;
  rateLimit?: IRateLimit;
  onProgress?: (completed: number, total: number, url: string) => void;
  onError?: (url: string, error: Error) => void;
  continueOnError?: boolean;
}

/**
 * Pair of numbers used by `IBulkOptions.rateLimit`.
 *
 * NOTE(review): the unit of `window` is not visible here — confirm.
 */
export interface IRateLimit {
  requests: number;
  window: number;
}

/**
 * Return shape of a `ScorerFunction`: an overall number, four per-area
 * `IScoreDetails` breakdowns and three string lists.
 */
export interface ISocialScore {
  overall: number;
  openGraph: IScoreDetails;
  twitter: IScoreDetails;
  schema: IScoreDetails;
  seo: IScoreDetails;
  recommendations: string[];
  missingRequired: string[];
  missingRecommended: string[];
}

/** Per-area breakdown used four times inside `ISocialScore`. */
export interface IScoreDetails {
  score: number;
  present: string[];
  missing: string[];
  issues: string[];
}

/**
 * Security-related switches accepted through
 * `IExtractOpenGraphOptions.security`. Every member is optional.
 */
export interface ISecurityOptions {
  sanitizeHtml?: boolean;
  detectPII?: boolean;
  maskPII?: boolean;
  validateUrls?: boolean;
  maxRedirects?: number;
  // NOTE(review): unit of `timeout` is not shown in this file — confirm.
  timeout?: number;
  allowedDomains?: string[];
  blockedDomains?: string[];
}

/**
 * Options bag taken as the optional second argument of an
 * `ExtractorFunction`. Every member is optional.
 */
export interface IExtractOpenGraphOptions {
  customMetaTags?: {
    multiple: boolean;
    property: string;
    fieldName: string;
  }[];
  allMedia?: boolean;
  onlyGetOpenGraphInfo?: boolean;
  ogImageFallback?: boolean;
  cache?: ICacheOptions;
  security?: ISecurityOptions;
  extractStructuredData?: boolean;
  validateData?: boolean;
  generateScore?: boolean;
  extractArticleContent?: boolean;
  detectLanguage?: boolean;
  normalizeUrls?: boolean;
}

/**
 * Flat bag of extracted metadata fields.
 *
 * Every declared member is optional, and the trailing index signature admits
 * any additional string key with an `any` value.
 */
export interface IOGResult {
  // og* fields
  ogTitle?: string;
  ogType?: OGType;
  ogUrl?: string;
  ogDescription?: string;
  ogSiteName?: string;
  ogLocale?: string;
  ogLocaleAlternate?: string[];
  ogLogo?: string;
  ogDate?: string;
  // Media members accept a metadata object, a plain string, or an array of either.
  ogImage?: IImageMetadata | IImageMetadata[] | string | string[];
  ogVideo?: IVideoMetadata | IVideoMetadata[] | string | string[];
  ogAudio?: string | string[];
  ogAudioSecureURL?: string;
  ogAudioType?: string;

  // twitter* fields
  twitterCard?: TwitterCardType;
  twitterSite?: string;
  twitterSiteId?: string;
  twitterCreator?: string;
  twitterCreatorId?: string;
  twitterTitle?: string;
  twitterDescription?: string;
  twitterImage?: IImageMetadata | IImageMetadata[] | string | string[];
  twitterImageAlt?: string | string[];
  twitterPlayer?: string | string[];
  twitterPlayerWidth?: string | string[];
  twitterPlayerHeight?: string | string[];
  twitterPlayerStream?: string | string[];

  // al* fields (presumably App Links tags — confirm against the extractor)
  alAndroidAppName?: string;
  alAndroidPackage?: string;
  alAndroidUrl?: string;
  alIosAppName?: string;
  alIosAppStoreId?: string;
  alIosUrl?: string;
  alWebUrl?: string;
  alWebShouldFallback?: string;

  // article* fields
  articlePublishedTime?: string;
  articleModifiedTime?: string;
  articleExpirationTime?: string;
  articleAuthor?: string | string[];
  articleSection?: string;
  articleTag?: string | string[];
  articlePublisher?: string;

  // ogProduct* fields
  ogProductRetailerItemId?: string;
  ogProductPriceAmount?: string;
  ogProductPriceCurrency?: string;
  ogProductAvailability?: string;
  ogProductCondition?: string;

  // music* fields
  musicSong?: string | string[];
  musicSongUrl?: string | string[];
  musicMusician?: string | string[];
  musicAlbum?: string | string[];
  musicReleaseDate?: string;
  musicDuration?: number;

  // book* fields
  bookAuthor?: string | string[];
  bookIsbn?: string;
  bookReleaseDate?: string;
  bookTag?: string | string[];

  // profile* fields
  profileFirstName?: string;
  profileLastName?: string;
  profileUsername?: string;
  profileGender?: string;

  // place* fields (typed as strings, not numbers)
  placeLocationLatitude?: string;
  placeLocationLongitude?: string;

  // restaurant* fields
  restaurantMenu?: string;
  restaurantSection?: string;
  restaurantVariationPriceAmount?: string;
  restaurantVariationPriceCurrency?: string;

  // dc* fields (presumably Dublin Core elements — confirm against the extractor)
  dcTitle?: string;
  dcCreator?: string;
  dcDescription?: string;
  dcPublisher?: string;
  dcDate?: string;
  dcType?: string;
  dcFormat?: string;
  dcIdentifier?: string;
  dcSource?: string;
  dcLanguage?: string;
  dcRelation?: string;
  dcCoverage?: string;
  dcRights?: string;

  // Unprefixed page-level fields
  favicon?: string;
  charset?: string;
  author?: string;
  keywords?: string[];
  robots?: string;
  viewport?: string;
  generator?: string;
  applicationName?: string;
  themeColor?: string;
  canonical?: string;
  ampUrl?: string;
  manifest?: string;
  maskIcon?: string;
  appleTouchIcon?: string;

  // Content-derived fields
  articleContent?: string;
  // NOTE(review): unit of `readingTime` is not stated in this file — confirm.
  readingTime?: number;
  wordCount?: number;
  language?: string;
  textDirection?: "ltr" | "rtl";

  // Open index signature: any other string key is allowed and is untyped.
  [key: string]: any;
}

/**
 * Signature of an extractor: takes the HTML as a `string` or `Buffer` plus an
 * optional options bag, and resolves to an `IExtractionResult`.
 */
export type ExtractorFunction = (
  html: string | Buffer,
  options?: IExtractOpenGraphOptions
) => Promise<IExtractionResult>;

/** Signature of a validator: maps an `IOGResult` to an `IValidationResult`. */
export type ValidatorFunction = (data: IOGResult) => IValidationResult;

/** Signature of a scorer: maps an `IOGResult` to an `ISocialScore`. */
export type ScorerFunction = (data: IOGResult) => ISocialScore;