site-metadata-extractor
Version:
web(site) resource metadata extractor
57 lines (56 loc) • 1.8 kB
TypeScript
/// <reference types="cheerio" />
import { NewsArticle, Article } from "schema-dts";
import { LinkObj, VideoAttrs } from "./extractor";
/**
 * Eagerly-evaluated extraction result: the return type of the default
 * `siteMetadataExtractor` export, holding one plain value per metadata field.
 *
 * `links`, `text` and `videos` are optional on this interface, whereas the
 * matching accessors on `LazyExtractor` are always present.
 *
 * NOTE(review): the extraction logic is not part of this declaration file, so
 * the exact content/format of each string field (e.g. `date`, `keywords`,
 * `locale`) cannot be stated here — confirm against the implementation.
 */
export interface PageData {
author: string[];
canonicalLink: string;
copyright: string;
date: string;
description: string;
favicon: string;
image: string;
/** Typed with the `schema-dts` article types; `null` is an allowed value. */
jsonld: NewsArticle | Article | null;
keywords: string;
lang: string;
/** Optional; may be absent from the result. */
links?: LinkObj[];
locale: string;
origin: string;
publisher: string;
siteName: string;
softTitle: string;
tags: string[];
/** Optional; may be absent from the result. */
text?: string;
title: string;
type: string;
/** Optional; may be absent from the result. */
videos?: VideoAttrs[];
}
/**
 * Accessor-based counterpart of `PageData`: the return type of the `lazy`
 * export. Every member is a zero-argument function returning the value type
 * of the same-named `PageData` field.
 *
 * Unlike `PageData`, no member is optional here: `links`, `text` and `videos`
 * are always present and return non-optional values.
 *
 * NOTE(review): presumably each accessor performs its extraction only when
 * called (hence "lazy"); whether results are cached between calls is not
 * visible from this declaration — confirm against the implementation.
 */
export interface LazyExtractor {
author: () => string[];
canonicalLink: () => string;
copyright: () => string;
date: () => string;
description: () => string;
favicon: () => string;
image: () => string;
/** May return `null`, mirroring `PageData.jsonld`. */
jsonld: () => NewsArticle | Article | null;
keywords: () => string;
lang: () => string;
links: () => LinkObj[];
locale: () => string;
origin: () => string;
publisher: () => string;
siteName: () => string;
softTitle: () => string;
tags: () => string[];
text: () => string;
title: () => string;
type: () => string;
videos: () => VideoAttrs[];
}
/**
 * Default export of the module: takes page markup and returns a fully
 * populated `PageData` object.
 *
 * @param markup - The page markup as a string.
 * @param resourceUrl - URL of the resource the markup belongs to.
 *   NOTE(review): presumably used to resolve relative links and derive
 *   `origin` — confirm against the implementation.
 * @param lang - Optional language argument; the behaviour when omitted is not
 *   visible from this declaration.
 * @returns The extracted metadata as a `PageData` object.
 */
declare const siteMetadataExtractor: (markup: string, resourceUrl: string, lang?: string) => PageData;
export default siteMetadataExtractor;
/**
 * Accessor-based variant of the default export: takes the same three
 * positional arguments but returns a `LazyExtractor` whose members are
 * functions, instead of a `PageData` object of plain values.
 *
 * @param html - The page markup as a string.
 * @param resourceUrl - URL of the resource the markup belongs to.
 * @param language - Optional language argument; the behaviour when omitted is
 *   not visible from this declaration.
 * @returns A `LazyExtractor` exposing one accessor per metadata field.
 */
export declare const lazy: (html: string, resourceUrl: string, language?: string) => LazyExtractor;
/**
 * Builds a cheerio document from an HTML string.
 *
 * NOTE(review): the name suggests the document is cleaned after parsing; which
 * elements are removed or altered is not visible from this declaration —
 * confirm against the implementation.
 *
 * @param html - The page markup as a string.
 * @returns The resulting `cheerio.Root`.
 */
export declare function getCleanedDoc(html: string): cheerio.Root;
/**
 * Builds a cheerio document from an HTML string.
 *
 * NOTE(review): how this differs from `getCleanedDoc` and
 * `getParsedDocForText` (same signature) is not visible from this
 * declaration — confirm against the implementation.
 *
 * @param html - The page markup as a string.
 * @returns The resulting `cheerio.Root`.
 */
export declare function getParsedDoc(html: string): cheerio.Root;
/**
 * Builds a cheerio document from an HTML string.
 *
 * NOTE(review): the name suggests the parse is configured for text
 * extraction; the actual parser options are not visible from this
 * declaration — confirm against the implementation.
 *
 * @param html - The page markup as a string.
 * @returns The resulting `cheerio.Root`.
 */
export declare function getParsedDocForText(html: string): cheerio.Root;
/**
 * Selects a node set from an already-built cheerio document.
 *
 * NOTE(review): presumably returns the element judged to hold the main
 * content; the selection criteria and the result for a document with no
 * suitable node are not visible from this declaration — confirm against the
 * implementation.
 *
 * @param doc - A cheerio document, e.g. one returned by the `get*Doc` helpers.
 * @param lang - Language argument; required here, unlike the optional
 *   language parameter of the default export and `lazy`.
 * @returns A `cheerio.Cheerio` selection.
 */
export declare function getTopNode(doc: cheerio.Root, lang: string): cheerio.Cheerio;