// site-metadata-extractor
// Version:
// web(site) resource metadata extractor
// 39 lines (38 loc) • 1.52 kB
// TypeScript
import type { Cheerio, CheerioAPI } from "cheerio";
import type { AnyNode } from "domhandler";
import { NewsArticle, Article } from "schema-dts";
import { URL } from "url";
/**
 * One link entry, as returned (in an array) by `Extractor.links`.
 *
 * Both fields are required strings.
 */
export interface LinkObj {
  /** Link target. NOTE(review): presumably the anchor's raw `href` value — confirm whether it is resolved to an absolute URL. */
  href: string;

  /** Text associated with the link. NOTE(review): presumably the anchor's visible text — confirm. */
  text: string;
}
/**
 * Attributes describing one video, as returned (in an array) by
 * `Extractor.videos`.
 *
 * Every field is optional, and every field is typed as a string, including
 * the two dimensions.
 * NOTE(review): presumably these mirror the attributes of the embedding
 * element as found in the page — confirm against the implementation.
 */
export interface VideoAttrs {
  /** Height, as a string; may be absent. */
  height?: string;

  /** Source of the video, as a string; may be absent. */
  src?: string;

  /** Width, as a string; may be absent. */
  width?: string;
}
/**
 * Contract of the extractor object that this module exposes as its default
 * export.
 *
 * Every member is a function whose first argument is a loaded Cheerio
 * document (`CheerioAPI`). Most members return a single string; the ones
 * that take extra arguments or return something else are annotated below.
 *
 * The member names indicate which piece of page metadata each function
 * targets. The extraction rules themselves (selectors, fallbacks, value
 * formats, what is returned when nothing is found) are not visible in this
 * declaration — check the implementation before relying on them.
 */
export interface Extractor {
  /** Returns a list of strings. NOTE(review): presumably author names — confirm. */
  author: (doc: CheerioAPI) => string[];

  /**
   * Returns a Cheerio selection for the given document and language string.
   * NOTE(review): presumably this is the node that `links`, `text` and
   * `videos` expect as their `topNode` argument — confirm.
   */
  calculateBestNode: (doc: CheerioAPI, lang: string) => Cheerio<AnyNode>;

  /** Takes the resource URL as a plain string in addition to the document. */
  canonicalLink: (doc: CheerioAPI, resourceUrl: string) => string;

  copyright: (doc: CheerioAPI) => string;

  /** Returns a string, not a `Date`; the format is not specified by this type. */
  date: (doc: CheerioAPI) => string;

  description: (doc: CheerioAPI) => string;

  /** Takes the resource URL as a parsed `URL` object (unlike `canonicalLink`, which takes a string). */
  favicon: (doc: CheerioAPI, resourceUrlObj: URL) => string;

  image: (doc: CheerioAPI) => string;

  /**
   * Returns a schema-dts `NewsArticle` or `Article`, or `null`.
   * NOTE(review): presumably `null` means no usable JSON-LD was found — confirm.
   */
  jsonld: (doc: CheerioAPI) => NewsArticle | Article | null;

  /** Returns a single string, not an array (compare `tags`). */
  keywords: (doc: CheerioAPI) => string;

  lang: (doc: CheerioAPI) => string;

  /** Returns link entries for the given document, node selection and language string. */
  links: (doc: CheerioAPI, topNode: Cheerio<AnyNode>, lang: string) => LinkObj[];

  locale: (doc: CheerioAPI) => string;

  publisher: (doc: CheerioAPI) => string;

  siteName: (doc: CheerioAPI) => string;

  softTitle: (doc: CheerioAPI) => string;

  /** Returns a list of strings. */
  tags: (doc: CheerioAPI) => string[];

  /** Returns a string for the given document, node selection and language string. */
  text: (doc: CheerioAPI, topNode: Cheerio<AnyNode>, lang: string) => string;

  title: (doc: CheerioAPI) => string;

  type: (doc: CheerioAPI) => string;

  /** Returns video attribute records for the given document and node selection. */
  videos: (doc: CheerioAPI, topNode: Cheerio<AnyNode>) => VideoAttrs[];
}
/**
 * Ambient declaration of the extractor object. It has the `Extractor` shape
 * and is this module's default export; the value itself is provided by the
 * implementation, not by this declaration.
 */
declare const extractor: Extractor;

export { extractor as default };