metascraper
Version:
A library to easily scrape metadata from an article on the web using Open Graph, JSON-LD, regular HTML metadata, and a series of fallbacks.
146 lines (137 loc) • 4.74 kB
TypeScript
/**
 * Builds a [metascraper](https://metascraper.js.org/) instance from an
 * explicitly declared list of rules bundles.
 *
 * @param rules - The [rules bundles](https://metascraper.js.org/#/?id=rules-bundles)
 *   to apply for metadata extraction.
 * @returns The metascraper instance, typed as `createMetascraper.Metascraper`.
 */
declare function createMetascraper(
  rules: Array<createMetascraper.Rules>
): createMetascraper.Metascraper;

// CommonJS-style export: the function itself is the module's export.
export = createMetascraper;
declare namespace createMetascraper {
  /**
   * Options accepted by a `Metascraper` instance when it is invoked.
   */
  export interface MetascraperOptions {
    /**
     * The URL associated with the HTML markup.
     * It is used to resolve relative links that may be present in the markup,
     * and it can also be used as a fallback field by different rules.
     */
    url: string;

    /**
     * The HTML markup to extract the content from.
     */
    html?: string;

    /**
     * The Cheerio instance to extract the content from.
     */
    htmlDom?: import("cheerio").CheerioAPI;

    /**
     * Additional rules to add at execution time.
     * They are merged with the already loaded rules, at the beginning.
     */
    rules?: Array<Rules>;

    /**
     * Ensures the provided URL is validated as WHATWG URL API compliant.
     */
    validateUrl?: boolean;

    /**
     * Set of property names to omit from the metadata extraction process.
     * These properties are filtered out before the rules are processed.
     */
    omitPropNames?: Set<string>;

    /**
     * Set of property names to pick for the metadata extraction process.
     * When provided, only the rules for these properties are executed.
     * Takes precedence over `omitPropNames` when both are specified.
     */
    pickPropNames?: Set<string>;
  }

  /**
   * The metadata a `Metascraper` instance resolves to.
   * Every named property is optional; each one requires its matching package
   * to be loaded.
   */
  export interface Metadata {
    /**
     * The `audio` property obtained from the HTML markup.
     * Requires the [metascraper-audio](https://example.com/metascraper-audio) package to be loaded.
     */
    audio?: string;

    /**
     * The `author` property obtained from the HTML markup.
     * Requires the [metascraper-author](https://example.com/metascraper-author) package to be loaded.
     */
    author?: string;

    /**
     * The `date` property obtained from the HTML markup.
     * Requires the [metascraper-date](https://example.com/metascraper-date) package to be loaded.
     */
    date?: string;

    /**
     * The `description` property obtained from the HTML markup.
     * Requires the [metascraper-description](https://example.com/metascraper-description) package to be loaded.
     */
    description?: string;

    /**
     * The `image` property obtained from the HTML markup.
     * Requires the [metascraper-image](https://example.com/metascraper-image) package to be loaded.
     */
    image?: string;

    /**
     * The `lang` property obtained from the HTML markup.
     * Requires the [metascraper-lang](https://example.com/metascraper-lang) package to be loaded.
     */
    lang?: string;

    /**
     * The `logo` property obtained from the HTML markup.
     * Requires the [metascraper-logo](https://example.com/metascraper-logo) package to be loaded.
     */
    logo?: string;

    /**
     * The `publisher` property obtained from the HTML markup.
     * Requires the [metascraper-publisher](https://example.com/metascraper-publisher) package to be loaded.
     */
    publisher?: string;

    /**
     * The `title` property obtained from the HTML markup.
     * Requires the [metascraper-title](https://example.com/metascraper-title) package to be loaded.
     */
    title?: string;

    /**
     * The `url` property obtained from the HTML markup.
     * Requires the [metascraper-url](https://example.com/metascraper-url) package to be loaded.
     */
    url?: string;

    /**
     * The `video` property obtained from the HTML markup.
     * Requires the [metascraper-video](https://example.com/metascraper-video) package to be loaded.
     */
    video?: string;

    /**
     * Any other property name maps to a string value or `undefined`.
     */
    [key: string]: string | undefined;
  }

  /**
   * Optional rule entries keyed by the properties of `Metadata`.
   * The key remapping discards every key `K` for which `string extends K`
   * holds, i.e. the catch-all string index signature, so it does not leak
   * into this type.
   */
  type NamedRules = {
    [K in keyof Metadata as string extends K ? never : K]?: RulesOptions | Array<RulesOptions>;
  };

  /**
   * A rules bundle: rule functions keyed by property name, plus the two
   * special keys `test` and `pkgName`.
   */
  export interface Rules extends NamedRules {
    /**
     * The test function to be executed; rules are skipped when it does not
     * return `true`.
     */
    test?: (options: RulesTestOptions) => boolean;

    /**
     * The package name associated with the rule, used for debugging purposes.
     */
    pkgName?: string;

    /**
     * Allows any other string key to hold a rule function, or an array of
     * them, for ad-hoc metadata. The union also has to cover the value types
     * of the two special keys above.
     */
    [key: string]:
      | string
      | ((options: RulesTestOptions) => boolean)
      | RulesOptions
      | Array<RulesOptions>
      | undefined;
  }

  /**
   * A single rule function: it receives the rule options and returns a
   * string, `null` or `undefined`.
   */
  export type RulesOptions = (options: RulesTestOptions) => string | undefined | null;

  /**
   * The argument handed to rule functions and to the `test` function.
   */
  export interface RulesTestOptions {
    /** The Cheerio API instance. */
    htmlDom: import("cheerio").CheerioAPI;
    /** The URL, as a string. */
    url: string;
  }

  /**
   * A metascraper instance: a function that takes `MetascraperOptions` and
   * returns a promise of `Metadata`.
   */
  export type Metascraper = (options: MetascraperOptions) => Promise<Metadata>;
}