defuddle
Version:
Extract article content and metadata from web pages.
16 lines (15 loc) • 593 B
TypeScript
import { BaseExtractor } from './extractors/_base';
/**
 * Constructor signature for a class whose instances are `BaseExtractor`s.
 *
 * The constructor receives the `Document`, a `url` string, and an optional
 * `schemaOrgData` value. `schemaOrgData` is typed `any`, so its shape is not
 * checked at this boundary.
 */
type ExtractorConstructor = new (document: Document, url: string, schemaOrgData?: any) => BaseExtractor;
/**
 * Associates a list of patterns with the extractor class to construct.
 *
 * NOTE(review): what the patterns are matched against (full URL, hostname, ...)
 * and how string patterns are compared is not visible in this declaration —
 * confirm against the implementation.
 */
interface ExtractorMapping {
/** Patterns for this mapping; each entry is either a string or a `RegExp`. */
patterns: (string | RegExp)[];
/** Constructor of the extractor associated with `patterns`. */
extractor: ExtractorConstructor;
}
/**
 * Registry of extractor mappings. Every member declared here is static.
 *
 * Only signatures are declared here; the behavioural notes below are inferred
 * from names and are marked as assumptions to verify against the implementation.
 */
export declare class ExtractorRegistry {
/** Private static storage; presumably the registered `ExtractorMapping`s — TODO confirm. */
private static mappings;
/** Private static storage; presumably a per-domain lookup cache — TODO confirm. */
private static domainCache;
/** Takes no arguments and returns nothing. Presumably registers the built-in mappings — TODO confirm. */
static initialize(): void;
/**
 * Accepts a single mapping and returns nothing.
 *
 * @param mapping - Patterns plus the extractor constructor to associate with them.
 */
static register(mapping: ExtractorMapping): void;
/**
 * Returns a `BaseExtractor`, or `null`.
 *
 * NOTE(review): presumably `null` means no registered pattern matched `url`,
 * and the arguments are forwarded to the matching extractor's constructor —
 * confirm against the implementation.
 *
 * @param document - The `Document` argument.
 * @param url - The URL string argument.
 * @param schemaOrgData - Optional value typed `any`.
 * @returns An extractor instance, or `null`.
 */
static findExtractor(document: Document, url: string, schemaOrgData?: any): BaseExtractor | null;
/** Takes no arguments and returns nothing. Presumably empties `domainCache` — TODO confirm. */
static clearCache(): void;
}
// Empty export. NOTE(review): presumably emitted by the compiler so that the
// non-exported declarations in this file (`ExtractorConstructor`,
// `ExtractorMapping`) stay module-private — confirm this file is generated output.
export {};