defuddle
Version:
Extract article content and metadata from web pages.
17 lines (16 loc) • 789 B
TypeScript
import { BaseExtractor } from './extractors/_base';
/**
 * Constructor signature for extractor classes held by the registry.
 *
 * A conforming class is instantiated with the page `document`, the page `url`,
 * and an optional `schemaOrgData` value, and yields a `BaseExtractor`.
 * NOTE(review): `schemaOrgData` is untyped (`any`) here; its expected shape is
 * not visible from this declaration — confirm against the implementation.
 */
type ExtractorConstructor = new (document: Document, url: string, schemaOrgData?: any) => BaseExtractor;
/**
 * Associates a set of patterns with the extractor class to use for them.
 * This is the argument type of `ExtractorRegistry.register`.
 */
interface ExtractorMapping {
/**
 * String or regular-expression patterns that select this extractor.
 * NOTE(review): presumably matched against the page URL or its domain; the
 * exact matching rules live in the implementation — confirm there.
 */
patterns: (string | RegExp)[];
/** Extractor class to construct for this mapping. */
extractor: ExtractorConstructor;
}
/**
 * Static registry of extractor mappings with lookup helpers that return a
 * `BaseExtractor` for a given document/URL, or `null`.
 *
 * This is a declaration only; all behavior notes below that go beyond the
 * signatures are assumptions to be confirmed against the implementation.
 */
export declare class ExtractorRegistry {
/** Internal storage for the registry (type not exposed in this declaration). */
private static mappings;
/**
 * Sets up the registry.
 * NOTE(review): presumably registers the built-in extractors — confirm.
 */
static initialize(): void;
/**
 * Adds a pattern-to-extractor mapping to the registry.
 *
 * @param mapping - The patterns and the extractor class to associate with them.
 */
static register(mapping: ExtractorMapping): void;
/**
 * Looks up an extractor for the given page.
 *
 * @param document - The page document handed to the extractor.
 * @param url - The page URL.
 * @param schemaOrgData - Optional schema.org data for the page (untyped here).
 * @returns A `BaseExtractor`, or `null`.
 * NOTE(review): `null` presumably means no registered extractor applies — confirm.
 */
static findExtractor(document: Document, url: string, schemaOrgData?: any): BaseExtractor | null;
/**
 * Looks up an extractor for the given page; same parameters and return type
 * as `findExtractor`.
 * NOTE(review): judging by the name, this selects extractors with an
 * asynchronous extraction path — the selection criteria are not visible here.
 */
static findAsyncExtractor(document: Document, url: string, schemaOrgData?: any): BaseExtractor | null;
/**
 * Looks up an extractor for the given page; same parameters and return type
 * as `findExtractor`.
 * NOTE(review): judging by the name, this selects extractors that prefer
 * their asynchronous path — the selection criteria are not visible here.
 */
static findPreferredAsyncExtractor(document: Document, url: string, schemaOrgData?: any): BaseExtractor | null;
/**
 * Private lookup helper (signature not exposed in this declaration).
 * NOTE(review): likely shared by the public `find*` methods — confirm.
 */
private static findByPredicate;
}
export {};