defuddle
Version:
Extract article content and metadata from web pages.
18 lines (17 loc) • 690 B
TypeScript
import { ExtractorResult } from '../types/extractors';
/**
 * Abstract base class for extractors.
 *
 * The constructor takes a `Document`, a URL string and optional
 * schema data. Subclasses must implement `canExtract()` and `extract()`;
 * the async-related methods are non-abstract, so the base class supplies
 * implementations for them (their bodies are not visible in this
 * declaration).
 */
export declare abstract class BaseExtractor {
/** Document available to subclasses (presumably the constructor's `document` argument). */
protected document: Document;
/** URL string available to subclasses (presumably the constructor's `url` argument). */
protected url: string;
/**
 * Optional, untyped data (presumably schema.org structured data, going by
 * the name — confirm against the implementation). May be undefined.
 */
protected schemaOrgData?: any;
/**
 * @param document - Document handed to the extractor.
 * @param url - URL string handed to the extractor.
 * @param schemaOrgData - Optional untyped data; may be omitted.
 */
constructor(document: Document, url: string, schemaOrgData?: any);
/**
 * Must be implemented by subclasses. Returns a boolean; presumably
 * indicates whether this extractor applies to the current document/URL —
 * the exact criteria are defined by each subclass.
 */
abstract canExtract(): boolean;
/**
 * Must be implemented by subclasses. Synchronously returns an
 * `ExtractorResult`.
 */
abstract extract(): ExtractorResult;
/**
 * Non-abstract boolean check. Presumably reports whether `extractAsync()`
 * can be used for this extractor; the base implementation's return value
 * is not visible in this declaration.
 */
canExtractAsync(): boolean;
/**
 * When true, parseAsync() will prefer extractAsync() over extract(),
 * even if sync extraction produces content. Use this when the async
 * path provides strictly better results (e.g. YouTube transcripts).
 */
prefersAsync(): boolean;
/**
 * Asynchronous counterpart of `extract()`: returns a promise of an
 * `ExtractorResult`. Non-abstract; what the base implementation does is
 * not visible in this declaration.
 */
extractAsync(): Promise<ExtractorResult>;
}