@crawlee/playwright
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs with (but not limited to) headless Chrome and Puppeteer.
39 lines • 1.72 kB
TypeScript
import type { RecoverableStatePersistenceOptions, Request } from '@crawlee/core';
/** The two rendering categories this module distinguishes between. */
export type RenderingType = 'clientOnly' | 'static';
/**
* A URL represented as a list of strings.
* NOTE(review): presumably the URL split into its parts (e.g. path segments) - confirm against the implementation.
*/
type URLComponents = string[];
/**
* Two-element numeric tuple; the element labels name a similarity to the stored `static` results and a similarity to the stored `clientOnly` results, in that order.
* NOTE(review): the similarity measure and its value range are not visible in this declaration file.
*/
type FeatureVector = [staticResultsSimilarity: number, clientOnlyResultsSimilarity: number];
/**
* Options accepted by the `RenderingTypePredictor` constructor.
*/
export interface RenderingTypePredictorOptions {
/** A number between 0 and 1 that determines the desired ratio of rendering type detections. */
detectionRatio: number;
/**
* Optional partial overrides of `RecoverableStatePersistenceOptions` from `@crawlee/core`.
* NOTE(review): presumably configures how the predictor's state is persisted and restored - confirm against the implementation.
*/
persistenceOptions?: Partial<RecoverableStatePersistenceOptions>;
}
/**
* Stores rendering type information for previously crawled URLs, predicts the rendering type for URLs that have yet to be crawled, and recommends when rendering type detection should be performed.
*
* Call `initialize()` before use so that persisted state is restored.
*
* @experimental
*/
export declare class RenderingTypePredictor {
/** NOTE(review): presumably holds the `detectionRatio` constructor option - the type is erased in this declaration. */
private detectionRatio;
/** Internal predictor state; its shape is not visible in this declaration file. */
private state;
/**
* Creates a predictor from the given options.
*
* @param options - See `RenderingTypePredictorOptions`; `detectionRatio` is required, `persistenceOptions` is optional.
*/
constructor({ detectionRatio, persistenceOptions }: RenderingTypePredictorOptions);
/**
* Initialize the predictor by restoring persisted state.
*
* @returns A promise that resolves with no value once initialization has finished.
*/
initialize(): Promise<void>;
/**
* Predict the rendering type for a given URL and request label.
*
* Only the `url`, `loadedUrl` and `label` properties of the request are read from the parameter.
*
* @returns The predicted `renderingType` together with a numeric `detectionProbabilityRecommendation`.
* NOTE(review): the recommendation is presumably a probability between 0 and 1 with which the caller should run rendering type detection - confirm.
*/
predict({ url, loadedUrl, label }: Request): {
renderingType: RenderingType;
detectionProbabilityRecommendation: number;
};
/**
* Store the rendering type for a given URL and request label. This updates the underlying prediction model, which may be costly.
*
* @param requests - A single request or an array of requests that the result applies to.
* @param renderingType - The rendering type to record for those requests.
*/
storeResult(requests: Request | Request[], renderingType: RenderingType): void;
/** NOTE(review): private member whose type is erased here; presumably tracks how many results have been stored - confirm. */
private resultCount;
/**
* Computes the `FeatureVector` for a URL (given as `URLComponents`) and an optional request label.
* Declared `protected`, so subclasses may call or override it.
*/
protected calculateFeatureVector(url: URLComponents, label: string | undefined): FeatureVector;
/**
* NOTE(review): presumably rebuilds the prediction model from the stored results - confirm against the implementation.
* Declared `protected`, so subclasses may call or override it.
*/
protected retrain(): void;
}
export {};
//# sourceMappingURL=rendering-type-prediction.d.ts.map