ts-webcrawler
Version:
A TypeScript web crawler library for downloading and parsing web pages.
48 lines (47 loc) • 1.32 kB
TypeScript
import { Page } from '../Class/Page';
import { Asset } from '../Class/Asset';
import { Url } from '../Class/Url';
/**
 * Ambient declaration of the crawler class: it describes the member names and
 * public signatures only; no implementation is present in this file.
 *
 * NOTE(review): `CrawlerSettings` and `CrawlerHooks` are referenced below but
 * are not among this file's imports — presumably they are declared
 * globally/ambiently elsewhere; confirm.
 */
export declare class BaseCrawler {
  // ---- Private state -------------------------------------------------------
  // Declarations omit the types of private members, so only the names are
  // visible here. Their exact roles cannot be determined from this file.
  private _settings;
  private _pages;
  private _assets;
  private _pagesCrawled;
  private _assetsCrawled;
  private _pagesToFollow;
  private _running;
  private _activeThreads;
  private hooks;

  // ---- Construction / inspection -------------------------------------------

  /**
   * @param settings Crawler configuration (shape defined by `CrawlerSettings`,
   *                 which is not visible in this file).
   */
  constructor(settings: CrawlerSettings);

  /** String representation of the crawler; the format is not visible here. */
  toString(): string;

  /** Returns a list of `Page` objects — presumably those already crawled; confirm. */
  getCrawledPages(): Page[];

  /** Returns a list of `Asset` objects — presumably those already crawled; confirm. */
  getCrawledAssets(): Asset[];

  /** Returns a list of `Page` objects — presumably every page known to the crawler; confirm. */
  getPages(): Page[];

  /** Returns a list of `Asset` objects — presumably every asset known to the crawler; confirm. */
  getAssets(): Asset[];

  // ---- Lifecycle -----------------------------------------------------------

  /**
   * Entry point taking the hook callbacks — presumably starts the crawl; confirm.
   * @param hooks Callbacks of type `CrawlerHooks` (not visible in this file).
   */
  run(hooks: CrawlerHooks): void;

  /**
   * Fetches the robots.txt and sitemap.xml files belonging to the starting
   * page, invoking the matching hooks once each has been loaded.
   */
  private crawlingMeta;

  // Private runner / thread / gate members; names only, behaviour is not
  // visible in this declaration.
  private crawlingPageRunner;
  private crawlingAssetRunner;
  private startPageThread;
  private startAssetThread;
  private canPageCrawl;
  private canAssetCrawl;

  /** Boolean status query — presumably whether the crawler may run/continue; confirm. */
  canRun(): boolean;

  /**
   * Stops the crawler, halting all of its activity.
   */
  kill(): void;

  // ---- Queueing ------------------------------------------------------------
  // NOTE: the two method names below are spelled "enqueque" (sic). They are
  // part of the public API, so the spelling is kept as-is.

  /**
   * Adds a page to the set of pages waiting to be crawled.
   * @param url Url of the page to enqueue
   */
  enquequePage(url: Url): void;

  /**
   * Adds an asset to the set of assets waiting to be crawled.
   * @param url Url of the asset to enqueue
   */
  enquequeAsset(url: Url): void;
}