UNPKG

crawler-ts

Version:

Lightweight crawler written in TypeScript using ES6 generators.

57 lines (56 loc) 2.12 kB
/** Either a plain `T` or a promise of `T`, so callbacks may be synchronous or asynchronous. */
declare type ValueOrPromise<T> = T | Promise<T>;

/** Logging sink accepted by the crawler options; `console` has this shape. */
export interface Logger {
  info: (...args: any[]) => void;
  error: (...args: any[]) => void;
}

/** Data handed to the callbacks that take no parsed result: the location and its response. */
export interface PreParseProps<L, R> {
  location: L;
  response: R;
}

/** Everything in {@link PreParseProps} plus the parsed result. */
export interface PostParseProps<L, R, P> extends PreParseProps<L, R> {
  parsed: P;
}

/**
 * Argument shape of `Options.shouldQueue`: the candidate location together with
 * the location, response and parsed result it originated from.
 */
interface QueueProps<L, R, P> extends PostParseProps<L, R, P> {
  origin: L;
}

/**
 * Callbacks and settings that define a crawler.
 *
 * @typeParam L - The type of the locations to crawl, e.g. `URL` or `string` that represents a path.
 * @typeParam R - The type of the response at the location that is crawled, e.g. Cheerio object, file system `fs.Stats`.
 * @typeParam P - The intermediate parsed result that can be parsed from the response and generated by the crawler.
 */
export interface Options<L, R, P> {
  /**
   * Returns the response for the given location.
   * The signature also permits `undefined` (directly or via a promise).
   */
  requester(location: L): ValueOrPromise<R | undefined>;

  /**
   * Returns true if the crawler should parse the response, or false if not.
   */
  shouldParse(props: PreParseProps<L, R>): ValueOrPromise<boolean>;

  /**
   * Parses the response and converts it to the parsed type.
   * The signature also permits `undefined` (directly or via a promise).
   */
  parser(props: PreParseProps<L, R>): ValueOrPromise<P | undefined>;

  /**
   * Returns true if the crawler should yield the parsed result, or false if not.
   */
  shouldYield(props: PostParseProps<L, R, P>): ValueOrPromise<boolean>;

  /**
   * Yields all the locations to follow in the given parsed result.
   */
  follower(props: PostParseProps<L, R, P>): AsyncGenerator<L>;

  /**
   * Returns true if the crawler should queue the location for crawling, or false if not.
   */
  shouldQueue(props: QueueProps<L, R, P>): ValueOrPromise<boolean>;

  /**
   * Optional logger. Can be set to `console` to output debug information to the `console`.
   *
   * @default undefined
   */
  logger?: Logger;
}

/**
 * Builds a crawler from the supplied options.
 *
 * @param options - The callbacks (and optional logger) described by {@link Options}.
 * @returns A function that takes a start location and returns an async generator
 *          of {@link PostParseProps} items.
 */
export declare function createCrawler<L, R, P>(
  options: Options<L, R, P>,
): (start: L) => AsyncGenerator<PostParseProps<L, R, P>>;

export {};