UNPKG

spider-node

Version:

爬虫工具,根据传入的配置和规则爬取数据

102 lines (97 loc) 2.62 kB
import Rule from './../src/rule';
import Spider from '../src/spider';
import request, { RequestResponse } from 'request';

/**
 * Spider-level configuration types.
 */
export namespace ISpider {
  /**
   * Error callback signature: receives the URL, the error, the HTTP config
   * and the spider instance.
   *
   * Exported explicitly because `IRule.RuleError` aliases it from another
   * namespace; without `export` that qualified reference only resolves in
   * ambient (`.d.ts`) contexts.
   */
  export type ErrorMiddleware = (
    url: string,
    Error: Error,
    config: IHttp.HttpConfig,
    spider: Spider
  ) => void;

  /**
   * `IHttp.HttpConfig` extended with optional spider-level fields.
   * NOTE(review): the exact semantics/units of `maxConnect` and `delay` are
   * not visible in this file — confirm against the implementation.
   */
  export interface SpiderHttpConfig extends IHttp.HttpConfig {
    maxConnect?: number;
    log?: boolean;
    delay?: number;
    /** Free-form key/value data. */
    meta?: {
      [key: string]: any;
    };
  }

  /**
   * Declarative description of a single rule. Only `test` is required.
   */
  export interface SpiderRuleConfig {
    name?: string;
    /** A string or regular expression (presumably matched against URLs — confirm). */
    test: string | RegExp;
    config?: IRule.RuleConfig;
    parse?: IRule.RuleParse;
    pipeline?: IRule.RulePipeline;
    error?: IRule.RuleError;
  }

  /**
   * Function producing one URL, a list of URLs, or a set of URLs.
   * Not referenced by any other declaration in this file.
   */
  export type urlsFn = () => string | string[] | Set<string>;

  /**
   * Top-level spider configuration. Every field is optional.
   */
  export interface Config {
    name?: string;
    rules?: Array<SpiderRuleConfig>;
    http?: IHttp.HttpConstructorConfig;
    /** Async hook receiving the spider instance. */
    open?: (spider: Spider) => Promise<any>;
    /** Async hook receiving the spider instance. */
    close?: (spider: Spider) => Promise<any>;
    downloadMiddleware?: IHttp.DownloadMiddleware[];
    errorMiddleware?: ErrorMiddleware[];
    log?: boolean;
  }
}

/**
 * Rule-level configuration and callback types.
 */
export namespace IRule {
  /**
   * Options for a single rule. The index signature allows arbitrary extra keys.
   */
  export interface RuleConfig {
    baseUrl?: string;
    include?: boolean;
    http?: IHttp.HttpConstructorConfig;
    charset?: string;
    maxCollect?: number;
    delay?: number;
    [key: string]: any;
  }

  /**
   * Rule-level error callback; same signature as `ISpider.ErrorMiddleware`.
   * Exported because `ISpider.SpiderRuleConfig` references it by qualified name.
   */
  export type RuleError = ISpider.ErrorMiddleware;

  /**
   * `IHttp.HttpResultConfig` with `meta` made required.
   */
  export interface RuleHttpConfig extends IHttp.HttpResultConfig {
    meta: {
      [key: string]: any;
    };
  }

  /**
   * Parse callback: receives the URL, the data, a selector, the rule HTTP
   * config and the spider.
   *
   * `CheerioSelector` is not imported in this file — presumably a global type
   * supplied by the cheerio typings; confirm it is available to consumers.
   *
   * Exported because `ISpider.SpiderRuleConfig` references it by qualified name.
   */
  export type RuleParse = (
    url: string,
    data: string | any,
    selector: CheerioSelector,
    config: RuleHttpConfig,
    spider: Spider
  ) => any;

  /**
   * Pipeline callback: receives an item and the spider.
   * Exported because `ISpider.SpiderRuleConfig` references it by qualified name.
   */
  export type RulePipeline = (item: any, spider: Spider) => any;
}

/**
 * HTTP-layer configuration and result types.
 */
export namespace IHttp {
  /**
   * `IRule.RuleConfig` plus the `Rule` instance it belongs to.
   */
  export interface HttpRuleConfig extends IRule.RuleConfig {
    rule: Rule;
  }

  /**
   * `request.CoreOptions` extended with optional spider-specific fields.
   * The index signature allows arbitrary extra keys.
   */
  export interface HttpConstructorConfig extends request.CoreOptions {
    spider?: Spider;
    name?: string;
    url?: string;
    retry?: number;
    /** Free-form key/value data. */
    meta?: {
      [key: string]: any;
    };
    charset?: string;
    [key: string]: any;
  }

  /**
   * `HttpConstructorConfig` with a required `rule`.
   */
  export interface HttpConfig extends HttpConstructorConfig {
    rule: Rule;
  }

  /**
   * Config shape passed to download middleware: `url` is required and
   * `rootConfig` carries an `HttpConstructorConfig`.
   */
  export interface HttpMiddlewareConfig extends HttpConfig {
    url: string;
    rootConfig: HttpConstructorConfig;
  }

  /**
   * `HttpConfig` plus the `request` library's response object.
   */
  export interface HttpResultConfig extends HttpConfig {
    response: RequestResponse;
  }

  /**
   * Result record: the URL, the data, and the result config.
   */
  export interface Result {
    url: string;
    data: any;
    config: HttpResultConfig;
  }

  /**
   * Download middleware: takes a middleware config and resolves to a
   * middleware config or `false`.
   * NOTE(review): the meaning of a `false` result is not visible in this
   * file — confirm against the implementation.
   *
   * Exported because `ISpider.Config` references it by qualified name.
   */
  export type DownloadMiddleware = (
    config: HttpMiddlewareConfig
  ) => Promise<HttpMiddlewareConfig | false>;
}