UNPKG

crawler

Version:

Crawler is a ready-to-use web spider that works with proxies, asynchrony, rate limit, configurable request pools, jQuery, and HTTP/2 support.

70 lines 2.76 kB
import { multiPriorityQueue } from "../lib/index.js"; class RateLimiter { constructor({ maxConnections, rateLimit, priorityLevels = 1, defaultPriority = 0, cluster }) { if (!Number.isInteger(maxConnections) || !Number.isInteger(rateLimit) || !Number.isInteger(priorityLevels)) { throw new Error("maxConnections, rateLimit and priorityLevels must be positive integers"); } this.maxConnections = maxConnections; this.priorityLevels = priorityLevels; this.defaultPriority = Number(defaultPriority); this.defaultPriority = Number.isInteger(defaultPriority) ? Math.min(Math.max(defaultPriority, 0), priorityLevels - 1) : Math.floor(priorityLevels / 2); this.nextRequestTime = Date.now(); this._waitingTasks = new multiPriorityQueue(priorityLevels); this._cluster = cluster; this.rateLimit = rateLimit; this.runningSize = 0; } get waitingSize() { return this._waitingTasks.size(); } hasWaitingTasks() { return this.waitingSize > 0 || (this._cluster !== void 0 && this._cluster.hasWaitingTasks()); } setId(id) { this.id = id; } setRateLimit(rateLimit) { if (!Number.isInteger(rateLimit) || rateLimit < 0) { throw new Error("rateLimit must be non negative integers"); } this.rateLimit = rateLimit; if (this.rateLimit > 0) this.maxConnections = 1; } submit(options, task) { let priority = typeof options === "number" ? options : options.priority; priority = Number.isInteger(priority) ? priority : this.defaultPriority; priority = Math.min(priority, this.priorityLevels - 1); this._waitingTasks.enqueue(task, priority); this._schedule(); } _schedule() { if (this.runningSize < this.maxConnections && this.hasWaitingTasks()) { ++this.runningSize; const delay = Math.max(this.nextRequestTime - Date.now(), 0); this.nextRequestTime = Date.now() + delay + this.rateLimit; const { next, rateLimiterId } = this.dequeue(); setTimeout(() => { const done = () => { --this.runningSize; this._schedule(); }; next(done, rateLimiterId); }, delay); } } directDequeue() { return this._waitingTasks.dequeue(); } dequeue() { if (this.waitingSize) { return { next: this._waitingTasks.dequeue(), rateLimiterId: undefined, }; } return this._cluster?.dequeue(); } } export default RateLimiter; //# sourceMappingURL=rateLimiter.js.map