@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
151 lines (150 loc) • 5.21 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SnapshotStore = void 0;
exports.evaluateLoadSignalSample = evaluateLoadSignalSample;
const utils_1 = require("@crawlee/utils");
const utilities_1 = require("@apify/utilities");
/**
 * A time-pruning, time-windowed store for `LoadSnapshot` values.
 * Signals compose with this instead of inheriting from a base class.
 *
 * Snapshots are kept in insertion order. The store only ever reads the
 * `createdAt` field of a snapshot; everything else is opaque to it.
 */
class SnapshotStore {
    /**
     * @param {number} [historyMillis=30000] Maximum age (in milliseconds) a
     *   snapshot may reach before `push()` drops it.
     */
    constructor(historyMillis = 30000) {
        this.snapshots = [];
        this.historyMillis = historyMillis;
    }
    /**
     * Add a snapshot and prune entries older than the history window.
     *
     * @param {object} snapshot Snapshot to append; must carry `createdAt`.
     * @param {Date} [now=snapshot.createdAt] Reference time used to measure
     *   the age of the stored snapshots.
     */
    push(snapshot, now = snapshot.createdAt) {
        // Pruning stops at the first entry that is still inside the window,
        // so it relies on snapshots having been pushed in chronological order.
        let staleCount = 0;
        while (staleCount < this.snapshots.length
            && now.getTime() - new Date(this.snapshots[staleCount].createdAt).getTime() > this.historyMillis) {
            staleCount++;
        }
        if (staleCount > 0) {
            this.snapshots.splice(0, staleCount);
        }
        this.snapshots.push(snapshot);
    }
    /**
     * Return all snapshots, or only those within the given time window.
     *
     * The window is measured backwards from the `createdAt` of the newest
     * stored snapshot, not from the current wall-clock time.
     *
     * @param {number} [sampleDurationMillis] Window length in milliseconds.
     *   When omitted (or otherwise falsy) the internal array itself is
     *   returned; otherwise a new array is returned.
     * @returns {object[]} Snapshots in insertion order.
     */
    getSample(sampleDurationMillis) {
        if (!sampleDurationMillis) {
            return this.snapshots;
        }
        const { snapshots } = this;
        if (snapshots.length === 0) {
            return [];
        }
        const latestTime = snapshots[snapshots.length - 1].createdAt;
        // Walk back from the newest entry to find where the window starts, then
        // copy the tail once (avoids the quadratic cost of repeated unshift()).
        let windowStart = snapshots.length;
        while (windowStart > 0 && +latestTime - +snapshots[windowStart - 1].createdAt <= sampleDurationMillis) {
            windowStart--;
        }
        return snapshots.slice(windowStart);
    }
    /**
     * Direct access to the underlying array (for backward-compat getters).
     *
     * @returns {object[]} The live internal array, not a copy.
     */
    getAll() {
        return this.snapshots;
    }
    /**
     * Create a `LoadSignal` that snapshots on a `betterSetInterval` tick.
     *
     * The `handler` receives the store (to read previous snapshots) and the
     * interval callback (which it **must** call when done). It should call
     * `store.push()` to record a snapshot.
     *
     * `start()` may be called repeatedly: a timer left over from a previous
     * `start()` is cleared first, so at most one interval is ever active and
     * `stop()` always clears it.
     *
     * @param {object} options Reads `name`, `overloadedRatio`,
     *   `snapshotHistoryMillis`, `intervalMillis` and `handler`.
     */
    static fromInterval(options) {
        const store = new SnapshotStore(options.snapshotHistoryMillis);
        let interval = null;
        const handle = (cb) => options.handler(store, cb);
        return {
            name: options.name,
            overloadedRatio: options.overloadedRatio,
            store,
            handle,
            getSample: (ms) => store.getSample(ms),
            async start() {
                // Without this, a second start() would overwrite the handle and
                // leave the first timer running with no way to stop it.
                if (interval) {
                    (0, utilities_1.betterClearInterval)(interval);
                }
                interval = (0, utilities_1.betterSetInterval)(handle, options.intervalMillis);
            },
            async stop() {
                (0, utilities_1.betterClearInterval)(interval);
                interval = null;
            },
        };
    }
    /**
     * Create a `LoadSignal` that snapshots in response to an `EventManager` event.
     *
     * The `handler` receives the event payload and the store. It should call
     * `store.push()` to record a snapshot.
     *
     * `start()` is idempotent: the listener is attached only once until
     * `stop()` is called, so repeated starts cannot cause duplicate snapshots.
     *
     * @param {object} options Reads `name`, `overloadedRatio`,
     *   `snapshotHistoryMillis`, `events`, `event` and `handler`.
     */
    static fromEvent(options) {
        const store = new SnapshotStore(options.snapshotHistoryMillis);
        let isListening = false;
        const handle = (payload) => options.handler(store, payload);
        return {
            name: options.name,
            overloadedRatio: options.overloadedRatio,
            store,
            handle,
            getSample: (ms) => store.getSample(ms),
            async start() {
                if (isListening) {
                    return;
                }
                options.events.on(options.event, handle);
                isListening = true;
            },
            async stop() {
                options.events.off(options.event, handle);
                isListening = false;
            },
        };
    }
}
exports.SnapshotStore = SnapshotStore;
/**
 * Decide whether a sample of `LoadSnapshot` values is overloaded by comparing
 * a time-weighted average of their `isOverloaded` flags against
 * `overloadedRatio`. This is the shared evaluation logic used by
 * `SystemStatus` for all signal types.
 *
 * - Empty sample: never overloaded, `actualRatio` is 0.
 * - Single snapshot: its own flag is used directly.
 * - Otherwise: every snapshot after the first contributes its flag, weighted
 *   by the time elapsed since the preceding snapshot.
 *
 * @param {object[]} sample Snapshots carrying `createdAt` and `isOverloaded`.
 * @param {number} overloadedRatio Threshold the average must exceed.
 * @returns {{isOverloaded: boolean, limitRatio: number, actualRatio: number}}
 *   `actualRatio` is rounded to three decimal places.
 */
function evaluateLoadSignalSample(sample, overloadedRatio) {
    const snapshotCount = sample.length;
    if (snapshotCount === 0) {
        return {
            isOverloaded: false,
            limitRatio: overloadedRatio,
            actualRatio: 0,
        };
    }
    let ratio;
    if (snapshotCount === 1) {
        ratio = +sample[0].isOverloaded;
    }
    else {
        const durations = [];
        const flags = [];
        let previous = sample[0];
        for (let position = 1; position < snapshotCount; position++) {
            const current = sample[position];
            const elapsed = +current.createdAt - +previous.createdAt;
            // Snapshots taken in the same millisecond would get zero weight;
            // fall back to 1 so they still count.
            durations.push(elapsed ? elapsed : 1);
            flags.push(+current.isOverloaded);
            previous = current;
        }
        ratio = (0, utils_1.weightedAvg)(flags, durations);
    }
    return {
        isOverloaded: ratio > overloadedRatio,
        limitRatio: overloadedRatio,
        actualRatio: Math.round(ratio * 1000) / 1000,
    };
}