website-scrap-engine
Version:
Configurable website scraper in TypeScript
124 lines • 4.94 kB
JavaScript
import { Worker } from 'node:worker_threads';
import * as logger from '../logger/logger.js';
import { WorkerMessageType } from './types.js';
/**
 * Bookkeeping record for one worker thread.
 *
 * Fields:
 *  - `worker`: the object handed to the constructor.
 *  - `load`: counter starting at 0; WorkerPool increments it on dispatch and
 *    decrements it on completion.
 *  - `id`: copied from `worker.threadId`.
 */
export class WorkerInfoImpl {
    /**
     * @param worker object exposing a `threadId` property
     */
    constructor(worker) {
        // Key order (worker, load, id) is kept the same as plain assignments.
        Object.assign(this, { worker, load: 0, id: worker.threadId });
    }
}
/**
 * Default factory used by WorkerPool: spawns a real `worker_threads` Worker.
 *
 * @param filename first argument forwarded to the Worker constructor
 * @param options second argument forwarded to the Worker constructor
 * @returns the newly created Worker
 */
function defaultWorkerFactory(filename, options) {
    const thread = new Worker(filename, options);
    return thread;
}
/**
 * Fixed-size pool of worker threads with simple load-based task dispatch.
 *
 * Tasks are queued by `submitTask`, handed to the least-loaded worker by
 * `nextTask`, and settled when the worker posts back a message whose `type`
 * is `WorkerMessageType.Complete` and whose `taskId` matches. Any other
 * message is treated as a log record and forwarded to the matching logger.
 *
 * NOTE(review): a worker that emits 'error' is only logged; tasks already
 * dispatched to it are not rejected and it is not replaced — confirm whether
 * callers rely on this.
 */
export class WorkerPool {
    /**
     * @param coreSize number of workers to create
     * @param workerScript first argument passed to `factory`
     * @param workerData passed to `factory` as `{ workerData }`
     * @param maxLoad maximum in-flight tasks per worker; values <= 0 mean unlimited
     * @param factory creates a worker from `(workerScript, options)`
     */
    constructor(coreSize, workerScript, workerData, maxLoad = -1, factory = defaultWorkerFactory) {
        this.coreSize = coreSize;
        this.workerScript = workerScript;
        this.workerData = workerData;
        this.maxLoad = maxLoad;
        this.factory = factory;
        this.workers = [];
        this.pendingTasks = [];
        this.workingTasks = {};
        this.taskIdCounter = 0;
        const ready = [];
        for (let i = 0; i < coreSize; i++) {
            // Bind listeners to this exact record rather than to an array
            // index, so events are always attributed to the emitting worker.
            const info = new WorkerInfoImpl(factory(workerScript, { workerData }));
            this.workers.push(info);
            info.worker.addListener('message', msg => this.onMessage(info, msg));
            info.worker.addListener('error', err => this.workerOnError(info, err));
            ready.push(new Promise(resolve => info.worker.addListener('online', resolve)));
        }
        // Resolves (with undefined) once every worker has emitted 'online'.
        this.ready = Promise.all(ready).then(() => undefined);
    }
    /**
     * Logs an error emitted by a worker.
     */
    workerOnError(info, err) {
        logger.error.error('worker error', info.id, err);
    }
    /**
     * Routes a message from a worker: completions settle the matching task,
     * everything else is handled as a log record.
     */
    onMessage(info, message) {
        if (message && message.type === WorkerMessageType.Complete) {
            this.complete(info, message);
            return;
        }
        this.takeLog(info, message);
    }
    /**
     * Forwards a log record to `logger[body.logger][body.level]`, prefixing
     * the worker id. Records without a body are reported as invalid; records
     * naming an unknown logger or level are silently dropped.
     */
    takeLog(info, message) {
        const body = message && message.body;
        if (!body) {
            logger.error.warn('Invalid formatted log', info.id);
            return;
        }
        const channel = logger[body.logger];
        const emit = channel == null ? undefined : channel[body.level];
        if (emit == null) {
            return;
        }
        const content = body.content;
        const extra = content && content.length ? content : [];
        emit.call(channel, info.id, ...extra);
    }
    /**
     * Settles the task named by `message.taskId` with the whole message and
     * frees one slot on the worker. Completions for unknown task ids are
     * ignored so a worker's load can never drop below its real in-flight count.
     */
    complete(info, message) {
        const finished = this.workingTasks[message.taskId];
        if (!finished) {
            return;
        }
        delete this.workingTasks[message.taskId];
        --info.load;
        // Dispatch asynchronously so the resolver below runs first.
        setImmediate(() => this.nextTask());
        finished.resolve(message);
    }
    /**
     * Queues a task for execution.
     *
     * @param taskBody payload posted to the worker as `body`
     * @param transferList optional transfer list passed to `postMessage`
     * @returns promise resolved with the worker's completion message, or
     *   rejected if posting fails or the pool is disposed first
     */
    submitTask(taskBody, transferList) {
        return new Promise((resolve, reject) => {
            const task = {
                taskId: ++this.taskIdCounter,
                resolve,
                reject,
                body: taskBody,
                transferList
            };
            this.pendingTasks.push(task);
            setImmediate(() => this.nextTask());
        });
    }
    /**
     * Hands queued tasks to workers, one at a time, always choosing the
     * worker with the lowest current load. Stops when the queue is empty or
     * every worker has reached `maxLoad`.
     */
    nextTask() {
        while (this.pendingTasks.length) {
            const target = this.leastLoadedWorker();
            if (!target || (this.maxLoad > 0 && target.load >= this.maxLoad)) {
                return;
            }
            const task = this.pendingTasks.shift();
            try {
                target.worker.postMessage({
                    taskId: task.taskId,
                    body: task.body
                }, task.transferList);
            }
            catch (e) {
                // The task never reached the worker: fail it and move on.
                task.reject(e);
                continue;
            }
            this.workingTasks[task.taskId] = task;
            ++target.load;
        }
    }
    /**
     * Returns the worker record with the smallest load (first one wins ties),
     * or undefined when the pool has no workers. Does not reorder `workers`.
     */
    leastLoadedWorker() {
        let best;
        for (const info of this.workers) {
            if (!best || info.load < best.load) {
                best = info;
            }
        }
        return best;
    }
    /**
     * Terminates every worker and rejects all unfinished tasks, both
     * dispatched and still queued, with `Error('disposed')`.
     *
     * @returns the values resolved by each worker's `terminate()`
     */
    async dispose() {
        try {
            return await Promise.all(this.workers.map(info => info.worker.terminate()));
        }
        finally {
            // Runs even if a terminate() rejects, so no caller is left waiting.
            for (const taskId of Object.keys(this.workingTasks)) {
                this.workingTasks[taskId].reject(new Error('disposed'));
                delete this.workingTasks[taskId];
            }
            for (const task of this.pendingTasks.splice(0)) {
                task.reject(new Error('disposed'));
            }
        }
    }
}
//# sourceMappingURL=worker-pool.js.map