task-farmer
Version:
A simple multi-core task scheduler that works well with promises. Great for doing parallel data processing.
349 lines • 16.2 kB
JavaScript
"use strict";
//
// Drives a generator-based async function to completion and exposes the outcome as a promise.
// Reuses an already-defined `this.__awaiter` when there is one.
//
//   thisArg    - `this` value the generator function is invoked with.
//   _arguments - Array of arguments for the generator function (an empty list is used when falsy).
//   P          - Promise constructor to use; falls back to the global Promise when falsy.
//   generator  - The generator function; once invoked, the same variable holds the running generator object.
//
// Returns a promise (an instance of P) that resolves with the generator's return value
// or rejects with the first error that escapes the generator.
//
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    return new P(function (settle, fail) {
        // Handle one result from the generator: either finish, or wait for the yielded value and continue.
        function advance(result) {
            if (result.done) {
                settle(result.value);
            }
            else {
                // Wrapping in P turns plain values and thenables alike into a promise to wait on.
                new P(function (adopt) { adopt(result.value); }).then(onFulfilled, onRejected);
            }
        }
        // Resume the generator with the value the awaited promise resolved to.
        function onFulfilled(value) {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                fail(e);
            }
        }
        // Throw the rejection reason into the generator so its own try/catch can see it.
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                fail(e);
            }
        }
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
//
// Generator emulation helper (appears to be the standard compiler-emitted down-level helper - confirm before editing by hand).
// Reuses an already-defined `this.__generator` when there is one.
//
// `body` is called repeatedly with the state object `_` (and `thisArg` as `this`) and must return an
// instruction of the form [opcode, value]. The returned object exposes next/throw/return and, when
// Symbol is available, is its own iterator.
//
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state handed to `body`: `label` is the current position, `sent()` returns the last value
// passed in (or throws it when it was delivered via "throw"), `trys` and `ops` are stacks used for
// try/catch/finally bookkeeping. `f` is the "currently executing" flag, `y` an inner iterator being
// delegated to, `t` a scratch variable, `g` the generator object that is returned.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Builds the public method for opcode n (0 = next, 1 = throw, 2 = return).
function verb(n) { return function (v) { return step([n, v]); }; }
// Executes instructions until one of them produces a result for the caller.
function step(op) {
// `f` is set while a step is running and cleared in the `finally` below, so re-entrant calls fail here.
if (f) throw new TypeError("Generator is already executing.");
// Setting `_` to 0 (see the default case) is what ends this loop.
while (_) try {
// While delegating to an inner iterator `y`, forward the call to it and hand back its result until it is done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// The inner iterator (if any) has finished: stop delegating and continue with its final value.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// next / throw: remember the instruction so `_.sent()` can return or rethrow its value.
case 0: case 1: t = op; break;
// yield: advance the label and hand the value to the caller.
case 4: _.label++; return { value: op[1], done: false };
// start delegating to the iterator in op[1].
case 5: _.label++; y = op[1]; op = [0]; continue;
// resume the instruction that was deferred on `_.ops` and leave the current try region.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No enclosing try region and the instruction is a throw (6) or return (2): finish the generator.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// break (3): jump to the target label when it is allowed from the current try region.
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// throw (6) before the region's second label: continue at that label (presumably the catch block).
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Otherwise continue at the region's third label (presumably the finally block) and defer the instruction.
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Run the next chunk of the user's function; it returns the next instruction.
op = body.call(thisArg, _);
// Anything thrown while executing becomes a throw instruction (6) and cancels delegation.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Loop ended: rethrow for instructions whose opcode has bit 1 or 4 set, otherwise report completion.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
Object.defineProperty(exports, "__esModule", { value: true });
var cluster = require("cluster");
var uuid_1 = require("uuid");
var task_1 = require("./task");
//
// An implementation of a scheduler that creates worker processes for running tasks.
// Uses the Node.js cluster module.
//
var ClusterScheduler = /** @class */ (function () {
    //
    // Convert whatever a task threw or rejected with into a string that can be sent between processes.
    // Prefers the stack trace when there is one. Does not throw for null or undefined
    // (a task can reject with no reason at all).
    //
    function formatError(err) {
        if (err === null || err === undefined) {
            return String(err);
        }
        return err.stack || String(err);
    }
    //
    // Settle the promise of a pending task in response to a "task-complete" or "task-error" message
    // and remove the task from the scheduler's pending list.
    // Throws if the message refers to a task this process is not waiting on.
    //
    function settlePendingTask(scheduler, msg) {
        var taskRecord = scheduler.pendingTasks[msg.taskId];
        if (!taskRecord) {
            throw new Error("Received " + msg.type + " for unknown task " + msg.taskId + ".");
        }
        if (msg.type === "task-complete") {
            scheduler.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has completed.");
            delete scheduler.pendingTasks[msg.taskId];
            taskRecord.resolve(msg.result); // Resolve the task's promise.
        }
        else {
            scheduler.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has thrown error:");
            scheduler.verbose(msg.error);
            delete scheduler.pendingTasks[msg.taskId];
            taskRecord.reject(msg.error); // Reject the task's promise.
        }
    }
    //
    // Create a scheduler.
    //
    //   numWorkers - Number of worker processes to fork when init is called on the master.
    //   options    - Optional settings:
    //                  verbose        - Truthy to enable logging through console.log (default false).
    //                  maxAllocations - Number of tasks a worker may run at once (default 1; falsy values fall back to 1).
    //
    function ClusterScheduler(numWorkers, options) {
        //
        // A list of all worker processes created.
        //
        this.workers = [];
        //
        // A lookup table of workers by unique ID.
        //
        this.workerMap = {};
        //
        // A queue of tasks to be run.
        //
        this.taskQueue = [];
        //
        // Tasks currently executing.
        //
        this.pendingTasks = {};
        //
        // Records if I'm master or worker.
        //
        this.whoami = "unknown";
        this.numWorkers = numWorkers;
        this.enableVerboseLogging = options && options.verbose || false;
        this.maxAllocations = options && options.maxAllocations || 1;
    }
    //
    // Call this function to initialize the cluster and fork worker processes.
    // Calls user-defined "mainFn" to run in the master process.
    //
    // On the master: forks the workers, runs mainFn and, once mainFn has finished, tells the workers to exit.
    // The workers are told to exit even when mainFn throws or rejects; the returned promise is then
    // rejected with mainFn's error.
    // On a worker: installs the message handlers and resolves straight away.
    //
    ClusterScheduler.prototype.init = function (mainFn) {
        var _this = this;
        if (!cluster.isMaster) {
            // Running on a worker. The promise executor turns a synchronous failure into a rejection.
            return new Promise(function (resolve) {
                _this.whoami = "WORKER[" + process.env.WORKER_INDEX + "]";
                _this.initWorker();
                _this.verbose("Worker online.");
                resolve();
            });
        }
        // Running on the master.
        var mainDone = new Promise(function (resolve) {
            _this.whoami = "MASTER";
            _this.verbose("Starting " + _this.numWorkers + " worker processes.");
            for (var workerIndex = 0; workerIndex < _this.numWorkers; ++workerIndex) {
                var workerId = uuid_1.v4();
                var worker = cluster.fork({
                    WORKER_ID: workerId,
                    WORKER_INDEX: workerIndex,
                });
                _this.trackWorker(workerIndex, workerId, worker, _this.maxAllocations);
            }
            resolve(mainFn()); // mainFn is started synchronously; its promise (if any) is awaited.
        });
        return mainDone.then(function () {
            _this.shutdownWorkers();
            _this.verbose("Master done.");
        }, function (err) {
            // Don't leave worker processes running when the main function fails.
            _this.shutdownWorkers();
            throw err;
        });
    };
    //
    // Run a task when possible and resolve promise when completed.
    // Rejects the promise if the task throws an error.
    // On a worker the task is forwarded to the master, on the master it is queued directly.
    //
    ClusterScheduler.prototype.runTask = function (inputs, task) {
        if (cluster.isWorker) {
            return this.sendTask(task, inputs);
        }
        var taskId = task.getTaskId();
        var taskName = task.getTaskDef().getTaskName();
        return this.queueTask(inputs, taskId, taskName);
    };
    //
    // Send a task to the master to be queued.
    // Returns a promise that is settled when the master reports the task's outcome.
    // Throws if called on the master.
    //
    ClusterScheduler.prototype.sendTask = function (task, inputs) {
        var _this = this;
        if (!cluster.isWorker) {
            throw new Error("Expect sendTask to only run on a worker.");
        }
        var taskId = task.getTaskId();
        var taskName = task.getTaskDef().getTaskName();
        this.verbose("Sending task " + taskName + " (" + taskId + ") to master.");
        // Track the task so that the promise can be resolved once it's done.
        var taskPromise = new Promise(function (resolve, reject) {
            _this.pendingTasks[taskId] = {
                inputs: inputs,
                taskId: taskId,
                taskName: taskName,
                resolve: resolve,
                reject: reject,
            };
        });
        // Send the task to the master.
        process.send({
            type: "queue-task",
            inputs: inputs,
            taskId: taskId,
            taskName: taskName,
            workerId: process.env.WORKER_ID,
        });
        return taskPromise;
    };
    //
    // Queue a task on the master.
    // Returns a promise that is settled when a worker reports the task's outcome.
    // Throws if called on a worker.
    //
    ClusterScheduler.prototype.queueTask = function (inputs, taskId, taskName) {
        var _this = this;
        if (!cluster.isMaster) {
            throw new Error("Expect queueTask to only run on the master.");
        }
        this.verbose("Queuing task " + taskName + " (" + taskId + ") on master.");
        var taskPromise = new Promise(function (resolve, reject) {
            _this.taskQueue.push({
                inputs: inputs,
                taskId: taskId,
                taskName: taskName,
                resolve: resolve,
                reject: reject,
            });
        });
        this.scheduleTasks(); // Run tasks if workers are currently free.
        return taskPromise;
    };
    //
    // Optional verbose logging.
    //
    ClusterScheduler.prototype.verbose = function (msg) {
        if (this.enableVerboseLogging) {
            console.log(this.whoami + ": " + msg);
        }
    };
    //
    // Track a worker process that was created and handle the messages it sends to the master.
    //
    ClusterScheduler.prototype.trackWorker = function (workerIndex, workerId, worker, maxAllocations) {
        var _this = this;
        this.workers.push(worker);
        this.workerMap[workerId] = {
            workerIndex: workerIndex,
            workerId: workerId,
            allocations: 0,
            maxAllocations: maxAllocations,
            worker: worker,
        };
        worker.on("message", function (msg) {
            if (msg.type === "task-complete" || msg.type === "task-error") { // Worker notifying master that a task has completed or thrown an error.
                // Settle first: an unknown task id throws here, before the worker's allocation count is touched.
                settlePendingTask(_this, msg);
                _this.workerMap[workerId].allocations -= 1;
                _this.scheduleTasks(); // Worker is now free, schedule more tasks.
            }
            else if (msg.type === "queue-task") { // Worker requesting master to queue a task.
                _this.workerMap[msg.workerId].maxAllocations += 1; // When a worker requests a task be queued we increase its max allocations by 1 to help avoid deadlocks.
                _this.queueTask(msg.inputs, msg.taskId, msg.taskName)
                    .then(function (result) {
                        worker.send({
                            type: "task-complete",
                            taskId: msg.taskId,
                            result: result,
                        });
                    })
                    .catch(function (err) {
                        worker.send({
                            type: "task-error",
                            taskId: msg.taskId,
                            error: formatError(err),
                        });
                    })
                    .then(function () {
                        _this.workerMap[msg.workerId].maxAllocations -= 1;
                    });
            }
            else {
                throw new Error("Unrecognised message " + msg.type + " from worker.");
            }
        });
    };
    //
    // Code to run in the worker process to initialize it: handles the messages sent by the master.
    //
    ClusterScheduler.prototype.initWorker = function () {
        var _this = this;
        process.on("message", function (msg) {
            if (msg.type === "exit") { // Master has instructed the worker to shutdown.
                _this.verbose("Exiting worker.");
                process.exit(0);
            }
            else if (msg.type === "run-task") { // Master has instructed the worker to run a task.
                _this.verbose("Running task " + msg.taskName + " (" + msg.taskId + ") on worker.");
                // Look up and start the task inside a promise executor so that an unknown task, a synchronous
                // throw or a plain (non-promise) return value is reported to the master like any other outcome.
                new Promise(function (resolve) {
                    var taskDef = task_1.Task.lookup(msg.taskName); // Look up the task by name.
                    var taskFn = taskDef.getTaskFn();
                    resolve(taskFn.apply(void 0, msg.inputs.concat([_this]))); // The scheduler is passed as the last argument.
                })
                    .then(function (result) {
                        _this.verbose("Task " + msg.taskName + " (" + msg.taskId + ") has completed on worker.");
                        process.send({
                            type: "task-complete",
                            taskId: msg.taskId,
                            result: result,
                            workerId: process.env.WORKER_ID,
                        });
                    })
                    .catch(function (err) {
                        var errorText = formatError(err);
                        _this.verbose("Task " + msg.taskName + " (" + msg.taskId + ") has errored on worker.");
                        _this.verbose(errorText);
                        process.send({
                            type: "task-error",
                            taskId: msg.taskId,
                            error: errorText,
                            workerId: process.env.WORKER_ID,
                        });
                    });
            }
            else if (msg.type === "task-complete" || msg.type === "task-error") { // Master telling process that a task it queued has completed or errored.
                settlePendingTask(_this, msg);
            }
            else {
                throw new Error("Unrecognised message " + msg.type + " from master.");
            }
        });
    };
    //
    // Run a task if there is a task to run and there is a worker available to run it.
    // Returns true when a task was handed to a worker, false otherwise.
    // Throws if called on a worker.
    //
    ClusterScheduler.prototype.scheduleTask = function () {
        var _this = this;
        if (!cluster.isMaster) {
            throw new Error("Can only schedule tasks on the master.");
        }
        if (this.taskQueue.length <= 0) {
            // No tasks to be executed.
            return false;
        }
        var freeWorkers = Object.keys(this.workerMap)
            .filter(function (workerId) { return _this.workerMap[workerId].allocations < _this.workerMap[workerId].maxAllocations; })
            .map(function (workerId) { return _this.workerMap[workerId]; });
        if (freeWorkers.length <= 0) {
            // No worker is available.
            this.verbose("Task are ready, no workers are free.");
            return false;
        }
        // Sort workers so that workers with less allocations come first.
        freeWorkers.sort(function (worker1, worker2) { return worker1.allocations - worker2.allocations; });
        var nextTask = this.taskQueue.shift(); // Remove the next task.
        this.pendingTasks[nextTask.taskId] = nextTask;
        var nextFreeWorker = freeWorkers[0]; // Get the next free worker.
        ++nextFreeWorker.allocations;
        this.verbose("Scheduling task " + nextTask.taskName + " (" + nextTask.taskId + ") on worker.");
        nextFreeWorker.worker.send({
            type: "run-task",
            taskId: nextTask.taskId,
            taskName: nextTask.taskName,
            inputs: nextTask.inputs,
        });
        return true;
    };
    //
    // Schedule one or more tasks to run on worker processes.
    //
    ClusterScheduler.prototype.scheduleTasks = function () {
        // Keep scheduling tasks as long as there are tasks to run and workers free.
        while (this.scheduleTask()) {
            // Nothing else to do, scheduleTask does the work.
        }
    };
    //
    // Shutdown all the workers.
    //
    ClusterScheduler.prototype.shutdownWorkers = function () {
        for (var _i = 0, _a = this.workers; _i < _a.length; _i++) {
            var worker = _a[_i];
            worker.send({ type: "exit" }); // Instruct workers to shutdown.
        }
    };
    return ClusterScheduler;
}());
exports.ClusterScheduler = ClusterScheduler;
//# sourceMappingURL=cluster-scheduler.js.map