UNPKG

task-farmer

Version:

A simple multi-core task scheduler that works well with promises. Great for doing parallel data processing.

349 lines 16.2 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; Object.defineProperty(exports, "__esModule", { value: true }); var cluster = require("cluster"); var uuid_1 = require("uuid"); var task_1 = require("./task"); // // An implementation of a scheduler that creates worker processes for running tasks. // Uses the Node.js cluster module. // var ClusterScheduler = /** @class */ (function () { function ClusterScheduler(numWorkers, options) { // // A list of all worker processes created. // this.workers = []; // // A lookup table of workers by unique ID. // this.workerMap = {}; // // A queue of tasks to be run. // this.taskQueue = []; // // Tasks currenlty executing. // this.pendingTasks = {}; // // Records if I'm master or worker. // this.whoami = "unknown"; this.numWorkers = numWorkers; this.enableVerboseLogging = options && options.verbose || false; this.maxAllocations = options && options.maxAllocations || 1; } // // Call this function to initialize the cluster and fork worker processes. // Calls user-defined "mainFn" to run in the master process. // ClusterScheduler.prototype.init = function (mainFn) { return __awaiter(this, void 0, void 0, function () { var workerIndex, workerId, worker; return __generator(this, function (_a) { switch (_a.label) { case 0: if (!cluster.isMaster) return [3 /*break*/, 2]; this.whoami = "MASTER"; // Running on the master. this.verbose("Starting " + this.numWorkers + " worker processes."); for (workerIndex = 0; workerIndex < this.numWorkers; ++workerIndex) { workerId = uuid_1.v4(); worker = cluster.fork({ WORKER_ID: workerId, WORKER_INDEX: workerIndex, }); this.trackWorker(workerIndex, workerId, worker, this.maxAllocations); } return [4 /*yield*/, mainFn()]; case 1: _a.sent(); this.shutdownWorkers(); this.verbose("Master done."); return [3 /*break*/, 3]; case 2: this.whoami = "WORKER[" + process.env.WORKER_INDEX + "]"; this.initWorker(); this.verbose("Worker online."); _a.label = 3; case 3: return [2 /*return*/]; } }); }); }; // // Run a task when possible and resolve promise when completed. // Rejects the promise if the task throws an error. // ClusterScheduler.prototype.runTask = function (inputs, task) { if (cluster.isWorker) { return this.sendTask(task, inputs); } else { var taskId = task.getTaskId(); var taskName = task.getTaskDef().getTaskName(); return this.queueTask(inputs, taskId, taskName); } }; // // Send a task to the master to be queued. // ClusterScheduler.prototype.sendTask = function (task, inputs) { var _this = this; if (!cluster.isWorker) { throw new Error("Expect sendTask to only run on a worker."); } var taskId = task.getTaskId(); var taskName = task.getTaskDef().getTaskName(); this.verbose("Sending task " + taskName + " (" + taskId + ") to master."); // Track the task so that the promise can be resolved once it's done. var taskPromise = new Promise(function (resolve, reject) { _this.pendingTasks[taskId] = { inputs: inputs, taskId: taskId, taskName: taskName, resolve: resolve, reject: reject, }; }); // Send the task to the master. process.send({ type: "queue-task", inputs: inputs, taskId: taskId, taskName: taskName, workerId: process.env.WORKER_ID, }); return taskPromise; }; // // Queue a task on the master. // ClusterScheduler.prototype.queueTask = function (inputs, taskId, taskName) { var _this = this; if (!cluster.isMaster) { throw new Error("Expect queueTask to only run on the master."); } this.verbose("Queuing task " + taskName + " (" + taskId + ") on master."); var taskPromise = new Promise(function (resolve, reject) { _this.taskQueue.push({ inputs: inputs, taskId: taskId, taskName: taskName, resolve: resolve, reject: reject, }); }); this.scheduleTasks(); // Run tasks if workers are currently free. return taskPromise; }; // // Optional verbose logging. // ClusterScheduler.prototype.verbose = function (msg) { if (this.enableVerboseLogging) { console.log(this.whoami + ": " + msg); } }; // // Track a worker process that was created. // ClusterScheduler.prototype.trackWorker = function (workerIndex, workerId, worker, maxAllocations) { var _this = this; this.workers.push(worker); this.workerMap[workerId] = { workerIndex: workerIndex, workerId: workerId, allocations: 0, maxAllocations: maxAllocations, worker: worker, }; worker.on("message", function (msg) { if (msg.type === "task-complete") { // Worker notifying master that a task has completed. _this.workerMap[workerId].allocations -= 1; var taskRecord = _this.pendingTasks[msg.taskId]; _this.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has completed."); delete _this.pendingTasks[msg.taskId]; taskRecord.resolve(msg.result); // Resolve the task's promise. _this.scheduleTasks(); // Worker is now free, schedule more tasks. } else if (msg.type === "task-error") { // Worker notifying master that a task has thrown an error. _this.workerMap[workerId].allocations -= 1; var taskRecord = _this.pendingTasks[msg.taskId]; _this.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has thrown error:"); _this.verbose(msg.error); delete _this.pendingTasks[msg.taskId]; taskRecord.reject(msg.error); // Reject the task's promise. _this.scheduleTasks(); // Worker is now free, schedule more tasks. } else if (msg.type === "queue-task") { // Worker requesting master to queue a task. _this.workerMap[msg.workerId].maxAllocations += 1; // When a worker requests a task be queued we increase its max allocations by 1 to help avoid deadlocks. _this.queueTask(msg.inputs, msg.taskId, msg.taskName) .then(function (result) { worker.send({ type: "task-complete", taskId: msg.taskId, result: result, }); }) .catch(function (err) { worker.send({ type: "task-error", taskId: msg.taskId, error: err && err.stack || err.toString(), }); }) .then(function () { _this.workerMap[msg.workerId].maxAllocations -= 1; }); } else { throw new Error("Unrecognised message " + msg.type + " from worker."); } }); }; // // Code to run in the work process to initalized. // ClusterScheduler.prototype.initWorker = function () { var _this = this; process.on("message", function (msg) { if (msg.type === "exit") { // Master has instructed the worker to shutdown. _this.verbose("Exiting worker."); process.exit(0); } else if (msg.type === "run-task") { // Master has instructed the worker to run a task. _this.verbose("Running task " + msg.taskName + " (" + msg.taskId + ") on worker."); var taskDef = task_1.Task.lookup(msg.taskName); // Look up the task by name. var taskFn = taskDef.getTaskFn(); taskFn.apply(void 0, msg.inputs.concat([_this])).then(function (result) { _this.verbose("Task " + msg.taskName + " (" + msg.taskId + ") has completed on worker."); process.send({ type: "task-complete", taskId: msg.taskId, result: result, workerId: process.env.WORKER_ID, }); }) .catch(function (err) { _this.verbose("Task " + msg.taskName + " (" + msg.taskId + ") has errored on worker."); _this.verbose(err && err.stack || err.toString()); process.send({ type: "task-error", taskId: msg.taskId, error: err && err.stack || err.toString(), workerId: process.env.WORKER_ID, }); }); } else if (msg.type === "task-complete") { // Master telling process that a task it queued has completed. var taskRecord = _this.pendingTasks[msg.taskId]; _this.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has completed."); delete _this.pendingTasks[msg.taskId]; taskRecord.resolve(msg.result); // Resolve the task's promise. } else if (msg.type === "task-error") { // Master telling process that a task it queued has errorred. var taskRecord = _this.pendingTasks[msg.taskId]; _this.verbose("Task " + taskRecord.taskName + " (" + taskRecord.taskId + ") has thrown error:"); _this.verbose(msg.error); delete _this.pendingTasks[msg.taskId]; taskRecord.reject(msg.error); // Reject the task's promise. } else { throw new Error("Unrecognised message " + msg.type + " from master."); } }); }; // // Run a task if there is a task to run and there is a worker available to run it. // ClusterScheduler.prototype.scheduleTask = function () { var _this = this; if (!cluster.isMaster) { throw new Error("Can only schedule tasks on the master."); } if (this.taskQueue.length <= 0) { // No tasks to be executed. return false; } var freeWorkers = Object.keys(this.workerMap) .filter(function (workerId) { return _this.workerMap[workerId].allocations < _this.workerMap[workerId].maxAllocations; }) .map(function (workerId) { return _this.workerMap[workerId]; }); if (freeWorkers.length <= 0) { // No worker is available. this.verbose("Task are ready, no workers are free."); return false; } // Sort workers so that workers with less allocations come first. freeWorkers.sort(function (worker1, worker2) { return worker1.allocations - worker2.allocations; }); var nextTask = this.taskQueue.shift(); // Remove the next task. this.pendingTasks[nextTask.taskId] = nextTask; var nextFreeWorker = freeWorkers[0]; // Get the next free worker. ++nextFreeWorker.allocations; this.verbose("Scheduling task " + nextTask.taskName + " (" + nextTask.taskId + ") on worker."); nextFreeWorker.worker.send({ type: "run-task", taskId: nextTask.taskId, taskName: nextTask.taskName, inputs: nextTask.inputs, }); return true; }; // // Schedule one or more tasks to run on worker processes. // ClusterScheduler.prototype.scheduleTasks = function () { while (true) { // Keep scheduling tasks as long as there are tasks to run and workers free. if (!this.scheduleTask()) { break; // No more tasks or no more workers. Just have to wait now. } } }; // // Shutdown all the workers. // ClusterScheduler.prototype.shutdownWorkers = function () { for (var _i = 0, _a = this.workers; _i < _a.length; _i++) { var worker = _a[_i]; worker.send({ type: "exit" }); // Instruct workers to shutdown. } }; return ClusterScheduler; }()); exports.ClusterScheduler = ClusterScheduler; //# sourceMappingURL=cluster-scheduler.js.map