forkfriend
Version:
dead simple worker child process manager. respawn children. load balance work amongst children.
416 lines (354 loc) • 11.2 kB
JavaScript
var util = require('util')
, fork = require('child_process').fork
, through = require('through')
, balance = require('./lib/balance')
, methods
;
module.exports = function(config){
// i am a stream!!
// though i may be better represented in the future by a duplex stream.
var manager = through(function(data){
this.send(data);
});
_ext(manager,methods);
manager.config = config = config||{};
// General Config:
// maxQueue represents the maximum number of pending messages the manager will buffer while spawning new workers or waiting for pause.
// any dropped data is emitted as a drop event so you can and should handle this in your application.
manager.config.maxQueue = config.maxQueue||1000;
//
// processes will not be spawned as a rate faster than this for all scripts.
// prevents thrashing.
manager.config.respawnInterval = config.respawnInterval||500;
//
// Stream Mode Specific Config:
// by default a paused child will be allowed to stay paused for 10 minutes
// if you pass -1 for this config option the child will be allowed to pause forever.
// if you choose forever make sure you have a plan just in case one worker stops the show forever
manager.config.pausedWorkerTimeout = manager.config.lostInTheWoods = config.pausedWorkerTimeout||config.lostInTheWoods||-1;
manager.stats = {paused:0,messages:0,length:0,start:Date.now(),pauses:0,drains:0};
manager.workers = {};
manager.workerStreams = {};
manager.stopped = false;
// if end is fired i need to stop the show.
manager.on('end',function(){
this.stop();
});
manager.on('pause',function(){
manager.stats.pauses++;
manager.stats.pauseStart = Date.now();
// i am a stream now.
});
manager.on('drain',function(){
var p = Date.now()-manager.stats.pauseStart;
manager.stats.paused += p;
manager.stats.drains++;
});
return manager;
};
//manager api.
methods = {
workers:{},
workerStreams:{},
stopped:false,
send:function(data,key){
var z = this;
var toLoop = key?[key]:Object.keys(this.workers);
toLoop.forEach(function(k){
var worker = z.workers[k]
,msg
,lastWorker
;
// stats.
z.stats.messages+=1;
if(data && data.length) {
z.stats.length += data.length;
}
if(!worker) return;
if( typeof data != 'undefined' ) worker.buffer.push(data);
if(!worker.buffer.length) return;
try{
var unsent = [];
while(worker.buffer.length) {
msg = worker.buffer.shift();
var pool = z.filter(worker.process);
lastWorker = z.balance(pool,msg);
if(lastWorker) {
var buffered = !lastWorker.send(msg);
if(buffered && !lastWorker.buffering) {
z._childBuffering(k,lastWorker);
}
}
else unsent.push(msg);
}
} catch (e) {
if(lastWorker) lastWorker.kill();
worker.errors++;
worker.lastError = Date.now();
process.nextTick(function(){
z.send(msg,k);
});
}
// add any unsent to the buffer again.
if(unsent.length) worker.buffer.push.apply(worker.buffer,unsent);
if(worker.buffer.length > z.config.maxQueue){
z.emit('drop',key,worker.buffer.shift());
}
});
if(this.paused){
this.pause();
}
return !this.paused;
},
add:function(worker,args,num){
var z = this;
if(worker.forEach) {
worker.forEach(function(w){
z.add(w,args,pool);
});
return;
}
if(args && typeof args == 'number') {
num = +args;
args = [];
}
if(!isNaN(num)){
num = +num;
if(num > 0 && !isNaN(num)) {
while(num > 0) {
num--;
z.add(worker,args);
}
return true;
}
return;
}
if(!z.workers[worker]){
z.workers[worker] = {
runCount:0,
args:args,
process:[],
buffer:[],
stream:[],
errors:0,
lastFork:0,
lastError:0
};
}
var timeout = this.config.respawnInterval-(Date.now()-z.workers[worker].lastFork);
if(timeout < 0) timeout = 0;
z.workers[worker].runCount++;
z.workers[worker].lastFork = Date.now();
setTimeout(function(){
if(z.stopped) return;
if(!z.workers[worker] || z.workers[worker].runCount <= z.workers[worker].process.length){
// before this worker process was started the process count was changed.
// this is no longer needed.
z.emit('worker-aborted',worker);
return;
}
var cp = fork(worker,args);
z.workers[worker].process.push(cp);
var removed = false;
z.emit('worker',worker,args,cp);
cp.on('error',function(e){
z.emit('worker-error',e,worker,args,cp);
});
var handleExit = function(code){
// the there is a bad exit code even if the child has run exit cleanup already i want to let you know.
if(code) z.emit('worker-error',code,worker,args,cp);
if(removed) return false;
removed = true;
if(!z.workers[worker]) return;
var i = z.workers[worker].process.indexOf(cp);
z.workers[worker].process.splice(i,1);
z.emit('worker-exit',code,worker,args,cp);
z._childDrained(worker,null);
if(z.stopped) return;
z.add(worker,args)
};
cp.on('disconnect',function(){
//if i cant talk to it im just gonna kill it
//child can handle and not die if it really wants
z.emit('worker-disconnect',worker,args,cp);
cp.kill();
handleExit(0);
});
cp.on('exit',function(code){
handleExit(code);
});
cp.on('message',function(message){
if(message && message.__forkfriend){
var action = message.__forkfriend;
if(action === 'end') {
// kill and respawn child.
cp.kill();
handleExit(0);
} else if(action === 'drain') {
// this child wants more data
cp.paused = 0;
z._childDrained(worker,cp);
} else if(action === 'pause'){
// this child wants pause
cp.paused = Date.now();
z._childPaused(worker,cp);
}
} else {
z.emit('message',message,worker,cp);
if(z.workerStreams[worker]) {
z.workerStreams[worker].emit('data',message);
}
z.emit('data',message);
}
});
// drain any messages that were queued.
z.send(undefined);
},timeout);
},
remove:function(key,cp){
var z = this;
if(!z.workers[key]) return;
// if child process is not defined just remove one.
if(cp === undefined) {
cp = z.workers[key].process[0];
} else if(typeof cp == 'number'){
while(cp > 0) {
cp--;
z.remove(key);
}
return;
}
var i = z.workers[key].process.indexOf(cp);
if(z.workers[key].runCount) z.workers[key].runCount--;
if(!z.workers[key].runCount) delete z.workers[key];
if(i === -1) return;
if(z.workers[key]) z.workers[key].process.splice(i,1);
process.nextTick(function(){
cp.kill();
});
},
get:function(key){
return (this.workers[key]||{}).process;
},
stop:function(){
var z = this;
if(this.stopped) return;
this.stopped = true;
Object.keys(this.workers).forEach(function(k){
var w = z.workers[k];
w.process.forEach(function(cp,i){
z.remove(k,w.runCount);
});
});
Object.keys(z.workerStreams).forEach(function(k){
z.workerStreams[k].end();
});
if(!this.ended) this.emit('end');
},
refork:function(key,cp){
var args = this.workers[key].args;
var procs = this.workers[key].runCount;
this.remove(key,procs);
this.add(key,args,procs);
},
getWorkerStream:function(key){
var stream = false;
if(this.workers[key]){
stream = this.workerStreams[key];
if(!stream) stream = this.workerStreams[key] = through();
}
return stream;
},
_childDrained:function(worker,cp){
// when a worker is unpaused we need need to loop available workers
var paused = 0,z = this;
if(cp) cp.paused = false;
Object.keys(this.workers).forEach(function(name){
var workerData = z.workers[name];
if(name === worker) {
var pausedWorkers = 0;
pausedWorkers = z._checkWorkersPaused(workerData);
if(pausedWorkers) paused = true;
} else {
if(workerData.paused) paused = true;
}
});
if(paused && !this.paused) {
this.pause();
} else if(this.paused){
this.resume();
}
},
_childBuffering:function(worker,cp){
var workerData = this.workers[worker],z = this;
if(!cp.buffering) cp.buffering = Date.now();
if(cp.bufferingPoll) clearInterval(cp.bufferingPoll);
// yes i really have to do this. because there is no drained event for child processes ipc channel
cp.bufferingPoll = setInterval(function(){
if(cp.killed || cp.disconnected || cp.exitCode){
clearInterval(cp.bufferingPoll);
return;
}
//TODO || writeQueue is < limit
if(!cp._channel.buffering){
cp.buffering = false;
z._childDrained(worker);
}
},100);
return this._checkWorkersPaused(workerData);
},
_childPaused:function(worker,cp){
// if any worker is completely paused we have to pause the parent.
var workerData = this.workers[worker];
if(!cp.paused) cp.paused = Date.now();
return this._checkWorkersPaused(workerData);
},
_checkWorkersPaused:function(workerData){
var pausedWorkers = 0,paused = false,z = this;
workerData.process.forEach(function(cp){
if(cp.paused) {
// recover lost children
if(z.config.pausedWorkerTimeout > -1 && Date.now()-cp.paused > z.config.pausedWorkerTimeout){
cp.kill();
}
++pausedWorkers;
}
// if the ipc channel is saturated i set this flag.
else if(cp.buffering) ++pausedWorkers;
});
if(pausedWorkers == workerData.process.length) paused = true;
if(paused && !this.paused) {
this.pause();
}
return paused;
},
getStats:function(){
var elapsed = Date.now()-this.stats.start;
var report = {};
report.messagesPerMs = this.stats.messages/elapsed;
report.bytesPerMs = this.stats.length/elapsed;
report.percentPaused = (this.stats.paused?(this.stats.paused*100/elapsed):0);
report.elapsed = elapsed;
report.paused = this.stats.paused;
report.pauses = this.stats.pauses;
report.drains = this.stats.drains;
return report;
},
balance:balance,
filter:function(processes){
// right now buffering status on a child process is not evented. this sucks.
var a = [];
for(var i=0;i<processes.length;++i){
if(!processes[i].paused && !processes[i].buffering) {
a.push(processes[i]);
}
}
return a;
}
};
function _ext(o1,o2){
Object.keys(o2).forEach(function(k){
o1[k] = o2[k];
});
return o1;
}