backendless-coderunner
Version:
Backendless CodeRunner for Node.js
622 lines (467 loc) • 15.9 kB
JavaScript
'use strict'
const cluster = require('cluster')
const EventEmitter = require('events')
const OS = require('os')
const path = require('path')
const ServerCodeEvents = require('../events')
const logger = require('../../util/logger')
const LongWaiter = require('../../util/long-waiter')
const WORKER_TEARDOWN_TIME = 2000 //2 seconds
const WARN_STATUS_TIMEOUT = 5000 //5 seconds
const Events = {
READY_FOR_NEXT_TASK: 'READY_FOR_NEXT_TASK',
TASK_PROCESSED : 'TASK_PROCESSED',
}
const Status = {
CRITICAL: 'critical',
WARNING : 'warning',
GOOD : 'good',
}
const StatusThresholds = {
[Status.CRITICAL]: 0.9,
[Status.WARNING] : 0.7,
}
class WorkersBroker extends EventEmitter {
constructor(options, codeRunnerOptions) {
super()
cluster.setupMaster({
exec : path.resolve(__dirname, './cloud-worker.js'),
args : [],
silent: !!logger.winston
})
this.status = Status.GOOD
this.WORKER_RUN_OPTIONS = JSON.stringify(codeRunnerOptions)
this.options = options
this.rateLimit = codeRunnerOptions.rateLimit
this.defaultHeartbeatTimeout = this.options.heartbeat.timeout * 1000
this.codeInspectionHeartbeatTimeout = this.options.heartbeat.codeInspectionTimeout * 1000
this.concurrentWorkersLimit = this.options.concurrent || OS.cpus().length
this.minIdleWorkersCount = this.options.minIdle || 0
this.minIdleWorkersCount = Math.max(this.minIdleWorkersCount, 0)
this.minIdleWorkersCount = Math.min(this.minIdleWorkersCount, this.concurrentWorkersLimit)
if (this.options.cache) {
if (this.options.cache === true) {
this.options.cache = { limit: 4 * this.concurrentWorkersLimit }
}
this.cachedWorkersLimit = this.options.cache.limit
if (this.options.cachedIdleTTL) {
this.cachedWorkersIdleTTL = this.options.cachedIdleTTL * 1000
}
}
this.startingWorkersCount = 0
this.idleWorkers = []
this.cachedWorkers = []
this.busyWorkers = []
this.appsLoad = {}
cluster.on('exit', this.onWorkerExit.bind(this))
cluster.on('message', this.onWorkerMessage.bind(this))
this.keepMinIdleWorkersCount()
if (options.heartbeat.enabled) {
this.startWorkersHeartbeatTimer()
}
this.startStatusWatcher()
}
keepMinIdleWorkersCount() {
const wantingWorkersCount = this.minIdleWorkersCount - this.idleWorkers.length
const allowedWorkersCount = this.concurrentWorkersLimit - this.getStartedWorkersCount()
const requiredWorkersCount = Math.min(wantingWorkersCount, allowedWorkersCount) - this.startingWorkersCount
if (requiredWorkersCount > 0) {
for (let i = 0; i < requiredWorkersCount; i++) {
this.startNewWorker()
}
}
}
async stop() {
clearInterval(this.statusWatcherTimer)
logger.info('Wait until each worker finished processing its current task')
this.stopper = new LongWaiter(() => {
logger.info(`remains ${ this.busyWorkers.length } busy workers`)
return !this.busyWorkers.length
})
await this.stopper.wait()
logger.info('pull of the busy workers is empty')
}
isCacheEnabled() {
return !!this.cachedWorkersLimit
}
startWorkersHeartbeatTimer() {
setInterval(() => {
getAllWorkers().forEach(worker => {
const heartbeatTimeout = this.getWorkerHeartbeatTimeout(worker)
if (worker.heartbeat + heartbeatTimeout < Date.now()) {
this.killWorker(worker, (
`Worker expired due to heartbeat timeout (${ heartbeatTimeout }ms), ` +
`the last ping was ${ timeFromLastPing(worker) }ms ago` +
` [${ getKilledWorkerDetails(worker) }]`
))
}
})
}, this.defaultHeartbeatTimeout)
}
getWorkerHeartbeatTimeout(worker) {
if (worker.task && worker.task.actionType === 'ANALYSE_SERVER_CODE') {
return this.codeInspectionHeartbeatTimeout
}
return this.defaultHeartbeatTimeout
}
updateStatus() {
const currentWorkersLoad = this.getCurrentLoad()
if (currentWorkersLoad > StatusThresholds[Status.CRITICAL]) {
this.status = Status.CRITICAL
} else if (currentWorkersLoad > StatusThresholds[Status.WARNING]) {
this.status = Status.WARNING
} else {
this.status = Status.GOOD
}
}
startStatusWatcher() {
let delayShowingMessage = null
this.statusWatcherTimer = setInterval(() => {
this.updateStatus()
if (this.status !== Status.GOOD) {
if (delayShowingMessage) {
const startedTimeAgo = Date.now() - delayShowingMessage
if (startedTimeAgo > WARN_STATUS_TIMEOUT) {
logger.warn(
`CodeRunner status is [${ this.status }] ` +
`started ${ Math.floor(startedTimeAgo / 1000) } seconds ago ${ JSON.stringify(this.getStats()) }`
)
}
} else {
delayShowingMessage = Date.now()
}
} else {
delayShowingMessage = null
}
}, 1000)
}
async startNewWorker() {
const time = timeMarker()
this.startingWorkersCount++
const worker = cluster.fork({
RUN_OPTIONS: this.WORKER_RUN_OPTIONS
})
worker.heartbeat = Date.now()
try {
await new Promise(resolve => {
function onMessageFromWorker(message) {
if (message === 'started') {
worker.process.removeListener('message', onMessageFromWorker)
resolve()
}
}
worker.process.on('message', onMessageFromWorker)
if (logger.winston && !logger.ignoreWorkersLog) {
worker.process.stdout
.pipe(logger.winston.createLogStream('info'))
.pipe(logger.winston)
worker.process.stderr
.pipe(logger.winston.createLogStream('error'))
.pipe(logger.winston)
}
})
worker.startedIn = time()
logger.info(`[${ worker.process.pid }] Worker started in ${ worker.startedIn }`)
this.relocateWorker(worker, this.idleWorkers)
} catch (error) {
this.killWorker(worker, error.message)
}
this.startingWorkersCount--
}
processTask(task, worker) {
if (task.timeout) {
this.createExpirationTimer(worker, task.timeout)
}
if (!this.isCacheEnabled()) {
task.cacheable = false
}
task.workerPID = worker.process.pid
worker.task = task
worker.process.send({ task })
delete worker.startIdlingTime
this.relocateWorker(worker, this.busyWorkers)
}
createExpirationTimer(worker, timeout) {
worker.expireTimer = setTimeout(() => {
this.killWorker(worker, `Worker expired due to task timeout (${ timeout }ms)`)
}, timeout + WORKER_TEARDOWN_TIME)
}
destroyExpirationTimer(worker) {
if (worker.expireTimer) {
clearTimeout(worker.expireTimer)
delete worker.expireTimer
}
}
createIdlingTimer(worker) {
if (this.cachedWorkersIdleTTL) {
worker.startIdlingTime = Date.now()
worker.idlingTimer = setInterval(() => {
if (this.cachedWorkers.length > this.minIdleWorkersCount) {
const idlingTime = Math.floor((Date.now() - worker.startIdlingTime) / 1000)
this.killWorker(
worker,
`Destroy the cached worker because if was idling more than ${ idlingTime } seconds`
)
logger.info(`After killing cached idle worker... (${ this.getStatsString() })`)
}
}, this.cachedWorkersIdleTTL)
}
}
destroyIdlingTimer(worker) {
if (worker.idlingTimer) {
clearTimeout(worker.idlingTimer)
delete worker.idlingTimer
}
delete worker.startIdlingTime
}
getCurrentLoad() {
return this.busyWorkers.length / this.concurrentWorkersLimit
}
getAvailableWorkersCount() {
return this.concurrentWorkersLimit - this.busyWorkers.length
}
isAppRateLimitReached(appId) {
if (!this.rateLimit.enabled) {
return false
}
const limit = this.getAppRateLimit(appId)
const current = this.appsLoad[appId]
const isReached = current >= limit
logger[isReached ? 'info' : 'debug'](
`Rate limit for app ${ appId } ${ isReached ? 'is' : 'is not' } reached, current=${ current }, limit=${ limit }`
)
return isReached
}
getAppRateLimit(appId) {
if (this.rateLimit.overrides[appId]) {
return this.rateLimit.overrides[appId]
}
return this.rateLimit.default
}
getStartedWorkersCount() {
return Object.keys(cluster.workers).length
}
async getWorkerForTask(appId, isTaskCacheable) {
const isCacheEnabled = this.isCacheEnabled()
if (isCacheEnabled && isTaskCacheable) {
const cachedWorker = this.cachedWorkers.find(worker => worker.appId === appId)
if (cachedWorker) {
return cachedWorker
}
}
const idleWorker = this.idleWorkers.pop()
if (idleWorker) {
if (isTaskCacheable) {
idleWorker.appId = appId
}
return idleWorker
}
if (isCacheEnabled && this.cachedWorkersLimit <= this.cachedWorkers.length) {
const leastActiveWorker = this.cachedWorkers.pop()
if (leastActiveWorker) {
this.killWorker(leastActiveWorker, 'Killed the least active cached worker, because cached pool is full')
}
}
await this.startNewWorker()
return this.getWorkerForTask(appId, isTaskCacheable)
}
relocateWorker(worker, newPlace) {
const oldPlace = worker.currentPlace
const fromBusyWorkers = oldPlace === this.busyWorkers
const toBusyWorkers = newPlace === this.busyWorkers
const toCachedWorkers = newPlace === this.cachedWorkers
const fromIdleWorkers = oldPlace === this.idleWorkers
const fromCachedWorkers = oldPlace === this.cachedWorkers
if (toCachedWorkers) {
this.createIdlingTimer(worker)
} else if (fromCachedWorkers) {
this.destroyIdlingTimer(worker)
}
if (oldPlace) {
const index = oldPlace.indexOf(worker)
if (index !== -1) {
oldPlace.splice(index, 1)
}
}
worker.currentPlace = newPlace
if (newPlace) {
newPlace.unshift(worker)
}
if (fromBusyWorkers || toBusyWorkers) {
this.appsLoad[worker.appId] = (this.appsLoad[worker.appId] || 0)
this.appsLoad[worker.appId] += (toBusyWorkers ? 1 : -1)
if (this.stopper) {
this.stopper.check()
}
if (this.isAvailableForTaskPrecessing()) {
setImmediate(() => {
this.emit(Events.READY_FOR_NEXT_TASK)
})
}
}
if (!this.stopper && (fromIdleWorkers || fromBusyWorkers || toBusyWorkers)) {
this.keepMinIdleWorkersCount()
}
}
isAvailableForTaskPrecessing() {
return !this.stopper && this.busyWorkers.length < this.concurrentWorkersLimit
}
killAllAppWorkers(appId, reason) {
const workers = getAllWorkers()
workers.forEach(worker => {
if (worker.appId === appId) {
worker.toBeRemoved = true
worker.toBeRemovedReason = reason
}
})
this.cachedWorkers.forEach(worker => {
if (worker.appId === appId) {
this.killWorker(worker, reason)
}
})
}
killWorker(worker, reason) {
this.relocateWorker(worker)
if (logger.winston && !logger.ignoreWorkersLog) {
worker.process.stdout.unpipe()
worker.process.stderr.unpipe()
}
worker.killed = true
// we use this instead of worker.kill(...) because we do not need to stop it gracefully
worker.process.kill('SIGKILL')
logger.info(`[${ worker.process.pid }] Worker killed.`, reason || '')
}
getStats() {
const started = this.getStartedWorkersCount()
const load = this.getCurrentLoad() * 100
return {
status : this.status,
started,
load,
loadStr : `${ Math.floor(load * 100) / 100 }%`,
busyConcurrent : this.busyWorkers.length,
limitConcurrent : this.concurrentWorkersLimit,
availableConcurrent: this.getAvailableWorkersCount(),
starting : this.startingWorkersCount,
idle : this.idleWorkers.length,
cached : this.cachedWorkers.length,
appsLoad : this.appsLoad,
}
}
getAppsLoad() {
const result = {}
Object
.keys(this.appsLoad)
.map(appId => ([appId, this.appsLoad[appId]]))
.sort((a, b) => (a[1] - b[1]))
.forEach(([appId, value]) => (result[appId] = value))
return result
}
getStatsString() {
const {
loadStr,
busyConcurrent,
availableConcurrent,
started,
starting,
idle,
cached,
limitConcurrent
} = this.getStats()
if (this.options.debug) {
return (
`load: ${ loadStr }, ` +
`busyConcurrent: ${ busyConcurrent }, ` +
`availableConcurrent: ${ availableConcurrent }, ` +
`limitConcurrent: ${ limitConcurrent }, ` +
`started: ${ started }, ` +
`starting: ${ starting }, ` +
`idle: ${ idle }, ` +
(this.isCacheEnabled() ? `cached: ${ cached } ` : '')
)
}
return (
`load: ${ loadStr }, ` +
`busyConcurrent: ${ busyConcurrent }, ` +
`limitConcurrent: ${ limitConcurrent }, ` +
`idle: ${ idle }, ` +
(this.isCacheEnabled() ? `cached: ${ cached } ` : '')
)
}
onWorkerMessage(worker, message) {
if (message.processed) {
this.onWorkerTaskProcessed(worker, message)
} else if (message.criticalError) {
worker.toBeRemoved = true
worker.toBeRemovedReason = `The worker has been failed with a critical error: ${ message.criticalError }`
} else if (message === 'idling') {
if (worker.task.cacheable && !worker.toBeRemoved) {
delete worker.task
this.onWorkerIdling(worker)
} else {
this.killWorker(worker, worker.toBeRemovedReason)
}
} else if (message === 'heartbeat') {
this.onWorkerHeartbeat(worker)
}
}
onWorkerTaskProcessed(worker, message) {
const task = worker.task
const taskResult = message.taskResult
this.emit(Events.TASK_PROCESSED, task, taskResult)
}
onWorkerIdling(worker) {
this.destroyExpirationTimer(worker)
this.relocateWorker(worker, this.cachedWorkers)
}
onWorkerHeartbeat(worker) {
worker.heartbeat = Date.now()
}
onWorkerExit(worker) {
this.destroyExpirationTimer(worker)
if (!worker.exitedAfterDisconnect && !worker.killed) {
logger.info(`[${ worker.process.pid }] Worker exited`)
this.relocateWorker(worker)
}
}
}
function getAllWorkers() {
return Object.keys(cluster.workers).map(id => cluster.workers[id])
}
function timeMarker() {
const time = process.hrtime()
return () => {
const duration = process.hrtime(time)
const ms = duration[0] * 1000 + duration[1] / 1e6
return `${ ms.toFixed(3) }ms`
}
}
function getKilledWorkerDetails(worker) {
const { appId, task } = worker
const details = [`appId: ${ appId }`]
if (task) {
if (task.deploymentModelName) {
details.push(`model: ${ task.deploymentModelName }`)
}
if (task.className) {
const service = task.className.replace('services.', '')
details.push(`service: ${ service }`)
}
if (task.method) {
details.push(`method: ${ task.method }`)
}
if (task.eventId) {
const event = ServerCodeEvents.get(task.eventId)
details.push(`handler: ${ event.provider.id }(${ event.provider.id === 'timer' ? task.target : event.name })`)
}
if (task.provider) {
details.push(`file-path: ${ task.provider }`)
}
}
return details.join(', ')
}
function timeFromLastPing(worker) {
return worker.heartbeat && Date.now() - worker.heartbeat
}
WorkersBroker.Events = Events
module.exports = WorkersBroker