UNPKG

ca-apm-probe

Version:

CA APM Node.js Agent monitors real-time health and performance of Node.js applications

243 lines (202 loc) 7.43 kB
/** * Copyright (c) 2015 CA. All rights reserved. * * This software and all information contained therein is confidential and proprietary and * shall not be duplicated, used, disclosed or disseminated in any way except as authorized * by the applicable license agreement, without the express written permission of CA. All * authorized reproductions must be marked with this language. * * EXCEPT AS SET FORTH IN THE APPLICABLE LICENSE AGREEMENT, TO THE EXTENT * PERMITTED BY APPLICABLE LAW, CA PROVIDES THIS SOFTWARE WITHOUT WARRANTY * OF ANY KIND, INCLUDING WITHOUT LIMITATION, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT WILL CA BE * LIABLE TO THE END USER OR ANY THIRD PARTY FOR ANY LOSS OR DAMAGE, DIRECT OR * INDIRECT, FROM THE USE OF THIS SOFTWARE, INCLUDING WITHOUT LIMITATION, LOST * PROFITS, BUSINESS INTERRUPTION, GOODWILL, OR LOST DATA, EVEN IF CA IS * EXPRESSLY ADVISED OF SUCH LOSS OR DAMAGE. */ var proc = require('./proc'); var platform = require('os').platform(); var util = require('util'); var logger = require('./logger.js'); var last = {valid: false, all: 0, ptime: 0, utime: 0, stime: 0, uptime: 0, active: 0}; function reportMetrics(proc, user, syst, onMetric) { // There's a weird bug, on OS X if not elsewhere, either from floating-point // calculations, or clock drift, or something that's leading to negative CPU // readings in some cases; -2.960594732333751e-15 and so on. // Pretty sure this only affects the system time, but we're going to round // them all because having a negative CPU % doesn't make sense. logger.debug('reportMetrics - proc: %s, user: %s, syst: %s', proc, user, syst); if ((proc < 0) || !isFinite(proc)) { proc = 0; } if ((user < 0) || !isFinite(user)) { user = 0; } if ((syst < 0) || !isFinite(syst)) { syst = 0; } if (onMetric) { logger.debug('actually reported - proc: %s, user: %s, syst: %s', proc, user, syst); onMetric(proc, user, syst); } } function calculateMetrics(utime, stime, ptime, all, onMetric) { logger.debug('calculateMetrics - utime: %s, ptime: %s, stime: %s, all: %s', utime, ptime, stime, all); if (last.valid) { var tickDelta = (all - last.all); if (tickDelta == 0) { // no ticks have elapsed, so these readings are all invalid; just return return; } var proc = (ptime - last.ptime) / tickDelta * 100; var user = (utime - last.utime) / tickDelta * 100; var syst = (stime - last.stime) / tickDelta * 100; reportMetrics(proc, user, syst, onMetric); } else { logger.debug('last not valid, not reporting'); } last.valid = true; last.all = all; last.ptime = ptime; last.utime = utime; last.stime = stime; } // used for OS X and FreeBSD function parsePs(fields, pid, onMetric) { // Use try/catch for // - spawn because of https://github.com/joyent/node/issues/7453 // - console.error because of https://github.com/joyent/node/issues/7455 try { var ps = require('child_process').spawn('/bin/ps', ['-p', pid, '-o', fields]); } catch (err) { try { logger.error('ca-apm-probe failed to get cpu usage:', err); } catch (err) { // On OS X, sometimes you can't write to stderr without allocating a // kqueue fd. } return; } if (ps.unref) { ps.unref(); } var res = ''; ps.stdout.on('data', function(data) { res += data; }); ps.on('error', function(err) { logger.error('ca-apm-probe failed to get cpu usage:', err); }); ps.on('close', function() { var m = res.match( /ELAPSED\s*(\d*):(\d*\.\d*)\s*(\d*):(\d*\.\d*)\s*(?:(\d*)-)?(?:(\d*):)?(\d*):(\d*)/); //logger.debug('ps is:\n%s', res); if (m) { m.shift(); // toss the full match var keys = [ 'uMinutes', 'uSeconds', 'pMinutes', 'pSeconds', 'rDays', 'rHours', 'rMinutes', 'rSeconds' ]; var data = m.reduce(function(memo, val) { memo[keys.shift()] = parseFloat(val) || 0; return memo; }, {}); var utime = data.uMinutes * 60 + data.uSeconds; var ptime = data.pMinutes * 60 + data.pSeconds; var stime = ptime - utime; var all = data.rDays * 86400 + data.rHours * 3600 + data.rMinutes * 60 + data.rSeconds; calculateMetrics(utime, stime, ptime, all, onMetric); } else { logger.debug('unparsable ps data'); } }); } exports.cpuutil = function(onMetric) { var pid = process.pid; var metrics = process.cpuUsage(); if (metrics) { var utime = metrics.user / 1000000; var stime = metrics.system / 1000000; var ptime = utime + stime; var all = process.uptime(); calculateMetrics(utime, stime, ptime, all, onMetric); return; } if (platform === 'linux') { logger.debug('platform is linux'); proc.stat(pid, function(err, stat) { if (err) { return logger.error('ca-apm-probe failed to get cpu usage:', err); } //logger.debug('stat is\n', util.inspect(stat)); calculateMetrics(stat.utime, stat.stime, stat.ptime, stat.all, onMetric); }); } else if (platform === 'sunos' || platform === 'solaris') { logger.debug('platform is sunos/solaris'); proc.usage(pid, function(err, usage) { if (err) { return logger.error('ca-apm-probe failed to get cpu usage:', err); } var utime = usage.utime; var stime = usage.stime; var ptime = utime + stime; var all = usage.rtime; calculateMetrics(utime, stime, ptime, all, onMetric); }); } else if (platform === 'darwin') { logger.debug('platform is darwin'); parsePs('utime,time,etime', pid, onMetric); } else if (platform === 'freebsd') { logger.debug('platform is freebsd'); parsePs('usertime,time,etime', pid, onMetric); } else if (platform === 'win32') { logger.debug('platform is win32'); if (last.uptime) { var ps = require('child_process').exec('tasklist /v', function(err, stdout, stderr) { if (!err) { stdout.split('\n').forEach(function(item) { var items = item.split(/\s+/); var pid = parseInt(items[1]); if (pid === process.pid) { var times = items[8].split(/[:.]/); var hour = parseInt(times[0]); var mins = parseInt(times[1]); var secs = parseInt(times[2]); // Total CPU Time of Process in Seconds var active = hour * 3600 + mins * 60 + secs; // Total Uptime of Process in Seconds var uptime = process.uptime(); var uptimeDelta = uptime - last.uptime; var activeDelta = active - last.active; last.active = active; last.uptime = uptime; var usage = activeDelta / uptimeDelta; if (uptimeDelta > 0) { if (usage > 1) { // Spike Alert } reportMetrics(usage * 100, usage * 100, 0, onMetric); } } }) } }); ps.on('error', function(err) { logger.error('ca-apm-probe failed to get cpu usage:', err); }); } else { last.uptime = process.uptime(); } } // if win32 }