@cocalc/hub
Version:
CoCalc: Backend webserver component
261 lines (229 loc) • 9.67 kB
JavaScript
// Generated by CoffeeScript 2.5.1
(function() {
//########################################################################
// This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
// License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
//########################################################################
// This is a small helper class to record real-time metrics about the hub.
// It is designed for the hub, such that a local process can easily check its health.
// After an initial version, this has been repurposed to use prometheus.
// It wraps its client elements and adds some instrumentation to some hub components.
/*
* there is more than just continuous values
* cont: continuous (like number of changefeeds), will be smoothed
* disc: discrete, like blocked, will be recorded with timestamp
* in a queue of length DISC_LEN
exports.TYPE = TYPE =
COUNT: 'counter' # strictly non-decreasing integer
GAUGE: 'gauge' # only the most recent value is recorded
LAST : 'latest' # only the most recent value is recorded
DISC : 'discrete' # timeseries of length DISC_LEN
CONT : 'continuous' # continuous with exponential decay
MAX : 'contmax' # like CONT, reduces buffer to max value
SUM : 'contsum' # like CONT, reduces buffer to sum of values divided by FREQ_s
*/
// Module-level bindings; they are assigned below and by the definitions further down.
var CLK_TCK, DELAY_s, FREQ_s, MetricsRecorder, PREFIX, defaults, err, execSync, fs, metricsRecorder, misc, new_counter, new_gauge, new_histogram, new_quantile, path, prom_client, underscore;

fs = require('fs');
path = require('path');
underscore = require('underscore');
({execSync} = require('child_process'));
({defaults} = misc = require('@cocalc/util/misc'));

// Prometheus client setup -- https://github.com/siimon/prom-client
prom_client = require('prom-client');

// some constants
FREQ_s = 5; // update stats every FREQ seconds
DELAY_s = 10; // with an initial delay of DELAY seconds

// collect some recommended default metrics
prom_client.collectDefaultMetrics({
  timeout: FREQ_s * 1000
});

// CLK_TCK: number of clock ticks per second (usually 100, but maybe not ...).
// It is the divisor used to turn the tick counts of /proc/<pid>/stat into seconds.
try {
  CLK_TCK = parseInt(execSync('getconf CLK_TCK', {
    encoding: 'utf8'
  }), 10);
  // Unparsable output gives NaN, which would pass a plain null-check and turn
  // every CPU gauge into NaN (or Infinity for 0). Normalize to "unknown".
  if (!(CLK_TCK > 0)) {
    CLK_TCK = null;
  }
} catch (error) {
  // getconf is not available or failed: null makes the CPU sampling skip itself
  err = error;
  CLK_TCK = null;
}

// common prefix of all metric names created via the new_* helpers
PREFIX = 'cocalc_hub_';
exports.new_counter = new_counter = function(name, help, labels) {
// a prometheus counter -- https://github.com/siimon/prom-client#counter
// use it like counter.labels(labelA, labelB).inc([positive number or default is 1])
if (!name.endsWith('_total')) {
throw `Counter metric names have to end in [_unit]_total but I got '${name}' -- https://prometheus.io/docs/practices/naming/`;
}
return new prom_client.Counter({
name: PREFIX + name,
help: help,
labelNames: labels != null ? labels : []
});
};
exports.new_gauge = new_gauge = function(name, help, labels) {
// a prometheus gauge -- https://github.com/siimon/prom-client#gauge
// basically, use it like gauge.labels(labelA, labelB).set(value)
return new prom_client.Gauge({
name: PREFIX + name,
help: help,
labelNames: labels != null ? labels : []
});
};
exports.new_quantile = new_quantile = function(name, help, config = {}) {
// invoked as quantile.observe(value)
config = defaults(config, {
// a few more than the default, in particular including the actual min and max
percentiles: [0.0, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999, 1.0],
labels: []
});
return new prom_client.Summary({
name: PREFIX + name,
help: help,
labelNames: config.labels,
percentiles: config.percentiles
});
};
exports.new_histogram = new_histogram = function(name, help, config = {}) {
// invoked as histogram.observe(value)
config = defaults(config, {
buckets: [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
labels: []
});
return new prom_client.Histogram({
name: PREFIX + name,
help: help,
labelNames: config.labels,
buckets: config.buckets
});
};
// Metrics pushed from browser clients; starts out empty.
// This object is modified by the Client class (in client.coffee) when metrics
// get pushed from browsers. It's a map from client_id to
// an array of metrics objects, which are already labeled with extra
// information about the client_id and account_id.
// MetricsRecorder.client_metrics() aggregates all of its values for the output.
exports.client_metrics = {};
/**
 * Collects metrics about the hub and serializes them for prometheus.
 *
 * Constructing an instance registers a few gauges/histograms and starts a timer:
 * after DELAY_s seconds, and then every FREQ_s seconds, all registered collector
 * functions are evaluated and the CPU times of this process are read from /proc.
 */
MetricsRecorder = class MetricsRecorder {
  /**
   * @param {function(string)} dbg1 - logging function, called with a message string
   *   (exports.init passes a wrapper around winston.info)
   * @param {function} [cb] - optional; called synchronously as cb(undefined, recorder)
   *   at the end of the constructor
   */
  constructor(dbg1, cb) {
    // bind the methods, so that they can be handed around as plain callbacks
    this.client_metrics = this.client_metrics.bind(this);
    this.metrics = this.metrics.bind(this);
    this.register_collector = this.register_collector.bind(this);
    this.setup_monitoring = this.setup_monitoring.bind(this);
    this._collect = this._collect.bind(this);
    this.dbg = dbg1;
    // stores the current state of the statistics
    this._stats = {};
    this._types = {}; // key → TYPE.T mapping
    // the full statistic
    this._data = {};
    // functions which are evaluated on each run of _collect
    this._collectors = [];
    // initialization finished
    this.setup_monitoring();
    if (typeof cb === "function") {
      cb(void 0, this);
    }
  }

  /**
   * Serialize the metrics pushed by the clients.
   *
   * exports.client_metrics is a mapping of client id to the json exported metric.
   * The AggregatorRegistry is supposed to work with a list of metrics, and by default,
   * it sums them up. `aggregate` is a static method and hence it should be ok to use it directly.
   *
   * @returns {Promise<string>} result of the aggregated registry's metrics()
   */
  async client_metrics() {
    const per_client = Object.values(exports.client_metrics);
    const registry = prom_client.AggregatorRegistry.aggregate(per_client);
    return (await registry.metrics());
  }

  /**
   * Get a serialized representation of the metrics status
   * (was a dict that should be JSON, now it is for prometheus).
   * It's only called by the HTTP stuff in servers for the /metrics endpoint.
   *
   * @returns {Promise<string>} the hub's own metrics followed by the client metrics
   */
  async metrics() {
    const hub = (await prom_client.register.metrics());
    const clients = (await this.client_metrics());
    return hub + clients;
  }

  /**
   * Add a collector function; it will be evaluated periodically to gather metrics.
   *
   * @param {function} collector - called without arguments on each run of _collect
   * @returns {number} the new number of registered collectors
   */
  register_collector(collector) {
    return this._collectors.push(collector);
  }

  /**
   * Setup monitoring of some components and start the periodic collection.
   * Called by the hub *after* setting up the DB, etc.
   *
   * @returns the handle of the timeout which starts the periodic collection
   */
  setup_monitoring() {
    const num_clients_gauge = new_gauge('clients_count', 'Number of connected clients');
    const {number_of_clients} = require('./hub_register');
    this.register_collector(function() {
      try {
        return num_clients_gauge.set(number_of_clients());
      } catch (error) {
        // best effort: report zero clients if the count cannot be determined
        return num_clients_gauge.set(0);
      }
    });
    // our own CPU metrics monitor, separating user and sys!
    // it's actually a counter, since it is non-decreasing, but we'll use .set(...)
    this._cpu_seconds_total = new_gauge('process_cpu_categorized_seconds_total', 'Total number of CPU seconds used', ['type']);
    this._collect_duration = new_histogram('metrics_collect_duration_s', 'How long it took to gather the metrics', {
      buckets: [0.0001, 0.001, 0.01, 1]
    });
    this._collect_duration_last = new_gauge('metrics_collect_duration_s_last', 'How long it took the last time to gather the metrics');
    // init periodically calling @_collect, after an initial delay
    return setTimeout((() => {
      return setInterval(this._collect, FREQ_s * 1000);
    }), DELAY_s * 1000);
  }

  /**
   * One collection run: evaluate all collector functions, then sample the CPU times.
   * Called periodically by the interval started in setup_monitoring.
   *
   * The duration timers are always stopped, even if the CPU times could not be
   * sampled, such that the duration of every run is recorded.
   */
  _collect() {
    const endG = this._collect_duration_last.startTimer();
    const endH = this._collect_duration.startTimer();
    for (const collector of this._collectors) {
      try {
        collector();
      } catch (err) {
        // one broken collector must neither skip the remaining ones
        // nor throw out of the interval callback
        this.dbg(`_collect: collector failed: ${err}`);
      }
    }
    // linux specific: collecting this process and all its children sys+user times
    // http://man7.org/linux/man-pages/man5/proc.5.html
    return fs.readFile(path.join('/proc', '' + process.pid, 'stat'), 'utf8', (err, infos) => {
      if (err) {
        this.dbg(`_collect err: ${err}`);
      } else if (CLK_TCK == null || Number.isNaN(CLK_TCK)) {
        // without the ticks per second, the tick counts can't be converted to seconds
        this.dbg('_collect err: CLK_TCK is unknown, skipping CPU metrics');
      } else {
        // there might be spaces in the process name, hence split after the closing bracket!
        // index 0 is therefore the field right after the process name
        const fields = infos.slice(infos.lastIndexOf(')') + 2).split(' ');
        this._cpu_seconds_total.labels('user').set(parseFloat(fields[11]) / CLK_TCK);
        this._cpu_seconds_total.labels('system').set(parseFloat(fields[12]) / CLK_TCK);
        // time spent waiting on child processes
        this._cpu_seconds_total.labels('chld_user').set(parseFloat(fields[13]) / CLK_TCK);
        this._cpu_seconds_total.labels('chld_system').set(parseFloat(fields[14]) / CLK_TCK);
      }
      // END: the timings for this run.
      endG();
      return endH();
    });
  }
};
metricsRecorder = null;
exports.init = function(winston, cb) {
var dbg;
dbg = function(msg) {
return winston.info(`MetricsRecorder: ${msg}`);
};
return metricsRecorder = new MetricsRecorder(dbg, cb);
};
// Returns the recorder assigned by exports.init,
// or null if exports.init has not been called yet.
exports.get = function() {
return metricsRecorder;
};
}).call(this);
//# sourceMappingURL=metrics-recorder.js.map