// @cocalc/project
// Version:
// CoCalc: project daemon
// 375 lines (343 loc) • 12.6 kB
// JavaScript
// Generated by CoffeeScript 2.5.1
(function() {
//########################################################################
// This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
// License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
//########################################################################
/*
Some code specific to running a project in the KuCalc environment.
*/
var PREFIX, PROJECT_ID, async, cgroup_stats, compute_status, compute_status_disk, compute_status_tmp, current_status, defaults, disk_usage, execSync, fs, get_bugs_total, misc, misc_node, path, processes_info, prom_client, session_id, start_ts, update_project_status;
fs = require('fs');
async = require('async');
misc = require('@cocalc/util/misc');
misc_node = require('@cocalc/backend/misc_node');
path = require('path');
({execSync} = require('child_process'));
({defaults} = misc = require('@cocalc/util/misc'));
({start_ts, session_id} = require('./consts'));
// Module-level state.
// PROJECT_ID starts out undefined and is assigned by init_health_metrics.
PROJECT_ID = void 0;
// Common prefix of the metric names emitted by prometheus_metrics; the names
// there are spelled out in full, and PREFIX itself is not referenced in the
// code shown here.
PREFIX = 'cocalc_project_';
// Prometheus client setup -- https://github.com/siimon/prom-client
prom_client = require('prom-client');
// GC statistics (https://www.npmjs.com/package/prometheus-gc-stats) are
// intentionally NOT recorded:
//# The package prometheus-gc-stats on npm very explicitly says it does not
//# support prom-client version 13, which is what we have installed
//# everywhere. That version is a significant breaking change from
//# version 12, so reverting is not a comfortable option either.
//# prom-client was force-upgraded to version 13 in commit
//# b31e087ea2c640f494db15b652d9d0f86e7bd8a5.
// require('prometheus-gc-stats')()()
// Register the recommended default metrics; the option below asks for a
// 10 second collection interval.
// NOTE(review): newer prom-client releases may collect default metrics at
// scrape time and ignore `timeout` -- confirm against the installed version.
prom_client.collectDefaultMetrics({
timeout: 10 * 1000
});
// --- end prometheus setup
// This gets **changed** to true, if a certain
// command line flag is passed in. While it is false, init_health_metrics
// returns without registering any routes.
exports.IN_KUCALC = false;
// Most recent successfully computed status. update_project_status replaces
// it after each error-free compute_status run, and prometheus_metrics reads it.
current_status = {};
exports.init = function(client) {
var f;
// update project status every 30s
// TODO: could switch to faster when it's changing and slower when it isn't.
f = function() {
return update_project_status(client);
};
f();
return setInterval(f, 30000);
};
// Compute the project status and write it to the `projects` table via
// client.query.
//
// client -- must provide dbg(name), client_id() and query({query, cb}).
// cb     -- optional; called with the first error, or with undefined.
//
// The two steps run one after the other; when computing the status fails,
// nothing is written and the cached `current_status` is left untouched.
update_project_status = function(client, cb) {
  const log = client.dbg("update_status");
  log();
  let status;

  // Step 1: gather the numbers and remember them for the metrics endpoint.
  const gather = (done) =>
    compute_status((err, computed) => {
      status = computed;
      if (!err) {
        current_status = computed;
      }
      return done(err);
    });

  // Step 2: store the freshly computed status under this project's id.
  const save = (done) =>
    client.query({
      query: {
        projects: {
          project_id: client.client_id(),
          status: status,
        },
      },
      cb: done,
    });

  return async.series([gather, save], (err) => {
    if (typeof cb === "function") {
      return cb(err);
    }
  });
};
exports.compute_status = compute_status = function(cb) {
var status;
status = {
time: (new Date()).getTime(),
memory: {
rss: 0
},
disk_MB: 0,
cpu: {},
start_ts: start_ts,
session_id: session_id,
processes: {}
};
return async.parallel([
function(cb) {
return compute_status_disk(status,
cb);
},
function(cb) {
return cgroup_stats(status,
cb);
},
function(cb) {
return processes_info(status,
cb);
},
function(cb) {
return compute_status_tmp(status,
cb);
}
], function(err) {
return cb(err, status);
});
};
// Record the disk usage of $HOME (as reported by disk_usage) in
// status.disk_MB.
//
// status -- status object being assembled; only disk_MB is written.
// cb(err) -- receives the error reported by disk_usage, if any.
//
// disk_MB is only overwritten with a finite number. When disk_usage fails or
// its output could not be parsed (NaN), the previous value is kept, so the
// field never becomes undefined or NaN.
compute_status_disk = function(status, cb) {
  return disk_usage("$HOME", function(err, x) {
    if (!err && Number.isFinite(x)) {
      status.disk_MB = x;
    }
    return cb(err);
  });
};
// Count the processes listed by `ps -u user` and store the number in
// status.processes.count.
//
// status -- status object being assembled; status.processes must exist.
// cb(err) -- called with the execution error, if any. When ps exits with a
// non-zero code but no error object, cb is called without an error and the
// count is left untouched.
processes_info = function(status, cb) {
  const columns = ['pid', 'lstart', 'time', 'rss', 'args'];

  const on_finished = function(err, out) {
    if (err || out.exit_code !== 0) {
      return cb(err);
    }
    // TODO parsing anything out of ps is really hard :-(
    // but we want to know how many sage, jupyter, console, etc. instances are running.
    const listed = out.stdout.split('\n').filter((row) => row.length > 0);
    // subtract one: no need to account for the ps process itself!
    status.processes.count = listed.length - 1;
    return cb();
  };

  return misc_node.execute_code({
    command: 'ps',
    args: ['--no-header', '-o', columns.join(','), '-u', 'user'],
    bash: false,
    cb: on_finished,
  });
};
// NOTE: we use tmpfs for /tmp, so RAM usage is the **sum** of /tmp and what
// processes use.
//
// Adds the space used in /tmp (as reported by disk_usage, in units of `df -BM`)
// to status.memory.rss, multiplied by 1000 to bring it to the unit of rss.
//
// status -- status object being assembled; status.memory.rss must be a number.
// cb(err) -- receives the error reported by disk_usage, if any.
//
// The sum is only updated with a finite number: when disk_usage fails or its
// output could not be parsed, rss is left alone instead of turning into NaN.
//
// NOTE(review): `df -BM` counts in MiB, so 1024 would be the exact factor to
// KiB; 1000 is kept here so reported values do not change -- confirm intent.
compute_status_tmp = function(status, cb) {
  return disk_usage("/tmp", function(err, x) {
    if (!err && Number.isFinite(x)) {
      status.memory.rss += 1000 * x;
    }
    return cb(err);
  });
};
// this grabs the memory stats directly from the sysfs cgroup files
// the actual usage is the sum of the rss values plus cache, but we leave cache aside
//
// Reads three cgroup (v1 layout) files in parallel and merges them into
// `status`:
//   memory.stat        -> status.memory.rss (added), .cache, .limit  [KiB]
//   cpuacct.usage      -> status.cpu.usage  [seconds; file value / 10^9]
//   memory.oom_control -> status.oom_kills  [value of the `oom_kill` line]
//
// status -- status object being assembled; status.memory.rss must be a
//           number and status.cpu an object.
// cb(err) -- called with the read error if any of the files cannot be read
//            (status is then left untouched), otherwise without arguments.
//
// Values that are missing or cannot be parsed count as 0.
cgroup_stats = function(status, cb) {
  const KIB = 1024; // convert to kibibyte
  const or_zero = (value) => (value != null ? value : 0);

  // memory.stat consists of "<key> <integer>" lines.
  const read_memory_stat = function(done) {
    return fs.readFile('/sys/fs/cgroup/memory/memory.stat', 'utf8', function(err, data) {
      if (err) {
        done(err);
        return;
      }
      const stats = {};
      for (const line of data.split('\n')) {
        const [key, value] = line.split(' ');
        const parsed = Number.parseInt(value, 10);
        // skip blank or malformed lines so that no NaN ends up in the status
        if (key && !Number.isNaN(parsed)) {
          stats[key] = parsed;
        }
      }
      return done(null, stats);
    });
  };

  // cpuacct.usage holds a single number, which is divided by 10^9 below.
  const read_cpu_usage = function(done) {
    return fs.readFile('/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage', 'utf8', function(err, data) {
      if (err) {
        done(err);
        return;
      }
      const seconds = parseFloat(data) / 1e9;
      return done(null, Number.isFinite(seconds) ? seconds : 0.0);
    });
  };

  const read_oom_kills = function(done) {
    return fs.readFile('/sys/fs/cgroup/memory/memory.oom_control', 'utf8', function(err, data) {
      if (err) {
        done(err);
        return;
      }
      for (const line of data.split('\n')) {
        // search string includes a trailing space, otherwise it matches 'oom_kill_disable'!
        if (line.startsWith('oom_kill ')) {
          const kills = Number.parseInt(line.split(' ')[1], 10);
          done(null, Number.isNaN(kills) ? 0 : kills);
          return;
        }
      }
      return done(null, 0);
    });
  };

  return async.parallel({
    memory: read_memory_stat,
    cpu: read_cpu_usage,
    oom: read_oom_kills,
  }, function(err, res) {
    if (err) {
      // after a failed read `res` is incomplete, so nothing is merged
      return cb(err);
    }
    // total_rss includes total_rss_huge
    // Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
    status.memory.rss += or_zero(res.memory.total_rss) / KIB;
    status.memory.cache = or_zero(res.memory.total_cache) / KIB;
    status.memory.limit = or_zero(res.memory.hierarchical_memory_limit) / KIB;
    status.cpu.usage = res.cpu;
    status.oom_kills = res.oom;
    return cb();
  });
};
// Determine how much space is used on the filesystem containing `path`.
//
// Runs `df -BM <path>` through bash, takes the last output line and parses
// its third column (with the "M" suffix removed) as a base-10 integer.
//
// path -- inserted into the shell command unquoted, so bash expands things
//         like "$HOME"; only pass trusted values.
// cb(err, used) -- `err` is whatever execute_code reports; otherwise `used`
//         is the parsed number. It is NaN when the output could not be
//         parsed; callers must be prepared for that.
disk_usage = function(path, cb) {
  return misc_node.execute_code({
    command: `df -BM ${path} | tail -1 | awk '{gsub("M","");print $3}'`,
    bash: true,
    cb: function(err, out) {
      if (err) {
        return cb(err);
      }
      // explicit radix: the column is always a decimal number
      return cb(void 0, Number.parseInt(out.stdout, 10));
    }
  });
};
// Every 60s, check if we can reach google's internal network -- in kucalc on GCE, this must be blocked.
// If we receive some information, exit with status code 99.
exports.init_gce_firewall_test = function(logger, interval_ms = 60 * 1000) { // temporarily disabled
var URI, test_firewall;
return;
if (!exports.IN_KUCALC) {
if (logger != null) {
logger.warn("not running firewall test -- not in kucalc");
}
return;
}
URI = 'http://metadata.google.internal/computeMetadata/v1/';
test_firewall = function() {
var request;
if (logger != null) {
logger.log("test_firewall");
}
request = require('request');
return request({
timeout: 3000,
headers: {
'Metadata-Flavor': 'Google'
},
uri: URI,
method: 'GET'
}, function(err, res, body) {
if ((err != null ? err.code : void 0) === 'ETIMEDOUT') {
return logger != null ? logger.log('test_firewall: timeout -> no action') : void 0;
} else {
if (logger != null) {
logger.warn('test_firewall', res);
}
if (logger != null) {
logger.warn('test_firewall', body);
}
if ((res != null) || (body != null)) {
if (logger != null) {
logger.warn('test_firewall: request went through and got a response -> exiting with code 99');
}
return process.exit(99);
} else {
return logger != null ? logger.warn('test_firewall: request went through with no response -> no action') : void 0;
}
}
});
};
test_firewall();
setInterval(test_firewall, interval_ms);
};
get_bugs_total = require('./bug-counter').default;
exports.prometheus_metrics = function(project_id) {
var labels, ref, ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9;
labels = `project_id=\"${project_id}\",session_id=\"${session_id}\"`;
return `# HELP cocalc_project_bugs_total The total number of caught bugs.
# TYPE cocalc_project_bugs_total counter
cocalc_project_bugs_total{${labels}} ${get_bugs_total()}
# HELP cocalc_project_start_time when the project/session started
# TYPE cocalc_project_start_time counter
cocalc_project_start_time{${labels}} ${start_ts}
# HELP cocalc_project_cpu_usage_seconds
# TYPE cocalc_project_cpu_usage_seconds counter
cocalc_project_cpu_usage_seconds{${labels}} ${(ref = (ref1 = current_status.cpu) != null ? ref1.usage : void 0) != null ? ref : 0.0}
# HELP cocalc_project_disk_usage_mb
# TYPE cocalc_project_disk_usage_mb gauge
cocalc_project_disk_usage_mb{${labels}} ${(ref2 = current_status.disk_MB) != null ? ref2 : 0.0}
# HELP cocalc_project_memory_usage_ki
# TYPE cocalc_project_memory_usage_ki gauge
cocalc_project_memory_usage_ki{${labels}} ${(ref3 = (ref4 = current_status.memory) != null ? ref4.rss : void 0) != null ? ref3 : 0.0}
# HELP cocalc_project_memory_limit_ki
# TYPE cocalc_project_memory_limit_ki gauge
cocalc_project_memory_limit_ki{${labels}} ${(ref5 = (ref6 = current_status.memory) != null ? ref6.limit : void 0) != null ? ref5 : 0.0}
# HELP cocalc_project_running_processes_total
# TYPE cocalc_project_running_processes_total gauge
cocalc_project_running_processes_total{${labels}} ${(ref7 = (ref8 = current_status.processes) != null ? ref8.count : void 0) != null ? ref7 : 0}
# HELP cocalc_project_oom_kills_total
# TYPE cocalc_project_oom_kills_total counter
cocalc_project_oom_kills_total{${labels}} ${(ref9 = current_status.oom_kills) != null ? ref9 : 0}` + '\n'; // makes sure the response ends with a newline!
};
// called inside raw_server
exports.init_health_metrics = function(raw_server, project_id) {
if (!exports.IN_KUCALC) {
return;
}
PROJECT_ID = project_id;
// Setup health and metrics (no url base prefix needed)
raw_server.use('/health', function(req, res) {
res.setHeader("Content-Type", "text/plain");
res.setHeader('Cache-Control', 'no-cache, no-store');
return res.send('OK');
});
// prometheus text format -- https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details
return raw_server.use('/metrics', async function(req, res) {
var part1;
res.setHeader("Content-Type", "text/plain; version=0.0.4");
res.header('Cache-Control', 'no-cache, no-store');
part1 = exports.prometheus_metrics(project_id);
return res.send(part1 + '\n' + ((await prom_client.register.metrics())) + '\n');
});
};
}).call(this);
//# sourceMappingURL=kucalc.js.map