UNPKG

@cocalc/project

Version:
375 lines (343 loc) 12.6 kB
// Generated by CoffeeScript 2.5.1 (function() { //######################################################################## // This file is part of CoCalc: Copyright © 2020 Sagemath, Inc. // License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details //######################################################################## /* Some code specific to running a project in the KuCalc environment. */ var PREFIX, PROJECT_ID, async, cgroup_stats, compute_status, compute_status_disk, compute_status_tmp, current_status, defaults, disk_usage, execSync, fs, get_bugs_total, misc, misc_node, path, processes_info, prom_client, session_id, start_ts, update_project_status; fs = require('fs'); async = require('async'); misc = require('@cocalc/util/misc'); misc_node = require('@cocalc/backend/misc_node'); path = require('path'); ({execSync} = require('child_process')); ({defaults} = misc = require('@cocalc/util/misc')); ({start_ts, session_id} = require('./consts')); // global variable PROJECT_ID = void 0; PREFIX = 'cocalc_project_'; // Prometheus client setup -- https://github.com/siimon/prom-client prom_client = require('prom-client'); // additionally, record GC statistics // https://www.npmjs.com/package/prometheus-gc-stats //# I'm commenting this out because the package prometheus-gc-stats //# on npm very explicitly says it does not support prom-client //# version 13, which is what we have installed everywhere. That //# version is a significant breaking change from version 12, so //# I'm also not comfortable reverting back. Harald I think force //# upgraded prom-client to version 13 in this commit: b31e087ea2c640f494db15b652d9d0f86e7bd8a5 // require('prometheus-gc-stats')()() // collect some recommended default metrics every 10 seconds prom_client.collectDefaultMetrics({ timeout: 10 * 1000 }); // --- end prometheus setup // This gets **changed** to true, if a certain // command line flag is passed in. exports.IN_KUCALC = false; // status information current_status = {}; exports.init = function(client) { var f; // update project status every 30s // TODO: could switch to faster when it's changing and slower when it isn't. f = function() { return update_project_status(client); }; f(); return setInterval(f, 30000); }; update_project_status = function(client, cb) { var dbg, status; dbg = client.dbg("update_status"); dbg(); status = void 0; return async.series([ function(cb) { return compute_status(function(err, s) { status = s; if (!err) { current_status = s; } return cb(err); }); }, function(cb) { return client.query({ query: { projects: { project_id: client.client_id(), status: status } }, cb: cb }); } ], function(err) { return typeof cb === "function" ? cb(err) : void 0; }); }; exports.compute_status = compute_status = function(cb) { var status; status = { time: (new Date()).getTime(), memory: { rss: 0 }, disk_MB: 0, cpu: {}, start_ts: start_ts, session_id: session_id, processes: {} }; return async.parallel([ function(cb) { return compute_status_disk(status, cb); }, function(cb) { return cgroup_stats(status, cb); }, function(cb) { return processes_info(status, cb); }, function(cb) { return compute_status_tmp(status, cb); } ], function(err) { return cb(err, status); }); }; compute_status_disk = function(status, cb) { return disk_usage("$HOME", function(err, x) { status.disk_MB = x; return cb(err); }); }; processes_info = function(status, cb) { var cols; cols = ['pid', 'lstart', 'time', 'rss', 'args']; return misc_node.execute_code({ command: 'ps', args: ['--no-header', '-o', cols.join(','), '-u', 'user'], bash: false, cb: function(err, out) { var cnt, i, len, line, ref; if (err || out.exit_code !== 0) { return cb(err); } else { cnt = -1; // no need to account for the ps process itself! ref = out.stdout.split('\n'); // TODO parsing anything out of ps is really hard :-( // but we want to know how many sage, jupyter, console, etc. instances are running. for (i = 0, len = ref.length; i < len; i++) { line = ref[i]; if (line.length > 0) { cnt += 1; } } status.processes.count = cnt; return cb(); } } }); }; // NOTE: we use tmpfs for /tmp, so RAM usage is the **sum** of /tmp and what // processes use. compute_status_tmp = function(status, cb) { return disk_usage("/tmp", function(err, x) { status.memory.rss += 1000 * x; return cb(err); }); }; // this grabs the memory stats directly from the sysfs cgroup files // the actual usage is the sum of the rss values plus cache, but we leave cache aside cgroup_stats = function(status, cb) { return async.parallel({ memory: function(cb) { return fs.readFile('/sys/fs/cgroup/memory/memory.stat', 'utf8', function(err, data) { var i, key, len, line, ref, stats, value; if (err) { cb(err); return; } stats = {}; ref = data.split('\n'); for (i = 0, len = ref.length; i < len; i++) { line = ref[i]; [key, value] = line.split(' '); try { stats[key] = parseInt(value); } catch (error) {} } return cb(null, stats); }); }, cpu: function(cb) { return fs.readFile('/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage', 'utf8', function(err, data) { if (err) { cb(err); return; } try { return cb(null, parseFloat(data) / Math.pow(10, 9)); } catch (error) { return cb(null, 0.0); } }); }, oom: function(cb) { return fs.readFile('/sys/fs/cgroup/memory/memory.oom_control', 'utf8', function(err, data) { var i, len, line, ref; if (err) { cb(err); return; } try { ref = data.split('\n'); for (i = 0, len = ref.length; i < len; i++) { line = ref[i]; // search string includes a trailing space, otherwise it matches 'oom_kill_disable'! if (misc.startswith(line, 'oom_kill ')) { cb(null, parseInt(line.split(' ')[1])); return; } } } catch (error) {} return cb(null, 0); }); } }, function(err, res) { var kib, ref, ref1, ref2; kib = 1024; // convert to kibibyte // total_rss includes total_rss_huge // Ref: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt status.memory.rss += ((ref = res.memory.total_rss) != null ? ref : 0) / kib; status.memory.cache = ((ref1 = res.memory.total_cache) != null ? ref1 : 0) / kib; status.memory.limit = ((ref2 = res.memory.hierarchical_memory_limit) != null ? ref2 : 0) / kib; status.cpu.usage = res.cpu; status.oom_kills = res.oom; return cb(); }); }; disk_usage = function(path, cb) { return misc_node.execute_code({ command: `df -BM ${path} | tail -1 | awk '{gsub(\"M\",\"\");print $3}'`, bash: true, cb: function(err, out) { if (err) { return cb(err); } else { return cb(void 0, parseInt(out.stdout)); } } }); }; // Every 60s, check if we can reach google's internal network -- in kucalc on GCE, this must be blocked. // If we receive some information, exit with status code 99. exports.init_gce_firewall_test = function(logger, interval_ms = 60 * 1000) { // temporarily disabled var URI, test_firewall; return; if (!exports.IN_KUCALC) { if (logger != null) { logger.warn("not running firewall test -- not in kucalc"); } return; } URI = 'http://metadata.google.internal/computeMetadata/v1/'; test_firewall = function() { var request; if (logger != null) { logger.log("test_firewall"); } request = require('request'); return request({ timeout: 3000, headers: { 'Metadata-Flavor': 'Google' }, uri: URI, method: 'GET' }, function(err, res, body) { if ((err != null ? err.code : void 0) === 'ETIMEDOUT') { return logger != null ? logger.log('test_firewall: timeout -> no action') : void 0; } else { if (logger != null) { logger.warn('test_firewall', res); } if (logger != null) { logger.warn('test_firewall', body); } if ((res != null) || (body != null)) { if (logger != null) { logger.warn('test_firewall: request went through and got a response -> exiting with code 99'); } return process.exit(99); } else { return logger != null ? logger.warn('test_firewall: request went through with no response -> no action') : void 0; } } }); }; test_firewall(); setInterval(test_firewall, interval_ms); }; get_bugs_total = require('./bug-counter').default; exports.prometheus_metrics = function(project_id) { var labels, ref, ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9; labels = `project_id=\"${project_id}\",session_id=\"${session_id}\"`; return `# HELP cocalc_project_bugs_total The total number of caught bugs. # TYPE cocalc_project_bugs_total counter cocalc_project_bugs_total{${labels}} ${get_bugs_total()} # HELP cocalc_project_start_time when the project/session started # TYPE cocalc_project_start_time counter cocalc_project_start_time{${labels}} ${start_ts} # HELP cocalc_project_cpu_usage_seconds # TYPE cocalc_project_cpu_usage_seconds counter cocalc_project_cpu_usage_seconds{${labels}} ${(ref = (ref1 = current_status.cpu) != null ? ref1.usage : void 0) != null ? ref : 0.0} # HELP cocalc_project_disk_usage_mb # TYPE cocalc_project_disk_usage_mb gauge cocalc_project_disk_usage_mb{${labels}} ${(ref2 = current_status.disk_MB) != null ? ref2 : 0.0} # HELP cocalc_project_memory_usage_ki # TYPE cocalc_project_memory_usage_ki gauge cocalc_project_memory_usage_ki{${labels}} ${(ref3 = (ref4 = current_status.memory) != null ? ref4.rss : void 0) != null ? ref3 : 0.0} # HELP cocalc_project_memory_limit_ki # TYPE cocalc_project_memory_limit_ki gauge cocalc_project_memory_limit_ki{${labels}} ${(ref5 = (ref6 = current_status.memory) != null ? ref6.limit : void 0) != null ? ref5 : 0.0} # HELP cocalc_project_running_processes_total # TYPE cocalc_project_running_processes_total gauge cocalc_project_running_processes_total{${labels}} ${(ref7 = (ref8 = current_status.processes) != null ? ref8.count : void 0) != null ? ref7 : 0} # HELP cocalc_project_oom_kills_total # TYPE cocalc_project_oom_kills_total counter cocalc_project_oom_kills_total{${labels}} ${(ref9 = current_status.oom_kills) != null ? ref9 : 0}` + '\n'; // makes sure the response ends with a newline! }; // called inside raw_server exports.init_health_metrics = function(raw_server, project_id) { if (!exports.IN_KUCALC) { return; } PROJECT_ID = project_id; // Setup health and metrics (no url base prefix needed) raw_server.use('/health', function(req, res) { res.setHeader("Content-Type", "text/plain"); res.setHeader('Cache-Control', 'no-cache, no-store'); return res.send('OK'); }); // prometheus text format -- https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details return raw_server.use('/metrics', async function(req, res) { var part1; res.setHeader("Content-Type", "text/plain; version=0.0.4"); res.header('Cache-Control', 'no-cache, no-store'); part1 = exports.prometheus_metrics(project_id); return res.send(part1 + '\n' + ((await prom_client.register.metrics())) + '\n'); }); }; }).call(this); //# sourceMappingURL=kucalc.js.map