@cocalc/project
Version:
CoCalc: project daemon
290 lines (288 loc) • 10.7 kB
JavaScript
"use strict";
/*
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
* License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.get_ProjectStatusServer = exports.ProjectStatusServer = void 0;
/*
Project status server, doing the heavy lifting of telling the client
what's going on in the project, especially if there is a problem.
Under the hood, it subscribes to the ProjectInfoServer, which updates
various statistics at a high frequency. This server therefore filters
that information down to a low-frequency, low-volume stream of important
status updates.
In particular, information like cpu, memory and disk usage is smoothed out and throttled.
*/
const logger_1 = require("@cocalc/project/logger");
const misc_1 = require("@cocalc/util/misc");
const smc_version_1 = require("@cocalc/util/smc-version");
const awaiting_1 = require("awaiting");
const events_1 = require("events");
const lodash_1 = require("lodash");
const project_info_1 = require("../project-info");
const const_1 = require("./const");
const utils_1 = require("./utils");
// TODO: only return the "next" value, if it is significantly different from "prev"
//function threshold(prev?: number, next?: number): number | undefined {
// return next;
//}
const winston = (0, logger_1.getLogger)("ProjectStatusServer");
/**
 * Round `val` up to the next multiple of 10^order.
 *
 * @param {number} val - the value to quantize
 * @param {number} order - power of ten that defines the step size
 * @returns {number} the smallest multiple of the step that is >= val
 */
function quantize(val, order) {
    const step = Math.round(10 ** order);
    const steps = Math.ceil(val / step);
    return Math.round(step * steps);
}
/**
 * Derives a compact project status (alerts + rounded usage numbers) from the
 * "info" events of the ProjectInfoServer and emits it as "status" events.
 *
 * A status is emitted immediately when the list of alerts changes; any other
 * change is rate limited via STATUS_UPDATES_INTERVAL_S.
 */
class ProjectStatusServer extends events_1.EventEmitter {
    /**
     * @param {boolean} [testing=false] - if true, start() additionally keeps
     *   re-emitting the status every 5 seconds until stop() is called.
     */
    constructor(testing = false) {
        super();
        this.running = false;
        // per alert type: timestamp since when it is in a bad state (null: fine).
        // The keys match the alert types checked in alerts().
        this.elevated = {
            "cpu-cgroup": null,
            disk: null,
            memory: null,
        };
        // pid -> timestamp since when that process uses a lot of cpu
        this.elevated_cpu_procs = {};
        // component name -> timestamp when its problem was first reported
        this.components = {};
        this.lastEmit = 0; // timestamp, when status was emitted last
        this.lastEmitted = undefined; // the status object emitted by doEmit() last
        this.subscribed = false; // true once the "info" listener is registered
        this.testing = testing;
        this.dbg = (...msg) => winston.debug(...msg);
        this.project_info = (0, project_info_1.get_ProjectInfoServer)();
    }
    /**
     * Starts the project info server and subscribes to its "info" events.
     * The listener is registered only once, so a stop()/start() cycle does
     * not pile up duplicate listeners.
     */
    async init() {
        // NOTE(review): deliberately not awaited, as before — presumably this
        // promise only settles when the info server stops; confirm.
        this.project_info.start();
        if (this.subscribed) {
            return;
        }
        this.subscribed = true;
        this.project_info.on("info", (info) => {
            //this.dbg(`got info timestamp=${info.timestamp}`);
            this.info = info;
            this.update();
            this.emitInfo();
        });
    }
    /**
     * Checks if the current state (after update()) should be emitted.
     *
     * The comparison is made against the status that was emitted last, not
     * against the previously computed one. Otherwise a change suppressed by
     * the rate limit (or computed during a get_status() call) would never be
     * sent if the values stay stable afterwards.
     */
    emitInfo() {
        if (this.lastEmit === 0) {
            this.dbg("emitInfo[last=0]", this.status);
            this.doEmit();
            return;
        }
        // if alert changed, emit immediately
        if (!(0, lodash_1.isEqual)(this.lastEmitted?.alerts, this.status?.alerts)) {
            this.dbg("emitInfo[alert]", this.status);
            this.doEmit();
            return;
        }
        // deep comparison check via lodash and we rate limit
        const recent = this.lastEmit + 1000 * const_1.STATUS_UPDATES_INTERVAL_S > Date.now();
        const changed = !(0, lodash_1.isEqual)(this.status, this.lastEmitted);
        if (!recent && changed) {
            this.dbg("emitInfo[changed]", this.status);
            this.doEmit();
        }
    }
    /** Emits the current status and remembers what was sent and when. */
    doEmit() {
        this.emit("status", this.status);
        // update() always builds a fresh status object, so keeping the
        // reference is enough for later comparisons
        this.lastEmitted = this.status;
        this.lastEmit = Date.now();
    }
    /**
     * Marks the component `name` as having a problem.
     * @param {string} name - name of the component
     */
    setComponentAlert(name) {
        // we set this to the time when we first got notified about the problem
        if (this.components[name] == null) {
            this.components[name] = Date.now();
        }
    }
    /**
     * Removes the problem marker of the component `name`.
     * @param {string} name - name of the component
     */
    clearComponentAlert(name) {
        delete this.components[name];
    }
    /**
     * Derives the elevated levels and the usage fields (disk_mb, mem_pct, ...)
     * from the project info object. Does nothing if there is no info yet.
     */
    update_alerts() {
        if (this.info == null) {
            return;
        }
        const du = this.info.disk_usage.project;
        const ts = this.info.timestamp;
        const do_alert = (type, is_bad) => {
            if (is_bad) {
                // if it isn't fine, set it once to the timestamp (and let it age)
                if (this.elevated[type] == null) {
                    this.elevated[type] = ts;
                }
            }
            else {
                // unless it's fine again, then remove the timestamp
                this.elevated[type] = null;
            }
        };
        do_alert("disk", du.free < const_1.ALERT_DISK_FREE);
        this.disk_mb = du.usage;
        const cg = this.info.cgroup;
        const du_tmp = this.info.disk_usage.tmp;
        if (cg != null) {
            // we round/quantize values to reduce the number of updates
            // and also send less data with each update
            const cgStats = (0, utils_1.cgroup_stats)(cg, du_tmp);
            this.mem_pct = Math.round(cgStats.mem_pct);
            this.cpu_pct = Math.round(cgStats.cpu_pct);
            this.cpu_tot = Math.round(cgStats.cpu_tot);
            this.mem_tot = quantize(cgStats.mem_tot, 1);
            this.mem_rss = quantize(cgStats.mem_rss, 1);
            do_alert("memory", cgStats.mem_pct > const_1.ALERT_HIGH_PCT);
            do_alert("cpu-cgroup", cgStats.cpu_pct > const_1.ALERT_HIGH_PCT);
        }
    }
    /**
     * Keeps track of processes whose cpu percentage is above ALERT_HIGH_PCT.
     *
     * @returns {string[]} sorted PIDs of all processes that have been elevated
     *   for longer than RAISE_ALERT_AFTER_MIN (as measured by how_long_ago_m);
     *   empty if there is no info yet.
     */
    alert_cpu_processes() {
        const pids = [];
        if (this.info == null) {
            return [];
        }
        const ts = this.info.timestamp;
        const ecp = this.elevated_cpu_procs;
        // we have to check if there aren't any processes left which no longer exist
        const leftovers = new Set(Object.keys(ecp));
        // bookkeeping of elevated process PIDS
        for (const [pid, proc] of Object.entries(this.info.processes ?? {})) {
            leftovers.delete(pid);
            if (proc.cpu.pct > const_1.ALERT_HIGH_PCT) {
                if (ecp[pid] == null) {
                    ecp[pid] = ts;
                }
            }
            else {
                delete ecp[pid];
            }
        }
        for (const pid of leftovers) {
            delete ecp[pid];
        }
        // to actually fire alert when necessary
        for (const [pid, since] of Object.entries(ecp)) {
            if (since != null && (0, misc_1.how_long_ago_m)(since) > const_1.RAISE_ALERT_AFTER_MIN) {
                pids.push(pid);
            }
        }
        pids.sort(); // to make this stable across iterations
        //this.dbg("alert_cpu_processes", pids, ecp);
        return pids;
    }
    /**
     * Updates the alert levels and collects all alerts that are active.
     *
     * @returns {object[]} list of alerts: `{type}` for "cpu-cgroup", "disk" and
     *   "memory", `{type: "cpu-process", pids}` and `{type: "component", names}`.
     */
    alerts() {
        this.update_alerts();
        const alerts = [];
        const alert_keys = ["cpu-cgroup", "disk", "memory"];
        for (const k of alert_keys) {
            const ts = this.elevated[k];
            // only raise the alert if the problem persists for a while
            if (ts != null && (0, misc_1.how_long_ago_m)(ts) > const_1.RAISE_ALERT_AFTER_MIN) {
                alerts.push({ type: k });
            }
        }
        const pids = this.alert_cpu_processes();
        if (pids.length > 0) {
            alerts.push({ type: "cpu-process", pids });
        }
        const componentNames = [];
        for (const [k, ts] of Object.entries(this.components)) {
            if (ts == null) {
                continue;
            }
            // we alert without a delay
            componentNames.push(k);
        }
        // only send any alert if there is actually a problem!
        if (componentNames.length > 0) {
            alerts.push({ type: "component", names: componentNames });
        }
        return alerts;
    }
    /**
     * Generates random usage data for development: each value does a small
     * random step starting from the corresponding value of the previous status.
     *
     * @returns {object} usage object with the same fields as the real one
     */
    fake_data() {
        const lastUsage = this.last?.["usage"];
        const next = (key, max) => {
            const last = lastUsage?.[key] ?? max / 2;
            const dx = max / 50;
            const val = last + dx * Math.random() - dx / 2;
            return Math.round(Math.min(max, Math.max(0, val)));
        };
        const mem_tot = 3000;
        const mem_pct = next("mem_pct", 100);
        const mem_rss = Math.round((mem_tot * mem_pct) / 100);
        const cpu_tot = (0, misc_1.round1)((lastUsage?.["cpu_tot"] ?? 0) + Math.random() / 10);
        return {
            // the key must be the name of the usage field, otherwise the
            // previous value is never found and there is no random walk
            disk_mb: next("disk_mb", 3000),
            mem_tot,
            mem_pct,
            cpu_pct: next("cpu_pct", 100),
            cpu_tot,
            mem_rss,
        };
    }
    // this function takes the "info" we have (+ more maybe?)
    // and derives various states from it.
    // It shouldn't really matter how often it is being called,
    // but still only emit new objects if it is either really necessary (new alert)
    // or after some time. This must be a low-frequency and low-volume stream of data.
    update() {
        // the previously computed status; fake_data() continues from its usage values
        this.last = this.status;
        // alerts must come first, it updates usage status fields
        const alerts = this.alerts();
        // set this to true if you're developing (otherwise you don't get any data)
        const fake_data = false;
        // collect status fields in usage object
        const usage = fake_data
            ? this.fake_data()
            : {
                disk_mb: this.disk_mb,
                mem_pct: this.mem_pct,
                cpu_pct: this.cpu_pct,
                cpu_tot: this.cpu_tot,
                mem_rss: this.mem_rss,
                mem_tot: this.mem_tot,
            };
        this.status = { alerts, usage, version: smc_version_1.version };
    }
    /**
     * Recomputes the status and returns it.
     * @returns {Promise<object>} the status `{alerts, usage, version}`
     */
    async get_status() {
        this.update();
        return this.status;
    }
    /** Marks the server as not running; this also ends the testing loop of _start(). */
    stop() {
        this.running = false;
    }
    /** Starts the server, unless it is already running. */
    async start() {
        if (this.running) {
            this.dbg("project-status/server: already running, cannot be started twice");
        }
        else {
            await this._start();
        }
    }
    /**
     * Initializes the server and emits an initial status. In testing mode it
     * then emits the status every 5 seconds for as long as it is running.
     *
     * @throws {Error} if the server is already running
     */
    async _start() {
        this.dbg("start");
        if (this.running) {
            throw Error("Cannot start ProjectStatusServer twice");
        }
        this.running = true;
        await this.init();
        const status = await this.get_status();
        this.emit("status", status);
        while (this.testing && this.running) {
            await (0, awaiting_1.delay)(5000);
            // stop() might have been called while we were waiting
            if (!this.running) {
                break;
            }
            const status = await this.get_status();
            this.emit("status", status);
        }
    }
}
exports.ProjectStatusServer = ProjectStatusServer;
// module-wide singleton, created lazily on first access
let _status;
/**
 * Returns the shared ProjectStatusServer, creating it on the first call.
 *
 * @returns {ProjectStatusServer} always the same instance
 */
function get_ProjectStatusServer() {
    if (_status == null) {
        _status = new ProjectStatusServer();
    }
    return _status;
}
exports.get_ProjectStatusServer = get_ProjectStatusServer;
// testing: $ ts-node server.ts
// When run directly, start a server in testing mode, print the first two
// status updates as JSON and then exit.
if (require.main === module) {
    const server = new ProjectStatusServer(true);
    server.start();
    let seen = 0;
    const printStatus = (status) => {
        console.log(JSON.stringify(status, null, 2));
        seen += 1;
        if (seen >= 2) {
            process.exit();
        }
    };
    server.on("status", printStatus);
}
//# sourceMappingURL=server.js.map