UNPKG

@cocalc/project

Version:
290 lines (288 loc) 10.7 kB
"use strict";
/*
 *  This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
 *  License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.get_ProjectStatusServer = exports.ProjectStatusServer = void 0;
/*
Project status server, doing the heavy lifting of telling the client
what's going on in the project, especially if there is a problem.

Under the hood, it subscribes to the ProjectInfoServer, which updates
various statistics at a high-frequency. Therefore, this here filters
that information to a low-frequency low-volume stream of important
status updates.

Hence in particular, information like cpu, memory and disk are smoothed out and throttled.
*/
const logger_1 = require("@cocalc/project/logger");
const misc_1 = require("@cocalc/util/misc");
const smc_version_1 = require("@cocalc/util/smc-version");
const awaiting_1 = require("awaiting");
const events_1 = require("events");
const lodash_1 = require("lodash");
const project_info_1 = require("../project-info");
const const_1 = require("./const");
const utils_1 = require("./utils");

// TODO: only return the "next" value, if it is significantly different from "prev"
//function threshold(prev?: number, next?: number): number | undefined {
//  return next;
//}

const winston = (0, logger_1.getLogger)("ProjectStatusServer");

/**
 * Round `val` UP to the next multiple of 10^order.
 * E.g. quantize(123, 1) === 130.
 *
 * @param {number} val - value to quantize
 * @param {number} order - power of ten of the step size
 * @returns {number} smallest multiple of 10^order that is >= val
 */
function quantize(val, order) {
  const q = Math.round(Math.pow(10, order));
  return Math.round(q * Math.ceil(val / q));
}

/**
 * Derives a throttled "status" object (alerts + usage + version) from the
 * "info" events of the ProjectInfoServer and re-emits it as a "status" event.
 *
 * Events emitted:
 *   - "status": the current `{ alerts, usage, version }` object
 */
class ProjectStatusServer extends events_1.EventEmitter {
  /**
   * @param {boolean} [testing=false] - if true, `_start()` keeps emitting
   *   a status every 5 seconds while the server is running.
   */
  constructor(testing = false) {
    super();
    this.running = false;
    // Timestamp (or null) of when each resource first became elevated.
    // Keys match the alert types used in update_alerts()/alerts().
    this.elevated = {
      "cpu-cgroup": null,
      disk: null,
      memory: null,
    };
    // pid -> timestamp of when that process first exceeded the CPU threshold
    this.elevated_cpu_procs = {};
    // component name -> timestamp of when its problem was first reported
    this.components = {};
    this.lastEmit = 0; // timestamp, when status was emitted last
    // the "info" listener attached to project_info, or null if not attached yet
    this.infoListener = null;
    this.testing = testing;
    this.dbg = (...msg) => winston.debug(...msg);
    this.project_info = (0, project_info_1.get_ProjectInfoServer)();
  }

  /**
   * Start the underlying ProjectInfoServer and subscribe to its "info" events.
   * The listener is attached at most once per instance, so that calling
   * start() again after stop() does not stack up duplicate listeners
   * (EventEmitter.on does not check for already-registered listeners).
   */
  async init() {
    this.project_info.start();
    if (this.infoListener != null) return;
    this.infoListener = (info) => {
      //this.dbg(`got info timestamp=${info.timestamp}`);
      this.info = info;
      this.update();
      this.emitInfo();
    };
    this.project_info.on("info", this.infoListener);
  }

  /**
   * Checks if the current state (after update()) should be emitted:
   *   - always, if nothing was emitted yet via doEmit();
   *   - immediately, if the alerts changed;
   *   - otherwise only if the status changed and the last emit is at least
   *     STATUS_UPDATES_INTERVAL_S seconds ago.
   */
  emitInfo() {
    if (this.lastEmit === 0) {
      this.dbg("emitInfo[last=0]", this.status);
      this.doEmit();
      return;
    }
    // if alert changed, emit immediately
    if (!(0, lodash_1.isEqual)(this.last?.alerts, this.status?.alerts)) {
      this.dbg("emitInfo[alert]", this.status);
      this.doEmit();
    } else {
      // deep comparison check via lodash and we rate limit
      const recent =
        this.lastEmit + 1000 * const_1.STATUS_UPDATES_INTERVAL_S > Date.now();
      const changed = !(0, lodash_1.isEqual)(this.status, this.last);
      if (!recent && changed) {
        this.dbg("emitInfo[changed]", this.status);
        this.doEmit();
      }
    }
  }

  /** Emit the current status and remember when that happened. */
  doEmit() {
    this.emit("status", this.status);
    this.lastEmit = Date.now();
  }

  /**
   * Mark a component as having a problem.
   * @param {string} name - component name
   */
  setComponentAlert(name) {
    // we set this to the time when we first got notified about the problem
    if (this.components[name] == null) {
      this.components[name] = Date.now();
    }
  }

  /**
   * Clear the problem marker of a component.
   * @param {string} name - component name
   */
  clearComponentAlert(name) {
    delete this.components[name];
  }

  /**
   * Derives elevated levels from the project info object and refreshes the
   * usage fields (disk_mb, mem_pct, cpu_pct, cpu_tot, mem_tot, mem_rss).
   * Does nothing if no info was received yet.
   */
  update_alerts() {
    if (this.info == null) return;
    const du = this.info.disk_usage.project;
    const ts = this.info.timestamp;

    const do_alert = (type, is_bad) => {
      if (is_bad) {
        // if it isn't fine, set it once to the timestamp (and let it age)
        if (this.elevated[type] == null) {
          this.elevated[type] = ts;
        }
      } else {
        // unless it's fine again, then remove the timestamp
        this.elevated[type] = null;
      }
    };

    do_alert("disk", du.free < const_1.ALERT_DISK_FREE);
    // NOTE(review): presumably du.usage is in MB, going by the field name — confirm
    this.disk_mb = du.usage;

    const cg = this.info.cgroup;
    const du_tmp = this.info.disk_usage.tmp;
    if (cg != null) {
      // we round/quantisize values to reduce the number of updates
      // and also send less data with each update
      const cgStats = (0, utils_1.cgroup_stats)(cg, du_tmp);
      this.mem_pct = Math.round(cgStats.mem_pct);
      this.cpu_pct = Math.round(cgStats.cpu_pct);
      this.cpu_tot = Math.round(cgStats.cpu_tot);
      this.mem_tot = quantize(cgStats.mem_tot, 1);
      this.mem_rss = quantize(cgStats.mem_rss, 1);
      do_alert("memory", cgStats.mem_pct > const_1.ALERT_HIGH_PCT);
      do_alert("cpu-cgroup", cgStats.cpu_pct > const_1.ALERT_HIGH_PCT);
    }
  }

  /**
   * Bookkeeping of processes with elevated CPU usage.
   *
   * @returns {string[]} sorted PIDs of processes whose CPU usage has been
   *   above ALERT_HIGH_PCT for more than RAISE_ALERT_AFTER_MIN minutes;
   *   empty if no info was received yet.
   */
  alert_cpu_processes() {
    const pids = [];
    if (this.info == null) return [];
    const ts = this.info.timestamp;
    const ecp = this.elevated_cpu_procs;
    // we have to check if there aren't any processes left which no longer exist
    const leftovers = new Set(Object.keys(ecp));
    // bookkeeping of elevated process PIDS
    for (const [pid, proc] of Object.entries(this.info.processes ?? {})) {
      leftovers.delete(pid);
      if (proc.cpu.pct > const_1.ALERT_HIGH_PCT) {
        if (ecp[pid] == null) {
          ecp[pid] = ts;
        }
      } else {
        delete ecp[pid];
      }
    }
    for (const pid of leftovers) {
      delete ecp[pid];
    }
    // to actually fire alert when necessary
    for (const [pid, ts] of Object.entries(ecp)) {
      if (
        ts != null &&
        (0, misc_1.how_long_ago_m)(ts) > const_1.RAISE_ALERT_AFTER_MIN
      ) {
        pids.push(pid);
      }
    }
    pids.sort(); // to make this stable across iterations
    //this.dbg("alert_cpu_processes", pids, ecp);
    return pids;
  }

  /**
   * Update alert levels and set alert states if they persist to be active.
   *
   * @returns {object[]} list of alerts, each of the form `{ type }`,
   *   `{ type: "cpu-process", pids }` or `{ type: "component", names }`
   */
  alerts() {
    this.update_alerts();
    const alerts = [];
    const alert_keys = ["cpu-cgroup", "disk", "memory"];
    for (const k of alert_keys) {
      const ts = this.elevated[k];
      if (
        ts != null &&
        (0, misc_1.how_long_ago_m)(ts) > const_1.RAISE_ALERT_AFTER_MIN
      ) {
        alerts.push({ type: k });
      }
    }
    const pids = this.alert_cpu_processes();
    if (pids.length > 0) alerts.push({ type: "cpu-process", pids });

    const componentNames = [];
    for (const [k, ts] of Object.entries(this.components)) {
      if (ts == null) continue;
      // we alert without a delay
      componentNames.push(k);
    }
    // only send any alert if there is actually a problem!
    if (componentNames.length > 0) {
      alerts.push({ type: "component", names: componentNames });
    }
    return alerts;
  }

  /**
   * Generate a random-walk usage object for development (see `fake_data`
   * flag in update()). Each value continues from the previous status' usage.
   *
   * @returns {object} usage object with the same keys as the real one
   */
  fake_data() {
    const lastUsage = this.last?.["usage"];

    // random step of at most +-max/100 from the previous value of `key`,
    // clamped to [0, max]; starts at max/2 if there is no previous value
    const next = (key, max) => {
      const last = lastUsage?.[key] ?? max / 2;
      const dx = max / 50;
      const val = last + dx * Math.random() - dx / 2;
      return Math.round(Math.min(max, Math.max(0, val)));
    };

    const mem_tot = 3000;
    const mem_pct = next("mem_pct", 100);
    const mem_rss = Math.round((mem_tot * mem_pct) / 100);
    const cpu_tot = (0, misc_1.round1)(
      (lastUsage?.["cpu_tot"] ?? 0) + Math.random() / 10
    );

    return {
      // the key must be "disk_mb" (the name used in the usage object),
      // otherwise the previous value is never found and the walk resets
      disk_mb: next("disk_mb", 3000),
      mem_tot,
      mem_pct,
      cpu_pct: next("cpu_pct", 100),
      cpu_tot,
      mem_rss,
    };
  }

  // this function takes the "info" we have (+ more maybe?)
  // and derives various states from it.
  // It shouldn't really matter how often it is being called,
  // but still only emit new objects if it is either really necessary (new alert)
  // or after some time. This must be a low-frequency and low-volume stream of data.
  update() {
    this.last = this.status;

    // alerts must come first, it updates usage status fields
    const alerts = this.alerts();

    // set this to true if you're developing (otherwise you don't get any data)
    const fake_data = false;

    // collect status fields in usage object
    const usage = fake_data
      ? this.fake_data()
      : {
          disk_mb: this.disk_mb,
          mem_pct: this.mem_pct,
          cpu_pct: this.cpu_pct,
          cpu_tot: this.cpu_tot,
          mem_rss: this.mem_rss,
          mem_tot: this.mem_tot,
        };

    this.status = { alerts, usage, version: smc_version_1.version };
  }

  /**
   * Recompute and return the current status.
   * @returns {Promise<object>} the `{ alerts, usage, version }` status object
   */
  async get_status() {
    this.update();
    return this.status;
  }

  /**
   * Mark the server as not running. This also ends the periodic emit loop
   * of testing mode (see _start()).
   */
  stop() {
    this.running = false;
  }

  /** Start the server; logs and does nothing if it is already running. */
  async start() {
    if (this.running) {
      this.dbg(
        "project-status/server: already running, cannot be started twice"
      );
    } else {
      await this._start();
    }
  }

  /**
   * Actual startup: subscribe to project info and emit an initial status.
   * In testing mode, additionally emits a status every 5 seconds until
   * stop() is called.
   *
   * @throws {Error} if the server is already running
   */
  async _start() {
    this.dbg("start");
    if (this.running) {
      throw new Error("Cannot start ProjectStatusServer twice");
    }
    this.running = true;
    await this.init();

    const status = await this.get_status();
    this.emit("status", status);

    // also check `running`, otherwise stop() could never end this loop
    while (this.testing && this.running) {
      await (0, awaiting_1.delay)(5000);
      const nextStatus = await this.get_status();
      this.emit("status", nextStatus);
    }
  }
}
exports.ProjectStatusServer = ProjectStatusServer;

// singleton, we instantiate it when we need it
let _status = undefined;

/**
 * @returns {ProjectStatusServer} the lazily created singleton instance
 */
function get_ProjectStatusServer() {
  if (_status != null) return _status;
  _status = new ProjectStatusServer();
  return _status;
}
exports.get_ProjectStatusServer = get_ProjectStatusServer;

// testing: $ ts-node server.ts
if (require.main === module) {
  const pss = new ProjectStatusServer(true);
  let cnt = 0;
  // register the listener before starting, so no status can be missed
  pss.on("status", (status) => {
    console.log(JSON.stringify(status, null, 2));
    cnt += 1;
    if (cnt >= 2) process.exit();
  });
  pss.start();
}
//# sourceMappingURL=server.js.map