UNPKG

smc-hub

Version:

CoCalc: Backend webserver component

353 lines (317 loc) 9.82 kB
/* * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc. * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details */ import { callback2 as cb2 } from "smc-util/async-utils"; import { RECENT_TIMES, RECENT_TIMES_KEY } from "smc-util/schema"; import * as misc from "smc-util/misc"; const { defaults } = misc; const required = defaults.required; const _ = require("underscore"); import { PostgreSQL } from "./types"; const { all_results } = require("../postgres-base"); // some stats queries have to crunch a lot of rows, which could take a bit // we give them a couple of minutes each… const QUERY_TIMEOUT_S = 300; interface Opts { ttl_dt: number; // 15 secs subtracted from ttl to compensate for computation duration when called via a cronjob ttl: number; // how long cached version lives (in seconds) ttl_db: number; // how long a valid result from a db query is cached in any case update: boolean; // if true: recalculate if older than ttl; false: don't recalculate and pick it from the DB (locally cached for ttl secs) cb: (err, stats) => void; } type Data = { [key: string]: number }; interface RunningProjects { free: number; member: number; } // TODO type this to fit with fields defined in db-schema/stats.ts interface Stats { id: string; time: Date; accounts: number; projects: number; projects_created: Data; projects_edited: Data; accounts_created: Data; accounts_active: Data; running_projects: RunningProjects; hub_servers: any; files_opened: { distinct: Data; total: Data; }; } let _stats_cached: any = null; let _stats_cached_db_query: Date | null = null; async function _count_timespan(db: PostgreSQL, opts): Promise<any> { opts = defaults(opts, { table: required, field: undefined, age_m: undefined, upper_m: undefined, // defaults to zero minutes (i.e. "now") }); const { table, field, age_m, upper_m } = opts; const where = {}; if (field != null) { if (age_m != null) { where[`${field} >= $::TIMESTAMP`] = misc.minutes_ago(age_m); } if (upper_m != null) { where[`${field} <= $::TIMESTAMP`] = misc.minutes_ago(upper_m); } } const result = await cb2(db._query, { query: `SELECT COUNT(*) FROM ${table}`, where, timeout_s: QUERY_TIMEOUT_S, }); // count_result return parseInt(result?.rows?.[0]?.count); } async function _count_opened_files(db: PostgreSQL, opts): Promise<void> { opts = defaults(opts, { age_m: undefined, key: required, data: required, distinct: required, // true or false }); const { age_m, key, data, distinct } = opts; const q = `\ WITH filenames AS ( SELECT ${distinct ? "DISTINCT" : ""} event ->> 'filename' AS fn FROM project_log WHERE time BETWEEN $1::TIMESTAMP AND NOW() AND event @> '{"action" : "open"}'::jsonb ), ext_count AS ( SELECT COUNT(*) as cnt, lower(reverse(split_part(reverse(fn), '.', 1))) AS ext FROM filenames GROUP BY ext ) SELECT ext, cnt FROM ext_count WHERE ext IN ('sagews', 'ipynb', 'tex', 'rtex', 'rnw', 'x11', 'rmd', 'txt', 'py', 'md', 'sage', 'term', 'rst', 'lean', 'png', 'svg', 'jpeg', 'jpg', 'pdf', 'tasks', 'course', 'sage-chat', 'chat') ORDER BY ext `; const res = await cb2(db._query, { query: q, params: [misc.minutes_ago(age_m)], timeout_s: QUERY_TIMEOUT_S, }); // misc.copy? see "all_results" const rows = res.rows.map((x) => misc.copy(x)); const values = _.object(_.pluck(rows, "ext"), _.pluck(rows, "cnt")); data[key] = values; } function check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg }): Stats | null { if (_stats_cached == null) return null; // decide if cache should be used -- tighten interval if we are allowed to update const offset_dt = update ? ttl_dt : 0; const is_cache_recent = _stats_cached.time > misc.seconds_ago(ttl - offset_dt); // in case we aren't allowed to update and the cache is outdated, do not query db too often const did_query_recently = _stats_cached_db_query != null && _stats_cached_db_query > misc.seconds_ago(ttl_db); if (is_cache_recent || did_query_recently) { dbg( `using locally cached stats from ${ (new Date().getTime() - _stats_cached.time) / 1000 } secs ago.` ); return _stats_cached; } return null; } async function check_db_cache({ db, update, ttl, ttl_dt, dbg, }): Promise<Stats | null> { try { const res = await cb2(db._query, { query: "SELECT * FROM stats ORDER BY time DESC LIMIT 1", }); if (res?.rows?.length != 1) { dbg("no data (1)"); return null; } const x = misc.map_without_undefined(res.rows[0]) as any; if (x == null) { dbg("no data (2)"); return null; } dbg(`check_db_cache x = ${misc.to_json(x)}`); _stats_cached_db_query = new Date(); if (update && x.time < misc.seconds_ago(ttl - ttl_dt)) { dbg("cache outdated -- will update stats"); return null; } else { dbg( `using db stats from ${ (new Date().getTime() - x.time) / 1000 } secs ago.` ); // storing still valid result in local cache _stats_cached = misc.deep_copy(x); return _stats_cached; } } catch (err) { dbg("problem with query -- no stats in db?"); throw err; } } const running_projects_query = `\ SELECT count(*), run_quota -> 'member_host' AS member FROM projects WHERE state ->> 'state' in ('running', 'starting') GROUP BY member`; async function calc_running_projects(db): Promise<RunningProjects> { const data = { free: 0, member: 0 }; const res = await cb2(db._query, { query: running_projects_query }); for (const row of res.rows) { if (row.member) { data.member = parseInt(row.count); } else { data.free = parseInt(row.count); } } return data; } async function _calc_stats({ db, dbg, start_t }): Promise<Stats> { const stats: Stats = { id: misc.uuid(), time: new Date(), accounts: 0, projects: 0, projects_created: {}, projects_edited: {}, accounts_created: {}, accounts_active: {}, files_opened: { distinct: {}, total: {} }, hub_servers: [], running_projects: { free: 0, member: 0 }, }; const R = RECENT_TIMES; const K = RECENT_TIMES_KEY; stats.accounts = await _count_timespan(db, { table: "accounts", }); stats.projects = await _count_timespan(db, { table: "projects", }); stats.projects_edited[K.active] = await _count_timespan(db, { table: "projects", field: "last_edited", age_m: R.active, }); stats.accounts_active[K.active] = await _count_timespan(db, { table: "accounts", field: "last_active", age_m: R.active, }); await new Promise<void>((done, reject) => { db._query({ query: "SELECT expire, host, clients FROM hub_servers", cb: all_results((err, hub_servers) => { if (err) { reject(err); } else { const now = new Date(); stats.hub_servers = []; for (let x of hub_servers) { if (x.expire > now) { delete x.expire; stats.hub_servers.push(x); } } done(); } }), }); }); // this was running in parallel, but there is no hurry updating the stats... for (const tkey of ["last_month", "last_week", "last_day", "last_hour"]) { await _count_opened_files(db, { age_m: R[tkey], key: K[tkey], data: stats.files_opened.distinct, distinct: true, }); await _count_opened_files(db, { age_m: R[tkey], key: K[tkey], data: stats.files_opened.total, distinct: false, }); stats.projects_edited[K[tkey]] = await _count_timespan(db, { table: "projects", field: "last_edited", age_m: R[tkey], }); stats.projects_created[K[tkey]] = await _count_timespan(db, { table: "projects", field: "created", age_m: R[tkey], }); stats.accounts_active[K[tkey]] = await _count_timespan(db, { table: "accounts", field: "last_active", age_m: R[tkey], }); stats.accounts_created[K[tkey]] = await _count_timespan(db, { table: "accounts", field: "created", age_m: R[tkey], }); } stats.running_projects = await calc_running_projects(db); const elapsed_t = process.hrtime(start_t); const duration_s = (elapsed_t[0] + elapsed_t[1] / 1e9).toFixed(4); dbg( `everything succeeded above after ${duration_s} secs -- now insert stats` ); // storing in local and db cache _stats_cached = misc.deep_copy(stats); await cb2(db._query, { query: "INSERT INTO stats", values: stats, }); return stats; } export async function calc_stats(db: PostgreSQL, opts: Opts) { const { ttl_dt, ttl, ttl_db, update, cb } = opts; const start_t = process.hrtime(); const dbg = db._dbg("get_stats"); let stats: Stats | null = null; stats = check_local_cache({ update, ttl_dt, ttl, ttl_db, dbg }); if (stats == null) { dbg("checking db cache?"); stats = await check_db_cache({ db, update, ttl, ttl_dt, dbg }); } if (stats != null) { dbg(`stats != null → nothing to do`); } else if (!update) { dbg("warning: no recent stats but not allowed to update"); } else { dbg("we're actually recomputing the stats"); try { stats = await _calc_stats({ db, dbg, start_t }); } catch (err) { dbg(`error calculating stats: err=${err}`); cb?.(err, null); return; } } dbg(`stats=${misc.to_json(stats)})`); // uncomment to fully debug the resulting stats object //console.debug(JSON.stringify(stats, null, 2)); //process.exit(); cb?.(undefined, stats); return stats; }