smc-hub
Version:
CoCalc: Backend webserver component
169 lines (154 loc) • 6.51 kB
text/coffeescript
#########################################################################
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
#########################################################################
###
PostgreSQL -- operations code, e.g., backups, maintenance, etc.
COPYRIGHT : (c) 2017 SageMath, Inc.
LICENSE : AGPLv3
###
fs = require('fs')
async = require('async')
misc_node = require('smc-util-node/misc_node')
{defaults} = misc = require('smc-util/misc')
required = defaults.required
{SCHEMA} = require('smc-util/schema')
exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
    # Back up the indicated tables to compressed pg_dump files under opts.path,
    # optionally snapshotting the result into a bup archive.
    #
    # WARNING: This is NOT guaranteed to give a point-in-time backup of the
    # entire database across tables!  The backup of each table is only
    # consistent within that table.  For CoCalc, this tends to be fine, due to
    # our design.  The advantage of this is that we can back up huge tables
    # only once a week, and other smaller tables much more frequently.
    #
    # opts.tables is one of:
    #   - a list of table names
    #   - 'all'      -- everything in the SMC schema (not the database!)
    #   - 'critical' -- only smaller critical tables, which we would
    #                   desperately need for disaster recovery
    backup_tables: (opts) =>
        opts = defaults opts,
            tables : required   # list of tables, 'all' or 'critical'
            path   : 'backup'   # directory the .bak files are written into
            limit  : 3          # number of tables to back up in parallel
            bup    : true       # if true, also create/update a bup archive in
                                # "#{opts.path}/.bup", so we have snapshots of
                                # all past backups
            cb     : required
        tables = @_get_backup_tables(opts.tables)
        dbg = @_dbg("backup_tables()")
        dbg("backing up tables: #{misc.to_json(tables)}")
        async.series([
            (cb) =>
                backup = (table, cb) =>
                    dbg("backup '#{table}'")
                    @_backup_table
                        table : table
                        path  : opts.path
                        cb    : cb
                async.mapLimit(tables, opts.limit, backup, cb)
            (cb) =>
                # BUGFIX: the bup step used to run unconditionally; honor the
                # documented opts.bup flag (default true keeps old behavior).
                if not opts.bup
                    cb()
                    return
                @_backup_bup
                    path : opts.path
                    cb   : cb
        ], (err) => opts.cb(err))

    # Dump a single table to "#{opts.path}/#{opts.table}.bak" using pg_dump's
    # custom (compressed) format.
    _backup_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_backup_table(table='#{opts.table}')")
        # Quote the path, since it may contain spaces or other shell
        # metacharacters (matches the quoting _backup_bup already does).
        cmd = "mkdir -p '#{opts.path}'; time pg_dump -Fc --table #{opts.table} #{@_database} > '#{opts.path}/#{opts.table}.bak'"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0     # a big table can take arbitrarily long
            home        : '.'
            env         :
                PGPASSWORD : @_password
                # NOTE(review): _restore_table uses @_user here, but backup
                # hard-codes 'smc' -- confirm which is intended before unifying.
                PGUSER     : 'smc'
                PGHOST     : @_host
            err_on_exit : true
            cb          : opts.cb

    # Snapshot the backup directory into the bup archive at "#{opts.path}/.bup".
    _backup_bup: (opts) =>
        opts = defaults opts,
            path : 'backup'
            cb   : required
        dbg = @_dbg("_backup_bup(path='#{opts.path}')")
        # We use no compression because the backup files are already all highly
        # compressed.  (Removed a stray "export &&" from this pipeline -- with
        # no arguments it only dumped the environment to stdout.)
        cmd = "mkdir -p '#{opts.path}' && bup init && bup index '#{opts.path}' && bup save --strip --compress=0 '#{opts.path}' -n master"
        dbg(cmd)
        misc_node.execute_code
            command     : cmd
            timeout     : 0
            home        : '.'
            env         :
                BUP_DIR : "#{opts.path}/.bup"
            err_on_exit : true
            cb          : opts.cb

    # Normalize the `tables` argument of backup_tables/restore_tables into an
    # explicit list of table names.
    _get_backup_tables: (tables) =>
        if misc.is_array(tables)
            return tables
        # All non-virtual tables in the schema.
        all = (t for t,s of SCHEMA when not s.virtual)
        if tables == 'all'
            return all
        else if tables == 'critical'
            # TODO: critical for backup or not should probably be in the
            # schema itself, not here.
            v = []
            non_critical = ['stats','syncstrings','file_use','eval_outputs','blobs','eval_inputs','patches','cursors']
            for x in all
                # Skip anything log-like and the explicitly non-critical tables.
                if x.indexOf('log') == -1 and x not in non_critical
                    v.push(x)
            return v
        else
            # A single table name was given.
            return [tables]

    # Restore the given tables from the backup in the given directory.
    restore_tables: (opts) =>
        opts = defaults opts,
            tables : undefined          # same as for backup_tables, or
                                        # undefined to restore whatever we
                                        # have in the path
            path   : '/backup/postgres' # directory containing the .bak files
            limit  : 5                  # number of tables restored in parallel
            cb     : required
        # Tables for which a '.bak' dump file exists in opts.path.
        backed_up_tables = (filename[...-4] for filename in fs.readdirSync(opts.path) when filename[-4..] == '.bak')
        if not opts.tables?
            tables = backed_up_tables
        else
            tables = @_get_backup_tables(opts.tables)
        # Fail early if any requested table has no backup file.
        for table in tables
            if table not in backed_up_tables
                opts.cb("there is no backup of '#{table}'")
                return
        dbg = @_dbg("restore_tables()")
        dbg("restoring tables: #{misc.to_json(tables)}")
        restore = (table, cb) =>
            dbg("restore '#{table}'")
            @_restore_table
                table : table
                path  : opts.path
                cb    : cb
        async.mapLimit(tables, opts.limit, restore, (err) => opts.cb(err))

    # Drop (if present) then restore a single table from its pg_dump file.
    _restore_table: (opts) =>
        opts = defaults opts,
            table : required
            path  : 'backup'
            cb    : required
        dbg = @_dbg("_restore_table(table='#{opts.table}')")
        async.series([
            (cb) =>
                dbg("dropping existing table if it exists")
                # Table names come from the schema or from backup filenames,
                # not from end users, so interpolation here is acceptable.
                @_query
                    query : "DROP TABLE IF EXISTS #{opts.table}"
                    cb    : cb
            (cb) =>
                # Quote the path for the same reason as in _backup_table.
                cmd = "time pg_restore -C -d #{@_database} '#{opts.path}/#{opts.table}.bak'"
                dbg(cmd)
                misc_node.execute_code
                    command     : cmd
                    timeout     : 0
                    home        : '.'
                    env         :
                        PGPASSWORD : @_password
                        PGUSER     : @_user
                        PGHOST     : @_host
                    err_on_exit : true
                    cb          : cb
        ], (err) => opts.cb(err))