// smc-hub
// Version:
// CoCalc: Backend webserver component
// 1,373 lines (1,334 loc) • 47.8 kB
// JavaScript
// Generated by CoffeeScript 2.5.1
(function() {
//########################################################################
// This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
// License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
//########################################################################
var COCALC_BLOB_STORE, TIMEOUT_LONG_S, all_results, async, defaults, delete_patches, expire_time, filesystem_bucket, fs, misc, misc_node, one_result, required, snappy, zlib,
boundMethodCheck = function(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new Error('Bound instance method accessed before binding'); } };
/*
PostgreSQL -- implementation of queries needed for storage and managing blobs,
including backups, integration with google cloud storage, etc.
COPYRIGHT : (c) 2017 SageMath, Inc.
LICENSE : AGPLv3
*/
// Bucket used for cheaper longterm storage of blobs (outside of PostgreSQL).
// NOTE: We should add this to site configuration, and have it get read once when first
// needed and cached. Also it would be editable in admin account settings.
// If this env variable begins with a / it is assumed to be a path in the filesystem,
// e.g., a remote mount (in practice, we are using gcsfuse to mount gcloud buckets).
// If it is gs:// then it is a google cloud storage bucket.
COCALC_BLOB_STORE = process.env.COCALC_BLOB_STORE;
async = require('async');
snappy = require('snappy');
zlib = require('zlib');
fs = require('fs');
misc_node = require('smc-util-node/misc_node');
({defaults} = misc = require('smc-util/misc'));
required = defaults.required;
({expire_time, one_result, all_results} = require('./postgres-base'));
({delete_patches} = require('./postgres/delete-patches'));
({filesystem_bucket} = require('./filesystem-bucket'));
// some queries do searches, which could take a bit. we give them 5 minutes …
TIMEOUT_LONG_S = 300;
exports.extend_PostgreSQL = function(ext) {
var PostgreSQL;
return PostgreSQL = class PostgreSQL extends ext {
constructor() {
super(...arguments);
this.save_blob = this.save_blob.bind(this);
// Used internally by save_blob to possibly extend the expire time of a blob.
this._extend_blob_ttl = this._extend_blob_ttl.bind(this);
this.get_blob = this.get_blob.bind(this);
this.touch_blob = this.touch_blob.bind(this);
// Return gcloud API interface
this.gcloud = this.gcloud.bind(this);
this.blob_store = this.blob_store.bind(this);
// Uploads the blob with given sha1 uuid to gcloud storage, if it hasn't already
// been uploaded there.
this.copy_blob_to_gcloud = this.copy_blob_to_gcloud.bind(this);
/*
Backup limit blobs that previously haven't been dumped to blobs, and put them in
a tarball in the given path. The tarball's name is the time when the backup starts.
The tarball is compressed using gzip compression.
db._error_thresh=1e6; db.backup_blobs_to_tarball(limit:10000,path:'/backup/tmp-blobs',repeat_until_done:60, cb:done())
I have not written code to restore from these tarballs. Assuming the database has been restored,
so there is an entry in the blobs table for each blob, it would suffice to upload the tarballs,
then copy their contents straight into the COCALC_BLOB_STORE, and that’s it.
If we don't have the blobs table in the DB, make dummy entries from the blob names in the tarballs.
*/
this.backup_blobs_to_tarball = this.backup_blobs_to_tarball.bind(this);
/*
Copied all blobs that will never expire to a google cloud storage bucket.
errors={}; db.copy_all_blobs_to_gcloud(limit:500, cb:done(), remove:true, repeat_until_done_s:10, errors:errors)
*/
this.copy_all_blobs_to_gcloud = this.copy_all_blobs_to_gcloud.bind(this);
this.blob_maintenance = this.blob_maintenance.bind(this);
this.remove_blob_ttls = this.remove_blob_ttls.bind(this);
// If blob has been copied to gcloud, remove the BLOB part of the data
// from the database (to save space). If not copied, copy it to gcloud,
// then remove from database.
this.close_blob = this.close_blob.bind(this);
/*
* Syncstring maintainence
*/
this.syncstring_maintenance = this.syncstring_maintenance.bind(this);
// Offlines and archives the patch, unless the string is active very recently, in
// which case this is a no-op.
this.archive_patches = this.archive_patches.bind(this);
this.unarchive_patches = this.unarchive_patches.bind(this);
/*
Export/import of syncstring history and info. Right now used mainly for debugging
purposes, but will obviously be useful for a user-facing feature involving import
and export (and copying) of complete edit history.
*/
this.export_patches = this.export_patches.bind(this);
this.import_patches = this.import_patches.bind(this);
this.delete_blob = this.delete_blob.bind(this);
}
// Save a blob (string or Buffer) under its sha1-based uuid with an
// *at least* ttl-second lifetime (ttl=0 means never expire).  If the blob
// row already exists, only its expire time may get extended (the content
// is left alone).  cb(err, ttl actually used).
save_blob(opts) {
var dbg, rows, ttl, uuid;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: void 0, // uuid=sha1-based id coming from blob
blob: required, // unless check=true, we assume misc_node.uuidsha1(opts.blob) == opts.uuid;
// blob must be a string or Buffer
ttl: 0, // object in blobstore will have *at least* this ttl in seconds;
// if there is already something in blobstore with longer ttl, we leave it;
// infinite ttl = 0.
project_id: required, // the id of the project that is saving the blob
check: false, // if true, will give error if misc_node.uuidsha1(opts.blob) != opts.uuid
compress: void 0, // optional compression to use: 'gzip', 'zlib', 'snappy'; only used if blob not already in db.
level: -1, // compression level (if compressed) -- see https://github.com/expressjs/compression#level
cb: required // cb(err, ttl actually used in seconds); ttl=0 for infinite ttl
});
if (!Buffer.isBuffer(opts.blob)) {
// CRITICAL: We assume everywhere below that opts.blob is a
// buffer, e.g., in the .toString('hex') method!
opts.blob = Buffer.from(opts.blob);
}
if (opts.uuid == null) {
// no uuid given -- derive it from the content itself
opts.uuid = misc_node.uuidsha1(opts.blob);
} else if (opts.check) {
// caller supplied both uuid and check=true -- verify they agree
uuid = misc_node.uuidsha1(opts.blob);
if (uuid !== opts.uuid) {
opts.cb(`the sha1 uuid (='${uuid}') of the blob must equal the given uuid (='${opts.uuid}')`);
return;
}
}
if (!misc.is_valid_uuid_string(opts.uuid)) {
opts.cb("uuid is invalid");
return;
}
dbg = this._dbg(`save_blob(uuid='${opts.uuid}')`);
dbg();
rows = ttl = void 0;
return async.series([
(cb) => {
// Step 1: check whether this blob is already stored (and its expire time).
return this._query({
query: 'SELECT expire FROM blobs',
where: {
"id = $::UUID": opts.uuid
},
cb: (err,
x) => {
rows = x != null ? x.rows : void 0;
return cb(err);
}
});
},
(cb) => {
// Step 2: only compress when the blob is NOT already in the DB --
// otherwise we must not touch the stored content.
if (rows.length === 0 && opts.compress) {
dbg("compression requested and blob not already saved, so we compress blob");
switch (opts.compress) {
case 'gzip':
return zlib.gzip(opts.blob,
{
level: opts.level
},
(err,
blob) => {
opts.blob = blob;
return cb(err);
});
case 'zlib':
return zlib.deflate(opts.blob,
{
level: opts.level
},
(err,
blob) => {
opts.blob = blob;
return cb(err);
});
case 'snappy':
// snappy has no compression level
return snappy.compress(opts.blob,
(err,
blob) => {
opts.blob = blob;
return cb(err);
});
default:
return cb(`compression format '${opts.compress}' not implemented`);
}
} else {
return cb();
}
},
(cb) => {
// Step 3: insert the new blob, or just extend the ttl of the existing row.
if (rows.length === 0) {
dbg("nothing in DB, so we insert the blob.");
ttl = opts.ttl;
return this._query({
query: "INSERT INTO blobs",
values: {
id: opts.uuid,
blob: '\\x' + opts.blob.toString('hex'), // bytea hex input format
project_id: opts.project_id,
count: 0,
size: opts.blob.length,
created: new Date(),
compress: opts.compress,
expire: ttl ? expire_time(ttl) : void 0
},
cb: cb
});
} else {
dbg("blob already in the DB, so see if we need to change the expire time");
return this._extend_blob_ttl({
expire: rows[0].expire,
ttl: opts.ttl,
uuid: opts.uuid,
cb: (err,
_ttl) => {
// record the ttl that is actually in effect for the final callback
ttl = _ttl;
return cb(err);
}
});
}
}
], (err) => {
return opts.cb(err, ttl);
});
}
_extend_blob_ttl(opts) {
var new_expire, ttl, z;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
expire: void 0, // what expire is currently set to in the database
ttl: required, // requested ttl -- extend expire to at least this
uuid: required,
cb: required // (err, effective ttl (with 0=oo))
});
if (!misc.is_valid_uuid_string(opts.uuid)) {
opts.cb("uuid is invalid");
return;
}
if (!opts.expire) {
// ttl already infinite -- nothing to do
opts.cb(void 0, 0);
return;
}
new_expire = ttl = void 0;
if (opts.ttl) {
// saved ttl is finite as is requested one; change in DB if requested is longer
z = expire_time(opts.ttl);
if (z > opts.expire) {
new_expire = z;
ttl = opts.ttl;
} else {
ttl = (opts.expire - new Date()) / 1000.0;
}
} else {
// saved ttl is finite but requested one is infinite
ttl = new_expire = 0;
}
if (new_expire != null) {
// change the expire time for the blob already in the DB
return this._query({
query: 'UPDATE blobs',
where: {
"id = $::UUID": opts.uuid
},
set: {
"expire :: TIMESTAMP ": new_expire === 0 ? void 0 : new_expire
},
cb: (err) => {
return opts.cb(err, ttl);
}
});
} else {
return opts.cb(void 0, ttl);
}
}
// Fetch a blob by uuid.  Looks in the database first; if the row only
// points at gcloud storage, reads it from the bucket (optionally writing
// it back into the DB for faster future access).  Decompresses according
// to the compression format recorded in the row.  Calls
// cb(err, blob) -- blob is undefined when there is no (unexpired) blob.
get_blob(opts) {
var blob, x;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: required,
save_in_db: false, // if true and blob isn't in DB and is only in gcloud, copies to local DB
// (for faster access e.g., 20ms versus 5ms -- i.e., not much faster; gcloud is FAST too.)
touch: true,
cb: required // cb(err) or cb(undefined, blob_value) or cb(undefined, undefined) in case no such blob
});
if (!misc.is_valid_uuid_string(opts.uuid)) {
opts.cb("uuid is invalid");
return;
}
x = void 0;
blob = void 0;
return async.series([
(cb) => {
// Step 1: load the row (expire/blob/gcloud/compress) for this uuid.
return this._query({
query: "SELECT expire, blob, gcloud, compress FROM blobs",
where: {
"id = $::UUID": opts.uuid
},
cb: one_result((err,
_x) => {
x = _x;
return cb(err);
})
});
},
(cb) => {
// Step 2: resolve the blob bytes from the row, possibly via gcloud.
if (x == null) {
// nothing to do -- blob not in db (probably expired)
return cb();
} else if (x.expire && x.expire <= new Date()) {
// the blob already expired -- background delete it
this._query({ // delete it (but don't wait for this to finish)
query: "DELETE FROM blobs",
where: {
"id = $::UUID": opts.uuid
}
});
return cb();
} else if (x.blob != null) {
// blob not expired and is in database
blob = x.blob;
return cb();
} else if (x.gcloud) {
if (COCALC_BLOB_STORE == null) {
cb("no blob store configured -- set the COCALC_BLOB_STORE env variable");
return;
}
// blob not available locally, but should be in a Google cloud storage bucket -- try to get it
// NOTE: we now ignore the actual content of x.gcloud -- we don't support spreading blobs
// across multiple buckets... as it isn't needed because buckets are infinite, and it
// is potentially confusing to manage.
return this.blob_store().read({
name: opts.uuid,
cb: (err,
_blob) => {
if (err) {
return cb(err);
} else {
blob = _blob;
// cb fires immediately; the optional write-back below is fire-and-forget
cb();
if (opts.save_in_db) {
// also save in database so will be faster next time (again, don't wait on this)
return this._query({
query: "UPDATE blobs",
set: {
blob: blob
},
where: {
"id = $::UUID": opts.uuid
}
});
}
}
}
});
} else {
// blob not local and not in gcloud -- this shouldn't happen
// (just view this as "expired" by not setting blob)
return cb();
}
},
(cb) => {
// Step 3: decompress if the row recorded a compression format.
if ((blob == null) || ((x != null ? x.compress : void 0) == null)) {
cb();
return;
}
// blob is compressed -- decompress it
switch (x.compress) {
case 'gzip':
return zlib.gunzip(blob,
(err,
_blob) => {
blob = _blob;
return cb(err);
});
case 'zlib':
return zlib.inflate(blob,
(err,
_blob) => {
blob = _blob;
return cb(err);
});
case 'snappy':
return snappy.uncompress(blob,
(err,
_blob) => {
blob = _blob;
return cb(err);
});
default:
return cb(`compression format '${x.compress}' not implemented`);
}
}
], (err) => {
opts.cb(err, blob);
if ((blob != null) && opts.touch) {
// blob was pulled from db or gcloud, so note that it was accessed (updates a counter)
return this.touch_blob({
uuid: opts.uuid
});
}
});
}
touch_blob(opts) {
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: required,
cb: void 0
});
if (!misc.is_valid_uuid_string(opts.uuid)) {
if (typeof opts.cb === "function") {
opts.cb("uuid is invalid");
}
return;
}
return this._query({
query: "UPDATE blobs SET count = count + 1, last_active = NOW()",
where: {
"id = $::UUID": opts.uuid
},
cb: opts.cb
});
}
gcloud() {
boundMethodCheck(this, PostgreSQL);
return this._gcloud != null ? this._gcloud : this._gcloud = require('./smc_gcloud').gcloud();
}
blob_store(bucket) {
boundMethodCheck(this, PostgreSQL);
if (!bucket) {
bucket = COCALC_BLOB_STORE;
}
if (misc.startswith(bucket, 'gs://')) {
// Google Cloud Storage -- only works if hub has full direct gcloud storage API access, so
// NOT in KuCalc or Docker or really anywhere anymore...
return this.gcloud().bucket({
name: bucket.slice('gs://'.length)
});
} else {
// Filesystem -- could be a big NFS volume, remotely mounted gcsfuse, or just
// a single big local filesystem -- etc. -- we don't care.
return filesystem_bucket({
name: bucket
});
}
}
// Upload the blob with the given sha1-based uuid to the gcloud storage
// bucket, unless it is already there (or force=true).  After writing, the
// object is read back and compared to guard against a corrupt upload;
// only then is the row's gcloud column set (and optionally the local
// blob content removed).
copy_blob_to_gcloud(opts) {
var dbg, locals;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: required, // uuid=sha1-based uuid coming from blob
bucket: COCALC_BLOB_STORE, // name of bucket
force: false, // if true, upload even if already uploaded
remove: false, // if true, deletes blob from database after successful upload to gcloud (to free space)
cb: void 0 // cb(err)
});
dbg = this._dbg(`copy_blob_to_gcloud(uuid='${opts.uuid}')`);
dbg();
if (!misc.is_valid_uuid_string(opts.uuid)) {
dbg("invalid uuid");
if (typeof opts.cb === "function") {
opts.cb("uuid is invalid");
}
return;
}
if (!opts.bucket) {
dbg("invalid bucket");
if (typeof opts.cb === "function") {
opts.cb("no blob store configured -- set the COCALC_BLOB_STORE env variable");
}
return;
}
locals = {
x: void 0
};
return async.series([
(cb) => {
dbg("get blob info from database");
return this._query({
query: "SELECT blob, gcloud FROM blobs",
where: {
"id = $::UUID": opts.uuid
},
cb: one_result((err,
x) => {
locals.x = x;
if (err) {
return cb(err);
} else if (x == null) {
return cb('no such blob');
} else if (!x.blob && !x.gcloud) {
return cb('blob not available -- this should not be possible');
} else if (!x.blob && opts.force) {
// content was already deleted locally, so a forced re-upload is impossible
return cb("blob can't be re-uploaded since it was already deleted");
} else {
return cb();
}
})
});
},
(cb) => {
// skip upload when already in gcloud (unless forced) or content is gone
if (((locals.x.gcloud != null) && !opts.force) || (locals.x.blob == null)) {
dbg("already uploaded -- don't need to do anything; or already deleted locally");
cb();
return;
}
// upload to Google cloud storage
locals.bucket = this.blob_store(opts.bucket);
return locals.bucket.write({
name: opts.uuid,
content: locals.x.blob,
cb: cb
});
},
(cb) => {
// same skip condition as the upload step, so verification only runs
// when we actually just wrote the object
if (((locals.x.gcloud != null) && !opts.force) || (locals.x.blob == null)) {
// already uploaded -- don't need to do anything; or already deleted locally
cb();
return;
}
dbg("read blob back and compare"); // -- we do *NOT* trust GCS with such important data
return locals.bucket.read({
name: opts.uuid,
cb: (err,
data) => {
if (err) {
return cb(err);
} else if (!locals.x.blob.equals(data)) {
dbg("FAILED!");
return cb("BLOB write to GCS failed check!");
} else {
dbg("check succeeded");
return cb();
}
}
});
},
(cb) => {
var set;
if (locals.x.blob == null) {
// no blob in db; nothing further to do.
return cb();
} else {
// Upload to gcloud succeeded -- record the bucket name in the gcloud column
set = {
gcloud: opts.bucket
};
if (opts.remove) {
set.blob = null; // remove blob content from database to save space
}
return this._query({
query: "UPDATE blobs",
where: {
"id = $::UUID": opts.uuid
},
set: set,
cb: cb
});
}
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
// Back up (up to) opts.limit blobs that haven't been backed up yet into a
// gzip-compressed tarball at opts.path, named by the backup start time.
// On full success, marks the included blobs as backed up.  When
// repeat_until_done is set and a full batch was written, schedules itself
// again after that many seconds.  cb(err, tarball_filename).
backup_blobs_to_tarball(opts) {
var dbg, dir, join, tarball, target, to_remove, v;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
limit: 10000, // number of blobs to backup
path: required, // path where [timestamp].tar file is placed
throttle: 0, // wait this many seconds between pulling blobs from database
repeat_until_done: 0, // if positive, keeps re-call'ing this function until no more
// results to backup (pauses this many seconds between)
map_limit: 5,
cb: void 0 // cb(err, '[timestamp].tar')
});
dbg = this._dbg(`backup_blobs_to_tarball(limit=${opts.limit},path='${opts.path}')`);
join = require('path').join;
dir = misc.date_to_snapshot_format(new Date());
target = join(opts.path, dir);
tarball = target + '.tar.gz';
v = void 0; // ids of blobs that still need backup
to_remove = []; // ids whose temp files were written and must be cleaned up
return async.series([
(cb) => {
// Step 1: create the temporary working directory.
dbg(`make target='${target}'`);
return fs.mkdir(target,
cb);
},
(cb) => {
dbg("get blobs that we need to back up");
return this._query({
query: "SELECT id FROM blobs",
where: "expire IS NULL and backup IS NOT true",
limit: opts.limit,
timeout_s: TIMEOUT_LONG_S,
cb: all_results('id',
(err,
x) => {
v = x;
return cb(err);
})
});
},
(cb) => {
// Step 3: write each blob to a file in the temp dir (map_limit at a time).
var f;
dbg(`backing up ${v.length} blobs`);
f = (id,
cb) => {
return this.get_blob({
uuid: id,
touch: false,
cb: (err,
blob) => {
if (err) {
dbg(`ERROR! blob ${id} -- ${err}`);
return cb(err);
} else if (blob != null) {
dbg(`got blob ${id} from db -- now write to disk`);
to_remove.push(id);
return fs.writeFile(join(target,
id),
blob,
(err) => {
if (opts.throttle) {
return setTimeout(cb,
opts.throttle * 1000);
} else {
return cb();
}
});
} else {
dbg(`blob ${id} is expired, so nothing to be done, ever.`);
return cb();
}
}
});
};
return async.mapLimit(v,
opts.map_limit,
f,
cb);
},
(cb) => {
dbg("successfully wrote all blobs to files; now make tarball");
return misc_node.execute_code({
command: 'tar',
args: ['zcvf',
tarball,
dir],
path: opts.path,
timeout: 3600,
cb: cb
});
},
(cb) => {
// Step 5: delete the temporary per-blob files.
var f;
dbg("remove temporary blobs");
f = (x,
cb) => {
return fs.unlink(join(target,
x),
cb);
};
return async.mapLimit(to_remove,
10,
f,
cb);
},
(cb) => {
dbg("remove temporary directory");
return fs.rmdir(target,
cb);
},
(cb) => {
dbg("backup succeeded completely -- mark all blobs as backed up");
return this._query({
query: "UPDATE blobs",
set: {
backup: true
},
where: {
"id = ANY($)": v
},
cb: cb
});
}
], (err) => {
var f;
if (err) {
dbg(`ERROR: ${err}`);
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
} else {
dbg("done");
// a full batch suggests there may be more to back up -- go again
if (opts.repeat_until_done && to_remove.length === opts.limit) {
f = () => {
return this.backup_blobs_to_tarball(opts);
};
return setTimeout(f, opts.repeat_until_done * 1000);
} else {
return typeof opts.cb === "function" ? opts.cb(void 0, tarball) : void 0;
}
}
});
}
copy_all_blobs_to_gcloud(opts) {
var dbg;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
bucket: COCALC_BLOB_STORE,
limit: 1000, // copy this many in each batch
map_limit: 1, // copy this many at once.
throttle: 0, // wait this many seconds between uploads
repeat_until_done_s: 0, // if nonzero, waits this many seconds, then calls this function again until nothing gets uploaded.
errors: void 0, // object: used to accumulate errors -- if not given, then everything will terminate on first error
remove: false,
cutoff: '1 month', // postgresql interval - only copy blobs to gcloud that haven't been accessed at least this long.
cb: required
});
dbg = this._dbg("copy_all_blobs_to_gcloud");
dbg();
// This query selects the blobs that will never expire, but have not yet
// been copied to Google cloud storage.
dbg("getting blob id's...");
return this._query({
query: 'SELECT id, size FROM blobs',
where: `expire IS NULL AND gcloud IS NULL and (last_active <= NOW() - INTERVAL '${opts.cutoff}' OR last_active IS NULL)`,
limit: opts.limit,
timeout_s: TIMEOUT_LONG_S,
//# order_by : 'id' # this is not important and was causing VERY excessive load in production (due to bad query plannnig?!)
cb: all_results((err, v) => {
var f, m, n;
if (err) {
dbg(`fail: ${err}`);
return opts.cb(err);
} else {
n = v.length;
m = 0;
dbg(`got ${n} blob id's`);
f = (x, cb) => {
var k, start;
m += 1;
k = m;
start = new Date();
dbg(`**** ${k}/${n}: uploading ${x.id} of size ${x.size / 1000}KB`);
return this.copy_blob_to_gcloud({
uuid: x.id,
bucket: opts.bucket,
remove: opts.remove,
cb: (err) => {
dbg(`**** ${k}/${n}: finished -- ${err}; size ${x.size / 1000}KB; time=${new Date() - start}ms`);
if (err) {
if (opts.error != null) {
opts.errors[x.id] = err;
} else {
cb(err);
}
}
if (opts.throttle) {
return setTimeout(cb, 1000 * opts.throttle);
} else {
return cb();
}
}
});
};
return async.mapLimit(v, opts.map_limit, f, (err) => {
dbg(`finished this round -- ${err}`);
if (err && (opts.errors == null)) {
opts.cb(err);
return;
}
if (opts.repeat_until_done_s && v.length > 0) {
dbg("repeat_until_done triggering another round");
return setTimeout((() => {
return this.copy_all_blobs_to_gcloud(opts);
}), opts.repeat_until_done_s * 1000);
} else {
dbg(`done : ${misc.to_json(opts.errors)}`);
return opts.cb(misc.len(opts.errors) > 0 ? opts.errors : void 0);
}
});
}
})
});
}
blob_maintenance(opts) {
var dbg;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
path: '/backup/blobs',
map_limit: 1,
blobs_per_tarball: 10000,
throttle: 0,
cb: void 0
});
dbg = this._dbg("blob_maintenance()");
dbg();
return async.series([
(cb) => {
dbg("maintain the patches and syncstrings");
return this.syncstring_maintenance({
repeat_until_done: true,
limit: 500,
map_limit: opts.map_limit,
delay: 1000, // 1s, since syncstring_maintence heavily loads db
cb: cb
});
},
(cb) => {
dbg("backup_blobs_to_tarball");
return this.backup_blobs_to_tarball({
throttle: opts.throttle,
limit: opts.blobs_per_tarball,
path: opts.path,
map_limit: opts.map_limit,
repeat_until_done: 5,
cb: cb
});
},
(cb) => {
var errors;
dbg("copy_all_blobs_to_gcloud");
errors = {};
return this.copy_all_blobs_to_gcloud({
limit: 1000,
repeat_until_done_s: 5,
errors: errors,
remove: true,
map_limit: opts.map_limit,
throttle: opts.throttle,
cb: (err) => {
if (misc.len(errors) > 0) {
dbg(`errors! ${misc.to_json(errors)}`);
}
return cb(err);
}
});
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
remove_blob_ttls(opts) {
var x;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuids: required, // uuid=sha1-based from blob
cb: required // cb(err)
});
return this._query({
query: "UPDATE blobs",
set: {
expire: null
},
where: {
"id::UUID = ANY($)": (function() {
var j, len, ref, results;
ref = opts.uuids;
results = [];
for (j = 0, len = ref.length; j < len; j++) {
x = ref[j];
if (misc.is_valid_uuid_string(x)) {
results.push(x);
}
}
return results;
})()
},
cb: opts.cb
});
}
close_blob(opts) {
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: required, // uuid=sha1-based from blob
bucket: COCALC_BLOB_STORE,
cb: void 0 // cb(err)
});
if (!misc.is_valid_uuid_string(opts.uuid)) {
if (typeof opts.cb === "function") {
opts.cb("uuid is invalid");
}
return;
}
return async.series([
(cb) => {
// ensure blob is in gcloud
return this._query({
query: 'SELECT gcloud FROM blobs',
where: {
'id = $::UUID': opts.uuid
},
cb: one_result('gcloud',
(err,
gcloud) => {
if (err) {
return cb(err);
} else if (!gcloud) {
// not yet copied to gcloud storage
return this.copy_blob_to_gcloud({
uuid: opts.uuid,
bucket: opts.bucket,
cb: cb
});
} else {
// copied already
return cb();
}
})
});
},
(cb) => {
// now blob is in gcloud -- delete blob data in database
return this._query({
query: 'SELECT gcloud FROM blobs',
where: {
'id = $::UUID': opts.uuid
},
set: {
blob: null
},
cb: cb
});
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
// Archive the patches of syncstrings that have been inactive for at
// least opts.age_days, up to opts.limit per batch.  When
// repeat_until_done is set and a full batch was processed, immediately
// runs another batch.
syncstring_maintenance(opts) {
var dbg, syncstrings;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
age_days: 30, // archive patches of syncstrings that are inactive for at least this long
map_limit: 1, // how much parallelism to use
limit: 1000, // do only this many
repeat_until_done: true,
delay: 0,
cb: void 0
});
dbg = this._dbg("syncstring_maintenance");
dbg(opts);
syncstrings = void 0;
return async.series([
(cb) => {
// Step 1: find inactive, not-yet-archived syncstring ids.
dbg("determine inactive syncstring ids");
return this._query({
query: 'SELECT string_id FROM syncstrings',
where: [
{
'last_active <= $::TIMESTAMP': misc.days_ago(opts.age_days)
},
'archived IS NULL'
],
limit: opts.limit,
timeout_s: TIMEOUT_LONG_S,
cb: all_results('string_id',
(err,
v) => {
syncstrings = v;
return cb(err);
})
});
},
(cb) => {
// Step 2: archive each one (map_limit at a time, with optional delay
// between archives to reduce database load).
var f,
i;
dbg("archive patches for inactive syncstrings");
i = 0;
f = (string_id,
cb) => {
i += 1;
console.log(`*** ${i}/${syncstrings.length}: archiving string ${string_id} ***`);
return this.archive_patches({
string_id: string_id,
cb: function(err) {
if (err || !opts.delay) {
return cb(err);
} else {
return setTimeout(cb,
opts.delay);
}
}
});
};
return async.mapLimit(syncstrings,
opts.map_limit,
f,
cb);
}
], (err) => {
if (err) {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
} else if (opts.repeat_until_done && syncstrings.length === opts.limit) {
// full batch -- there are probably more, so run another round
dbg("doing it again");
return this.syncstring_maintenance(opts);
} else {
return typeof opts.cb === "function" ? opts.cb() : void 0;
}
});
}
// Offline/archive all patches of one syncstring: export them, save them
// as a single (compressed) blob, record the blob uuid in
// syncstrings.archived, then delete the patch rows.  A syncstring that
// was active more recently than opts.cutoff is left untouched (note the
// cutoff guard repeated at the top of each step after the first).
archive_patches(opts) {
var blob_uuid, dbg, last_active, patches, project_id, syncstring, where;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
string_id: required,
compress: 'zlib',
level: -1, // the default
cutoff: misc.minutes_ago(30), // never touch anything this new
cb: void 0
});
dbg = this._dbg(`archive_patches(string_id='${opts.string_id}')`);
syncstring = patches = blob_uuid = project_id = last_active = void 0;
where = {
"string_id = $::CHAR(40)": opts.string_id
};
return async.series([
(cb) => {
// Step 1: look up the syncstring; bail if missing or already archived.
dbg("get project_id");
return this._query({
query: "SELECT project_id, archived, last_active FROM syncstrings",
where: where,
cb: one_result((err,
x) => {
if (err) {
return cb(err);
} else if (x == null) {
return cb(`no such syncstring with id '${opts.string_id}'`);
} else if (x.archived) {
return cb(`string_id='${opts.string_id}' already archived as blob id '${x.archived}'`);
} else {
project_id = x.project_id;
last_active = x.last_active;
return cb();
}
})
});
},
(cb) => {
if ((last_active != null) && last_active >= opts.cutoff) {
dbg("excluding due to cutoff");
cb();
return;
}
dbg("get patches");
return this.export_patches({
string_id: opts.string_id,
cb: (err,
x) => {
patches = x;
return cb(err);
}
});
},
(cb) => {
var blob,
err;
if ((last_active != null) && last_active >= opts.cutoff) {
cb();
return;
}
dbg("create blob from patches");
try {
blob = Buffer.from(JSON.stringify(patches));
} catch (error) {
err = error;
// TODO: This *will* happen if the total length of all patches is too big.
// need to break patches up...
// This is not exactly the end of the world as the entire point of all this is to
// just save some space in the database...
cb(err);
return;
}
dbg('save blob');
blob_uuid = misc_node.uuidsha1(blob);
return this.save_blob({
uuid: blob_uuid,
blob: blob,
project_id: project_id,
compress: opts.compress,
level: opts.level,
cb: cb
});
},
(cb) => {
if ((last_active != null) && last_active >= opts.cutoff) {
cb();
return;
}
dbg("update syncstring to indicate patches have been archived in a blob");
return this._query({
query: "UPDATE syncstrings",
set: {
archived: blob_uuid
},
where: where,
cb: cb
});
},
(cb) => {
if ((last_active != null) && last_active >= opts.cutoff) {
cb();
return;
}
// Only delete the patch rows after the blob is safely saved and the
// syncstring row points at it.
dbg("actually deleting patches");
return delete_patches({
db: this,
string_id: opts.string_id,
cb: cb
});
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
// Reverse of archive_patches: download the archived patches blob,
// re-insert the patches into the patches table, clear
// syncstrings.archived, and delete the now-unneeded blob.  A no-op
// (cb with no error) when the syncstring is not archived.
unarchive_patches(opts) {
var dbg, where;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
string_id: required,
cb: void 0
});
dbg = this._dbg(`unarchive_patches(string_id='${opts.string_id}')`);
where = {
"string_id = $::CHAR(40)": opts.string_id
};
return this._query({
query: "SELECT archived FROM syncstrings",
where: where,
cb: one_result('archived', (err, blob_uuid) => {
var blob;
if (err || (blob_uuid == null)) {
// error, or nothing archived -- nothing to do
if (typeof opts.cb === "function") {
opts.cb(err);
}
return;
}
blob = void 0;
return async.series([
//(cb) =>
// For testing only!
// setTimeout(cb, 7000)
(cb) => {
dbg("download blob");
return this.get_blob({
uuid: blob_uuid,
cb: (err,
x) => {
if (err) {
return cb(err);
} else if (x == null) {
return cb("blob is gone");
} else {
blob = x;
return cb(err);
}
}
});
},
(cb) => {
var e,
patches;
dbg("extract blob");
try {
patches = JSON.parse(blob);
} catch (error) {
e = error;
cb(`corrupt patches blob -- ${e}`);
return;
}
return this.import_patches({
patches: patches,
cb: cb
});
},
(cb) => {
// The syncstring update and the blob deletion are independent,
// so do them in parallel.
return async.parallel([
(cb) => {
dbg("update syncstring to indicate that patches are now available");
return this._query({
query: "UPDATE syncstrings SET archived=NULL",
where: where,
cb: cb
});
},
(cb) => {
dbg('delete blob, which is no longer needed');
return this.delete_blob({
uuid: blob_uuid,
cb: cb
});
}
],
cb);
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
})
});
}
export_patches(opts) {
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
string_id: required,
cb: required // cb(err, array)
});
return this._query({
query: "SELECT extract(epoch from time)*1000 as epoch, * FROM patches",
where: {
"string_id = $::CHAR(40)": opts.string_id
},
cb: all_results((err, patches) => {
var j, len, p;
if (err) {
return opts.cb(err);
} else {
for (j = 0, len = patches.length; j < len; j++) {
p = patches[j];
p.time = new Date(p.epoch);
delete p.epoch;
}
return opts.cb(void 0, patches);
}
})
});
}
// Insert an array of patches (as produced by export_patches) into the
// patches table, in blocks of 1000 rows.  Also understands the OLD
// RethinkDB export format (rows with a compound `id` field) and can
// rewrite the string_id on import.
import_patches(opts) {
var f, insert_block_size, j, l, len, len1, patch, patches, ref, v, x;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
patches: required, // array as exported by export_patches
string_id: void 0, // if given, change the string_id when importing the patches to this
cb: void 0
});
patches = opts.patches;
if (patches.length === 0) { // easy
if (typeof opts.cb === "function") {
opts.cb();
}
return;
}
if (patches[0].id != null) {
// convert from OLD RethinkDB format!
// (id was [string_id, time]; other fields were flat on the row)
v = [];
for (j = 0, len = patches.length; j < len; j++) {
x = patches[j];
patch = {
string_id: x.id[0],
time: new Date(x.id[1]),
user_id: x.user,
patch: x.patch,
snapshot: x.snapshot,
sent: x.sent,
prev: x.prev
};
v.push(patch);
}
patches = v;
}
// change string_id, if requested.
if (opts.string_id != null) {
for (l = 0, len1 = patches.length; l < len1; l++) {
x = patches[l];
x.string_id = opts.string_id;
}
}
// We break into blocks since there is limit (about 65K) on
// number of params that can be inserted in a single query.
insert_block_size = 1000;
f = (i, cb) => {
// insert the i-th block of up to insert_block_size patches
return this._query({
query: 'INSERT INTO patches',
values: patches.slice(insert_block_size * i, insert_block_size * (i + 1)),
conflict: 'ON CONFLICT DO NOTHING', // in case multiple servers (or this server) are doing this import at once -- this can and does happen sometimes.
cb: cb
});
};
// iterate over block indices 0 .. ceil(patches.length/insert_block_size)-1
return async.mapSeries((function() {
var results = [];
for (var o = 0, ref = patches.length / insert_block_size; 0 <= ref ? o < ref : o > ref; 0 <= ref ? o++ : o--){ results.push(o); }
return results;
}).apply(this), f, (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
// Permanently delete a blob: remove it from the gcloud bucket (when the
// row says it was copied there and a blob store is configured), then
// delete the database row.  cb(err) is optional.
delete_blob(opts) {
var dbg, gcloud;
boundMethodCheck(this, PostgreSQL);
opts = defaults(opts, {
uuid: required,
cb: void 0
});
if (!misc.is_valid_uuid_string(opts.uuid)) {
if (typeof opts.cb === "function") {
opts.cb("uuid is invalid");
}
return;
}
gcloud = void 0;
dbg = this._dbg(`delete_blob(uuid='${opts.uuid}')`);
return async.series([
(cb) => {
dbg("check if blob in gcloud");
return this._query({
query: "SELECT gcloud FROM blobs",
where: {
"id = $::UUID": opts.uuid
},
cb: one_result('gcloud',
(err,
x) => {
gcloud = x;
return cb(err);
})
});
},
(cb) => {
// skip the bucket delete when it was never uploaded or no store is set
if (!gcloud || !COCALC_BLOB_STORE) {
cb();
return;
}
dbg("delete from gcloud");
return this.blob_store(gcloud).delete({
name: opts.uuid,
cb: cb
});
},
(cb) => {
dbg("delete from local database");
return this._query({
query: "DELETE FROM blobs",
where: {
"id = $::UUID": opts.uuid
},
cb: cb
});
}
], (err) => {
return typeof opts.cb === "function" ? opts.cb(err) : void 0;
});
}
};
};
}).call(this);
//# sourceMappingURL=postgres-blobs.js.map