@cocalc/project
Version:
CoCalc: project daemon
242 lines (215 loc) • 7.71 kB
text/typescript
/*
* This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
* License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
*/
/*
Jupyter's blob store (based on sqlite), which hooks into the raw http server.
*/
import { BlobStoreInterface } from "@cocalc/frontend/jupyter/project-interface";
import * as fs from "fs";
import { readFile } from "./async-utils-node";
import Logger from "@cocalc/backend/logger";
import { months_ago, to_json } from "@cocalc/util/misc";
const misc_node = require("@cocalc/backend/misc_node");
import Database from "better-sqlite3";
import { Router } from "express";
const winston = Logger("jupyter-blobs-sqlite");
import { get_ProjectStatusServer } from "@cocalc/project/project-status/server";
const JUPYTER_BLOBS_DB_FILE: string =
process.env.JUPYTER_BLOBS_DB_FILE ??
`${process.env.SMC_LOCAL_HUB_HOME ?? process.env.HOME}/.jupyter-blobs-v0.db`;
// TODO: are these the only base64 encoded types that jupyter kernels return?
const BASE64_TYPES = ["image/png", "image/jpeg", "application/pdf", "base64"];
export class BlobStore implements BlobStoreInterface {
private db: Database.Database;
private stmt_insert;
private stmt_update;
private stmt_get;
private stmt_data;
private stmt_ipynb;
private stmt_keys;
constructor() {
winston.debug("jupyter BlobStore: constructor");
try {
this.init();
winston.debug(`jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} opened fine`);
} catch (err) {
winston.debug(
`jupyter BlobStore: ${JUPYTER_BLOBS_DB_FILE} open error - ${err}`
);
// File may be corrupt/broken/etc. -- in this case, remove and try again.
// This database is only an image *cache*, so this is fine.
// See https://github.com/sagemathinc/cocalc/issues/2766
// Using sync is also fine, since this only happens once
// during initialization.
winston.debug("jupyter BlobStore: resetting database cache");
try {
fs.unlinkSync(JUPYTER_BLOBS_DB_FILE);
} catch (error) {
err = error;
winston.debug(
`Error trying to delete ${JUPYTER_BLOBS_DB_FILE}... ignoring: `,
err
);
}
this.init();
}
}
init(): void {
if (JUPYTER_BLOBS_DB_FILE == "memory") {
// as any, because @types/better-sqlite3 is not yet updated to support this
// doc about the constructor: https://wchargin.com/better-sqlite3/api.html#new-databasepath-options
this.db = new Database(".db", { memory: true } as any);
} else {
this.db = new Database(JUPYTER_BLOBS_DB_FILE);
}
this.init_table();
this.init_statements(); // table must exist!
if (JUPYTER_BLOBS_DB_FILE !== "memory") {
this.clean(); // do this once on start
this.db.exec("VACUUM");
}
}
private init_table() {
this.db
.prepare(
"CREATE TABLE IF NOT EXISTS blobs (sha1 TEXT, data BLOB, type TEXT, ipynb TEXT, time INTEGER)"
)
.run();
}
private init_statements() {
this.stmt_insert = this.db.prepare(
"INSERT INTO blobs VALUES(?, ?, ?, ?, ?)"
);
this.stmt_update = this.db.prepare("UPDATE blobs SET time=? WHERE sha1=?");
this.stmt_get = this.db.prepare("SELECT * FROM blobs WHERE sha1=?");
this.stmt_data = this.db.prepare("SELECT data FROM blobs where sha1=?");
this.stmt_keys = this.db.prepare("SELECT sha1 FROM blobs");
this.stmt_ipynb = this.db.prepare(
"SELECT ipynb, type, data FROM blobs where sha1=?"
);
}
private clean(): void {
this.clean_old();
this.clean_filesize();
}
private clean_old() {
// Delete anything old...
// The main point of this blob store being in the db is to ensure that when the
// project restarts, then user saves an ipynb,
// that they do not loose any work. So a few weeks should be way more than enough.
// Note that TimeTravel may rely on these old blobs, so images in TimeTravel may
// stop working after this long. That's a tradeoff.
this.db
.prepare("DELETE FROM blobs WHERE time <= ?")
.run(months_ago(1).getTime());
}
private clean_filesize() {
// we also check for the actual filesize and in case, get rid of half of the old blobs
try {
const stats = fs.statSync(JUPYTER_BLOBS_DB_FILE);
const size_mb = stats.size / (1024 * 1024);
if (size_mb > 128) {
const cnt = this.db.prepare("SELECT COUNT(*) as cnt FROM blobs").get();
if (cnt?.cnt == null) return;
const n = Math.floor(cnt.cnt / 2);
winston.debug(
`jupyter BlobStore: large file of ${size_mb}MiB detected – deleting ${n} old rows.`
);
if (n == 0) return;
const when = this.db
.prepare("SELECT time FROM blobs ORDER BY time ASC LIMIT 1 OFFSET ?")
.get(n);
if (when?.time == null) return;
winston.debug(`jupyter BlobStore: delete starting from ${when.time}`);
this.db.prepare("DELETE FROM blobs WHERE time <= ?").run(when.time);
}
} catch (err) {
winston.debug(`jupyter BlobStore: clean_filesize error: ${err}`);
}
}
// used in testing
delete_all_blobs(): void {
this.db.prepare("DELETE FROM blobs").run();
}
// data could, e.g., be a uuencoded image
// We return the sha1 hash of it, and store it, along with a reference count.
// ipynb = (optional) text that is also stored and will be
// returned when get_ipynb is called
// This is used for some iframe support code.
save(data, type, ipynb?): string {
if (BASE64_TYPES.includes(type)) {
data = Buffer.from(data, "base64");
} else {
data = Buffer.from(data);
}
const sha1: string = misc_node.sha1(data);
const row = this.stmt_get.get(sha1);
if (row == null) {
this.stmt_insert.run([sha1, data, type, ipynb, Date.now()]);
} else {
this.stmt_update.run([Date.now(), sha1]);
}
return sha1;
}
// Read a file from disk and save it in the database.
// Returns the sha1 hash of the file.
async readFile(path: string, type: string): Promise<string> {
return await this.save(await readFile(path), type);
}
/*
free(sha1: string): void {
// instead, stuff gets freed 1 month after last save.
}
*/
// Return data with given sha1, or undefined if no such data.
get(sha1: string): undefined | Buffer {
const x = this.stmt_data.get(sha1);
if (x != null) {
return x.data;
}
}
get_ipynb(sha1: string): any {
const row = this.stmt_ipynb.get(sha1);
if (row == null) {
return;
}
if (row.ipynb != null) {
return row.ipynb;
}
if (BASE64_TYPES.includes(row.type)) {
return row.data.toString("base64");
} else {
return row.data.toString();
}
}
keys(): string[] {
return this.stmt_keys.all().map((x) => x.sha1);
}
express_router(base): Router {
const router = Router();
base += "blobs/";
router.get(base, (_, res) => {
res.send(to_json(this.keys()));
});
router.get(base + "*", (req, res) => {
const filename: string = req.path.slice(base.length);
const sha1: string = `${req.query.sha1}`;
res.type(filename);
res.send(this.get(sha1));
});
return router;
}
}
let blob_store: BlobStore | undefined = undefined;
export function get_blob_store() {
if (blob_store != null) return blob_store;
try {
blob_store = new BlobStore();
get_ProjectStatusServer().clearComponentAlert("BlobStore");
return blob_store;
} catch (err) {
get_ProjectStatusServer().setComponentAlert("BlobStore");
winston.warn(`unable to instantiate BlobStore -- ${err}`);
}
}