UNPKG

@darlean/fs-persistence-suite

Version:

File System Persistence Suite that uses a physical or shared file system to persist data.

410 lines (409 loc) 18.1 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.FsPersistenceWorker = void 0;
const fs_1 = __importDefault(require("fs"));
const utils_1 = require("@darlean/utils");
const filtering_1 = require("./filtering");
const worker_1 = require("threads/worker");
const sqlite_sync_1 = require("./sqlite-sync");

// Table, index and column names of the single SQLite table that holds all records.
const TABLE = 'data';
const INDEX_SRC = 'srcidx';
const FIELD_PK = 'pk';
const FIELD_SK = 'sk';
const FIELD_VALUE = 'value';
const FIELD_SOURCE_NAME = 'sourcename';
const FIELD_SOURCE_SEQ = 'sourceseq';
const FIELD_VERSION = 'version';
// Value written to the source-name column for every record stored by this worker.
const SOURCE = 'source';
// Upper bound on the summed `length` of the values returned by one query() call.
const MAX_RESPONSE_LENGTH = 500 * 1000;

// IMPORTANT: When you change the encoding, also ensure that the maxOutReadableEncodedString function
// is adjusted accordingly!!!
const decode = utils_1.decodeKeyReadable;
const encode = utils_1.encodeKeyReadable;

/**
 * Returns a string that is used as an inclusive upper bound for encoded keys that start with `value`.
 *
 * @param {string} value - An encoded key (or key prefix).
 * @returns {string} `value` with 'Z' appended.
 */
function maxOutReadableEncodedString(value) {
    // Because the readable encoding prefixes every character with a '.' or uses '--' for a separator, it
    // is safe to use 'Z' here at the position of a '.' or '-' for maxing out.
    return value + 'Z';
}

/**
 * Worker that persists records in a SQLite database file (`store.db`) inside a base directory.
 * Records are addressed by an encoded partition key and an encoded sort key.
 */
class FsPersistenceWorker {
    /**
     * @param time - Accepted for interface compatibility; not used by the code in this file.
     * @param filterer - Evaluates filter expressions; must provide `process` and `isTruthy`.
     * @param deser - (De)serializer; must provide `deserialize` and `serialize`.
     */
    constructor(time, filterer, deser) {
        this.lastSeqNr = 0;
        this.keyWhere = this.makeKeyWhere();
        this.filterer = filterer;
        this.deser = deser;
    }

    /**
     * Loads the single record identified by `options.partitionKey` and `options.sortKey`.
     *
     * @param options - Load options (`partitionKey`, `sortKey`, `projectionFilter`, `projectionBases`).
     * @returns `{ value, version }` when the record exists (value is a Blob, or undefined when the
     *     stored/projected value is empty); `{}` when there is no such record.
     * @throws {Error} 'No statement pool' when no database is open.
     */
    load(options) {
        const pool = this.connection?.poolLoad;
        if (!pool) {
            throw new Error('No statement pool');
        }
        const values = this.makeKeyValues(options.partitionKey, options.sortKey);
        const statement = pool.obtain();
        try {
            const result = statement.get(values);
            if (result) {
                const buffer = result.value;
                const projection = options.projectionFilter
                    ? (0, utils_1.parseMultiFilter)(options.projectionFilter)
                    : undefined;
                const value = projection
                    ? this.project(projection, buffer, options.projectionBases ?? [])
                    : buffer;
                return {
                    value: value ? new Blob([value]) : undefined,
                    version: result[FIELD_VERSION]
                };
            }
            return {};
        }
        finally {
            statement.release();
        }
    }

    /**
     * Queries the records of one partition, ordered by sort key.
     *
     * @param options - Query options (`partitionKey`, `sortKeyFrom`, `sortKeyTo`, `sortKeyToMatch`,
     *     `sortKeyOrder`, `maxItems`, `continuationToken`, `filterExpression`, `filterFieldBase`,
     *     `filterPartitionKeyOffset`, `filterSortKeyOffset`, `projectionFilter`).
     * @returns `{ items, continuationToken? }`. The token is a base64-encoded JSON object holding the
     *     encoded sort key of the last returned item; passing it back resumes after that item.
     * @throws {Error} 'No statement pool' when no database is open; 'Data too large' when the very
     *     first matching item already exceeds MAX_RESPONSE_LENGTH.
     */
    query(options) {
        const direction = options.sortKeyOrder ?? 'ascending';
        const requiresContent = options.filterExpression || isContentFilter(options.projectionFilter);
        // The "NoContents" pools select only the key columns, which avoids reading the value blobs
        // when neither the filter nor the projection needs them.
        const pool = direction === 'descending'
            ? requiresContent
                ? this.connection?.poolQueryDesc
                : this.connection?.poolQueryDescNoContents
            : requiresContent
                ? this.connection?.poolQueryAsc
                : this.connection?.poolQueryAscNoContents;
        if (!pool) {
            throw new Error('No statement pool');
        }
        const sortKeyFromString = options.sortKeyFrom ? encode(options.sortKeyFrom) : null;
        const sortKeyToString = options.sortKeyTo
            ? maxOutReadableEncodedString(encode([...options.sortKeyTo, ...(options.sortKeyToMatch === 'loose' ? [] : [''])]))
            : null;
        // The limiter is the exclusive starting point of the scan: the lowest possible value for
        // ascending order, the highest for descending order, or the position from the token.
        let limiter = options.sortKeyOrder === 'descending' ? maxOutReadableEncodedString('') : '';
        if (options.continuationToken) {
            const ct = JSON.parse(Buffer.from(options.continuationToken, 'base64').toString());
            limiter = ct.sk;
        }
        const result = { items: [] };
        // When we have a filter expression, do not set a hard limit, but iterate over entire result set until we
        // have maxItems *filtered* results.
        const limit = options.maxItems && !options.filterExpression ? options.maxItems : -1;
        const values = [
            encode(options.partitionKey),
            sortKeyFromString,
            sortKeyFromString,
            sortKeyToString,
            sortKeyToString,
            limiter,
            limit
        ];
        const statement = pool.obtain();
        try {
            const projection = options.projectionFilter
                ? (0, utils_1.parseMultiFilter)(options.projectionFilter)
                : undefined;
            let length = 0;
            let lastSK;
            let nrows = 0;
            let reachedMaxItems = false;
            for (const row of statement.iterate(values)) {
                nrows++;
                const data = row;
                const matches = !requiresContent
                    || !options.filterExpression
                    || this.filter(data, options.filterExpression, options.filterFieldBase, options.filterPartitionKeyOffset, options.filterSortKeyOffset);
                if (!matches) {
                    continue;
                }
                const deserFields = options.filterFieldBase ? [options.filterFieldBase] : [];
                const value = requiresContent
                    ? projection
                        ? this.project(projection, data[FIELD_VALUE], deserFields)
                        : data[FIELD_VALUE]
                    : undefined;
                length += value?.length ?? 0;
                if (length > MAX_RESPONSE_LENGTH) {
                    if (result.items.length === 0) {
                        throw new Error('Data too large');
                    }
                    // Return what we have so far; the caller continues after the last returned item.
                    const ct = { sk: lastSK ?? '' };
                    result.continuationToken = Buffer.from(JSON.stringify(ct)).toString('base64');
                    return result;
                }
                result.items.push({
                    sortKey: decode(data.sk ?? ''),
                    value: value ? new Blob([value]) : undefined
                });
                lastSK = data.sk;
                // Without a filter expression the SQL LIMIT already caps the number of rows. With a
                // filter expression there is no SQL limit, so stop here once enough items matched.
                if (options.filterExpression && options.maxItems && result.items.length >= options.maxItems) {
                    reachedMaxItems = true;
                    break;
                }
            }
            // With a filter expression, a scan that was not cut short has seen every row, so there is
            // nothing to continue with. (Emitting a token here could make callers loop forever when no
            // row matched, because the token would then point at the start of the range.)
            const canHaveMore = options.filterExpression
                ? reachedMaxItems
                : options.maxItems !== undefined && nrows >= options.maxItems;
            if (canHaveMore) {
                const ct = { sk: lastSK ?? '' };
                result.continuationToken = Buffer.from(JSON.stringify(ct)).toString('base64');
            }
            return result;
        }
        finally {
            statement.release();
        }
    }

    /**
     * Stores (upserts) or deletes a batch of records within one transaction.
     * An item with `value === undefined` is deleted; any other item is upserted. Both statements only
     * take effect when the stored version is lower than the item's version.
     *
     * @param options - Batch options; `options.items` holds `{ partitionKey, sortKey, value, version }`.
     * @returns {Promise<void>}
     * @throws {Error} 'No statement pool' when the database is not open for writing. On any error the
     *     transaction is rolled back and the original error is rethrown.
     */
    async storeBatch(options) {
        // Resolve all blobs *before* opening the transaction. Awaiting inside the transaction would
        // allow another storeBatch call to interleave on the same connection.
        const buffers = await Promise.all(options.items.map(async (item) => item.value === undefined ? undefined : Buffer.from(await item.value.arrayBuffer())));
        this.connection?.db.run('BEGIN TRANSACTION');
        try {
            options.items.forEach((item, index) => {
                const version = item.version;
                if (item.value === undefined) {
                    // Delete record
                    const pool = this.connection?.poolDelete;
                    if (!pool) {
                        throw new Error('No statement pool');
                    }
                    const values = [...this.makeKeyValues(item.partitionKey, item.sortKey), version];
                    const statement = pool.obtain();
                    try {
                        statement.run(values);
                    }
                    finally {
                        statement.release();
                    }
                }
                else {
                    // Upsert record
                    const pool = this.connection?.poolStore;
                    if (!pool) {
                        throw new Error('No statement pool');
                    }
                    const seqnr = this.lastSeqNr + 1;
                    this.lastSeqNr = seqnr;
                    const value = buffers[index];
                    // Values for the INSERT part, then the same four values again for the UPDATE part,
                    // and finally the version for the "WHERE version < ?" guard.
                    const values = [
                        ...this.makeKeyValues(item.partitionKey, item.sortKey),
                        value, SOURCE, seqnr, version,
                        value, SOURCE, seqnr, version,
                        version
                    ];
                    const statement = pool.obtain();
                    try {
                        statement.run(values);
                    }
                    finally {
                        statement.release();
                    }
                }
            });
            this.connection?.db.run('COMMIT TRANSACTION');
        }
        catch (e) {
            try {
                this.connection?.db.run('ROLLBACK TRANSACTION');
            }
            catch {
                // Ignore rollback failures so that the original error is the one that is reported.
            }
            throw e;
        }
    }

    /**
     * Deserializes `data`, applies the projection `config` and serializes the result again.
     *
     * @param config - Parsed projection filter (result of `parseMultiFilter`).
     * @param data - Serialized record value; may be empty.
     * @param {string[]} deserFields - Names of top-level fields that are deserialized as well when
     *     they hold a Buffer.
     * @returns The serialized projected value, or undefined when there is no data.
     */
    project(config, data, deserFields) {
        if (!data) {
            return undefined;
        }
        const parsed = this.deser.deserialize(data);
        if (!parsed) {
            return undefined;
        }
        for (const field of deserFields) {
            const value = parsed[field];
            if (value && Buffer.isBuffer(value)) {
                parsed[field] = this.deser.deserialize(value);
            }
        }
        const filtered = (0, utils_1.filterStructure)(config, parsed, '');
        return this.deser.serialize(filtered);
    }

    /**
     * Evaluates a filter expression against one database row.
     *
     * @param data - Row as returned by the query statement.
     * @param filter - Filter expression that is handed to the filterer.
     * @param {string} [base] - Optional field within the deserialized value that holds the data to filter on.
     * @param {number} [pkOffset] - Offset added to partition key indices used by the expression.
     * @param {number} [skOffset] - Offset added to sort key indices used by the expression.
     * @returns {boolean} Whether the row passes the filter.
     */
    filter(data, filter, base, pkOffset, skOffset) {
        // Deserializing and decoding is done lazily and cached, so that it only happens for those
        // parts that the filter expression actually asks for.
        let data2;
        let pkey2;
        let skey2;
        const context = {
            data: () => {
                if (data2) {
                    return data2;
                }
                if (data[FIELD_VALUE]) {
                    const d = this.deser.deserialize(data[FIELD_VALUE]);
                    if (base) {
                        let dBase = d?.[base];
                        if (Buffer.isBuffer(dBase)) {
                            dBase = this.deser.deserialize(dBase);
                        }
                        const d2 = dBase ?? {};
                        data2 = d2;
                        return d2;
                    }
                    data2 = d;
                    return d;
                }
                else {
                    data2 = {};
                    return data2;
                }
            },
            sortKey: (idx) => {
                const jdx = (skOffset ?? 0) + idx;
                if (skey2) {
                    return skey2[jdx];
                }
                const k = (data[FIELD_SK] ? decode(data[FIELD_SK]) : []) ?? [];
                skey2 = k;
                return k[jdx];
            },
            partitionKey: (idx) => {
                const jdx = (pkOffset ?? 0) + idx;
                if (pkey2) {
                    return pkey2[jdx];
                }
                const k = (data[FIELD_PK] ? decode(data[FIELD_PK]) : []) ?? [];
                pkey2 = k;
                return k[jdx];
            }
        };
        const value = this.filterer.process(context, filter);
        return this.filterer.isTruthy(value);
    }

    /**
     * Opens `<basePath>/store.db` and prepares all statement pools.
     *
     * @param {string} basePath - Directory that contains (or will contain) the database file.
     * @param {'readonly'|'writable'} mode - In 'writable' mode the directory, table and index are
     *     created when missing and the store/delete pools are prepared as well.
     * @throws {Error} 'Path does not exist' when `basePath` is missing and mode is not 'writable'.
     */
    openDatabase(basePath, mode) {
        const filepath = basePath;
        if (!fs_1.default.existsSync(filepath)) {
            if (mode === 'writable') {
                fs_1.default.mkdirSync(filepath, { recursive: true });
            }
            else {
                throw new Error('Path does not exist');
            }
        }
        const filename = [filepath, 'store.db'].join('/');
        const db = new sqlite_sync_1.SqliteDatabase();
        db.open(filename, mode !== 'writable');
        // Only exclusive mode makes it possible to use the faster WAL without having a shared lock
        // (which we do not have, as the nodes run on different machines).
        // Even though we access SQLite from multiple threads within 1 process, SQLite still gives locked errors
        // for the reader threads when we enable exclusive locking.
        //db.run('PRAGMA locking_mode=EXCLUSIVE;');
        // Enable the (faster) WAL mode
        db.run('PRAGMA journal_mode=WAL;');
        // Never makes the database corrupt, but on a power failure at just the wrong time, some commits may be lost.
        // We take the risk -- and when we deploy with redundancy > 1, one of the other instances will still have
        // the lost commits, so after sync, they will be there again.
        db.run('PRAGMA synchronous=NORMAL;');
        const connection = {
            db,
            lock: mode === 'writable' ? new utils_1.SharedExclusiveLock('exclusive') : undefined
        };
        this.connection = connection;
        if (mode === 'writable') {
            const MAX_SEQ = 'MaxSeq';
            db.run(`CREATE TABLE IF NOT EXISTS ${TABLE} (${FIELD_PK} TEXT, ${FIELD_SK} TEXT, ${FIELD_VALUE} BLOB, ${FIELD_SOURCE_NAME} TEXT, ${FIELD_SOURCE_SEQ} NUMBER, ${FIELD_VERSION} TEXT, PRIMARY KEY (${FIELD_PK}, ${FIELD_SK}))`);
            db.run(`CREATE UNIQUE INDEX IF NOT EXISTS ${INDEX_SRC} ON ${TABLE} (${FIELD_SOURCE_NAME}, ${FIELD_SOURCE_SEQ})`);
            // Continue numbering after the highest sequence number that is already stored.
            const seqnrPool = db.prepare(`SELECT MAX(${FIELD_SOURCE_SEQ}) AS ${MAX_SEQ} FROM ${TABLE} WHERE ${FIELD_SOURCE_NAME}=?`);
            try {
                const query = seqnrPool.obtain();
                try {
                    const result = query.get(SOURCE);
                    // MAX() yields NULL for an empty table.
                    this.lastSeqNr = result[MAX_SEQ] ?? 0;
                }
                finally {
                    query.release();
                }
            }
            finally {
                seqnrPool.finalize();
            }
            connection.poolStore = this.makeStorePool(db);
            connection.poolDelete = this.makeDeletePool(db);
        }
        connection.poolLoad = this.makeLoadPool(db);
        connection.poolQueryAsc = this.makeQueryPoolAsc(db);
        connection.poolQueryDesc = this.makeQueryPoolDesc(db);
        connection.poolQueryAscNoContents = this.makeQueryPoolAsc(db, [FIELD_PK, FIELD_SK]);
        connection.poolQueryDescNoContents = this.makeQueryPoolDesc(db, [FIELD_PK, FIELD_SK]);
    }

    /**
     * Prepares the statement that loads one record by its exact partition and sort key.
     */
    makeLoadPool(db) {
        // Note: SQLite does not enforce uniqueness of the (compound) primary key when using
        // NULL values for some of the fields. makeKeyValues therefore always supplies an encoded
        // string for both key fields (a missing sort key is encoded as an empty key).
        return db.prepare(`SELECT * FROM ${TABLE} WHERE (${this.keyWhere})`);
    }

    /**
     * Prepares the ascending range query.
     *
     * @param db - Opened database.
     * @param {string[]} [fields] - Columns to select; all columns when omitted.
     */
    makeQueryPoolAsc(db, fields) {
        // The question marks stand for
        // - Partition key exact value
        // - Lower value of sort key
        // - Same as previous field (must be provided twice because better-sqlite does not support using numeric placeholders to reuse the same value)
        // - Upper value of sort key
        // - Same as previous field (must be provided twice because better-sqlite does not support using numeric placeholders to reuse the same value)
        // - Paging value that is used to continue a previous query
        // - Limit (or a negative number for no limit)
        const fieldNames = fields ? fields.join(',') : '*';
        return db.prepare(`SELECT ${fieldNames} FROM ${TABLE} WHERE (${FIELD_PK}=?) AND (? IS NULL OR (${FIELD_SK} >=?)) AND (? IS NULL OR (${FIELD_SK} <=?)) AND (${FIELD_SK} > ?) ORDER BY ${FIELD_SK} ASC LIMIT ?`);
    }

    /**
     * Prepares the descending range query. Takes the same placeholders as the ascending query; the
     * paging value is an exclusive upper bound instead of an exclusive lower bound.
     *
     * @param db - Opened database.
     * @param {string[]} [fields] - Columns to select; all columns when omitted.
     */
    makeQueryPoolDesc(db, fields) {
        const fieldNames = fields ? fields.join(',') : '*';
        return db.prepare(`SELECT ${fieldNames} FROM ${TABLE} WHERE (${FIELD_PK}=?) AND (? IS NULL OR (${FIELD_SK} >=?)) AND (? IS NULL OR (${FIELD_SK} <=?)) AND (${FIELD_SK} < ?) ORDER BY ${FIELD_SK} DESC LIMIT ?`);
    }

    /**
     * Prepares the upsert statement: 6 placeholders for the insert, 4 for the update on conflict and
     * 1 for the version guard (the update only happens when the stored version is lower).
     */
    makeStorePool(db) {
        const placeholders = new Array(6).fill('?');
        return db.prepare(`INSERT INTO ${TABLE} (${FIELD_PK}, ${FIELD_SK}, ${FIELD_VALUE}, ${FIELD_SOURCE_NAME}, ${FIELD_SOURCE_SEQ}, ${FIELD_VERSION}) ` +
            `VALUES (${placeholders.join(',')}) ` +
            `ON CONFLICT DO UPDATE SET ${FIELD_VALUE}=?, ${FIELD_SOURCE_NAME}=?, ${FIELD_SOURCE_SEQ}=?,${FIELD_VERSION}=? ` +
            `WHERE ${FIELD_VERSION} < ?`);
    }

    /**
     * Prepares the delete statement; a record is only deleted when its stored version is lower than
     * the provided version.
     */
    makeDeletePool(db) {
        return db.prepare(`DELETE FROM ${TABLE} WHERE (${this.keyWhere}) AND ${FIELD_VERSION} < ?`);
    }

    /**
     * @returns {string} SQL condition that matches one record by partition key and sort key.
     */
    makeKeyWhere() {
        return `${FIELD_PK}=? AND ${FIELD_SK}=?`;
    }

    /**
     * Encodes a partition key and sort key into the two values expected by the key condition.
     *
     * @param partitionKey - Partition key parts.
     * @param [sortKey] - Sort key parts; an empty key is used when omitted.
     * @returns {string[]} `[encodedPartitionKey, encodedSortKey]`
     */
    makeKeyValues(partitionKey, sortKey) {
        const pk = encode(partitionKey);
        const sk = encode(sortKey ?? []);
        return [pk, sk];
    }

    /**
     * Finalizes all statement pools and closes the database. Does nothing when no database is open.
     *
     * @returns {Promise<void>}
     */
    async closeDatabase() {
        const connection = this.connection;
        if (!connection) {
            return;
        }
        // Forget the connection first, so that later calls fail with 'No statement pool' instead of
        // using finalized statements, and a second close is a no-op.
        this.connection = undefined;
        connection.poolDelete?.finalize();
        connection.poolLoad?.finalize();
        connection.poolQueryAsc?.finalize();
        connection.poolQueryDesc?.finalize();
        connection.poolQueryAscNoContents?.finalize();
        connection.poolQueryDescNoContents?.finalize();
        connection.poolStore?.finalize();
        connection.db.close();
    }
}
exports.FsPersistenceWorker = FsPersistenceWorker;

/**
 * Tells whether a projection filter requires the record contents to be read.
 *
 * @param {string[]} [filter] - Projection filter.
 * @returns {boolean} false only when the filter consists of the single entry '-*'; true otherwise.
 */
function isContentFilter(filter) {
    if (filter === undefined) {
        return true;
    }
    if (filter.length !== 1) {
        return true;
    }
    // NOTE(review): '-*' presumably means "exclude all fields" -- confirm against the filter syntax.
    return filter[0] !== '-*';
}

// The single worker instance that serves all requests received by this thread.
const worker = new FsPersistenceWorker(new utils_1.Time(), new filtering_1.Filterer(), new utils_1.MultiDeSer());
/*
export interface IFsPersistenceWorker {
    open(basePath: string, mode: 'readonly' | 'writable'): Promise<void>;
    close(): Promise<void>;
    load(options: IPersistenceLoadOptions): Promise<IPersistenceLoadResult>;
    query<T>(options: IPersistenceQueryOptions): Promise<IPersistenceQueryResult<T>>;
    storeBatch(options: IPersistenceStoreBatchOptions): Promise<IPersistenceStoreBatchResult>;
}*/
// Functions that are exposed to the thread that spawned this worker.
const workerdef = {
    open: (basePath, mode) => {
        return worker.openDatabase(basePath, mode);
    },
    close: () => {
        return worker.closeDatabase();
    },
    load: (options) => {
        return worker.load(options);
    },
    query: (options) => {
        return worker.query(options);
    },
    storeBatch: (options) => {
        return worker.storeBatch(options);
    }
};
(0, worker_1.expose)(workerdef);