UNPKG

@naturalcycles/datastore-lib

Version:

Opinionated library to work with Google Datastore, implements CommonDB

512 lines (511 loc) 19.2 kB
import { Transform } from 'node:stream'; import { BaseCommonDB, commonDBFullSupport } from '@naturalcycles/db-lib'; import { _round } from '@naturalcycles/js-lib'; import { _chunk } from '@naturalcycles/js-lib/array/array.util.js'; import { _assert } from '@naturalcycles/js-lib/error/assert.js'; import { _errorDataAppend, TimeoutError } from '@naturalcycles/js-lib/error/error.util.js'; import { commonLoggerMinLevel } from '@naturalcycles/js-lib/log'; import { _omit } from '@naturalcycles/js-lib/object/object.util.js'; import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'; import { pRetry, pRetryFn } from '@naturalcycles/js-lib/promise/pRetry.js'; import { pTimeout } from '@naturalcycles/js-lib/promise/pTimeout.js'; import { boldWhite } from '@naturalcycles/nodejs-lib/colors'; import { DatastoreType } from './datastore.model.js'; import { DatastoreStreamReadable } from './DatastoreStreamReadable.js'; import { dbQueryToDatastoreQuery } from './query.util.js'; // Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc. 
const MAX_ITEMS = 500;

// It's an empirical value, but anything less than infinity is better than infinity
const DATASTORE_RECOMMENDED_CONCURRENCY = 8;

// Substrings (lower-cased) that mark an error message as retryable, see `getPRetryOptions`.
// Examples of errors:
// UNKNOWN: Stream removed
const RETRY_ON = [
  'GOAWAY',
  'UNAVAILABLE',
  'UNKNOWN',
  'DEADLINE_EXCEEDED',
  'ABORTED',
  'much contention',
  'try again',
  'timeout',
].map(s => s.toLowerCase());

// Fingerprint value appended to the error data of TimeoutErrors raised by this class.
const DATASTORE_TIMEOUT = 'DATASTORE_TIMEOUT';

// Maps `opt.saveMethod` to the name of the method invoked on the Datastore client / transaction.
const methodMap = {
  insert: 'insert',
  update: 'update',
  upsert: 'save',
};

/**
 * CommonDB implementation backed by Google Datastore.
 *
 * Datastore API:
 * https://googlecloudplatform.github.io/google-cloud-node/#/docs/datastore/1.0.3/datastore
 * https://cloud.google.com/datastore/docs/datastore-api-tutorial
 */
export class DatastoreDB extends BaseCommonDB {
  support = {
    ...commonDBFullSupport,
    patchByQuery: false,
    increment: false,
  };

  /**
   * @param cfg configuration object; it is also passed as-is to the Datastore constructor.
   *   `logger` defaults to `console`.
   */
  constructor(cfg = {}) {
    super();
    this.cfg = {
      logger: console,
      ...cfg,
    };
  }

  cfg;

  cachedDatastore;

  /**
   * Datastore.KEY
   */
  KEY;

  /**
   * Returns the Datastore client, creating and caching it on first use.
   * Throws (via `_assert`) when `APP_ENV` is `test`.
   */
  // @memo() // not used to be able to connect to many DBs in the same server instance
  async ds() {
    if (!this.cachedDatastore) {
      _assert(
        process.env['APP_ENV'] !== 'test',
        'DatastoreDB cannot be used in Test env, please use InMemoryDB',
      );
      const DS = (await this.getDatastoreLib()).Datastore;
      this.cfg.projectId ||= this.cfg.credentials?.project_id || process.env['GOOGLE_CLOUD_PROJECT'];
      if (this.cfg.projectId) {
        this.cfg.logger.log(`DatastoreDB connected to ${boldWhite(this.cfg.projectId)}`);
      } else if (process.env['GOOGLE_APPLICATION_CREDENTIALS']) {
        this.cfg.logger.log(`DatastoreDB connected via GOOGLE_APPLICATION_CREDENTIALS`);
      }
      if (this.cfg.grpc) {
        this.cfg.logger.log('!!! DatastoreDB using custom grpc !!!');
      }
      this.cachedDatastore = new DS(this.cfg);
      this.KEY = this.cachedDatastore.KEY;
    }
    return this.cachedDatastore;
  }

  /**
   * Returns the `PropertyFilter` class of the lazily-loaded Datastore library.
   */
  async getPropertyFilter() {
    return (await this.getDatastoreLib()).PropertyFilter;
  }

  /**
   * Dynamically imports `@google-cloud/datastore`.
   */
  async getDatastoreLib() {
    // Lazy-loading
    const lib = await import('@google-cloud/datastore');
    return lib;
  }

  /**
   * Issues a stats query; rejects if that query fails.
   */
  async ping() {
    await this.getAllStats();
  }

  /**
   * Loads entities by id, maps them with `mapId` and returns them sorted by `id` (ascending).
   * Returns an empty array for empty `ids`.
   *
   * When `opt.tx` is given, the read is executed on the underlying Datastore transaction
   * (`opt.tx.tx`) in every code path, otherwise on the Datastore client.
   *
   * When `cfg.timeout` is set: the first attempt is guarded by `pTimeout`; on TimeoutError the
   * cached Datastore client is re-created and the read is retried with `pRetry` (up to 3 attempts).
   * A TimeoutError from the second try gets the DATASTORE_TIMEOUT fingerprint appended.
   * When `cfg.timeout` is not set: the read is wrapped in `pRetry` with the default retry options.
   */
  async getByIds(table, ids, opt = {}) {
    if (!ids.length) return [];
    let ds = await this.ds();
    const keys = ids.map(id => this.key(ds, table, id));
    let rows;
    const dsOpt = this.getRunQueryOptions(opt);
    if (this.cfg.timeout) {
      // First try
      try {
        const r = await pTimeout(() => (opt.tx?.tx || ds).get(keys, dsOpt), {
          timeout: this.cfg.timeout,
          name: `datastore.getByIds(${table})`,
        });
        rows = r[0];
      } catch (err) {
        if (!(err instanceof TimeoutError)) {
          // Not a timeout error, re-throw
          throw err;
        }
        this.cfg.logger.log('datastore recreated on error');
        // This is to debug "GCP Datastore Timeout issue"
        const datastoreLib = await this.getDatastoreLib();
        const DS = datastoreLib.Datastore;
        ds = this.cachedDatastore = new DS(this.cfg);
        // Second try (will throw)
        try {
          const r = await pRetry(() => (opt.tx?.tx || ds).get(keys, dsOpt), {
            ...this.getPRetryOptions(`datastore.getByIds(${table}) second try`),
            maxAttempts: 3,
            timeout: this.cfg.timeout,
          });
          rows = r[0];
        } catch (err) {
          if (err instanceof TimeoutError) {
            _errorDataAppend(err, {
              fingerprint: [DATASTORE_TIMEOUT],
            });
          }
          throw err;
        }
      }
    } else {
      rows = await pRetry(async () => {
        // Honor the transaction here too, same as in the timeout-guarded paths above,
        // so that reads issued through DatastoreDBTransaction stay inside the transaction.
        return (await (opt.tx?.tx || ds).get(keys, dsOpt))[0];
      }, this.getPRetryOptions(`datastore.getByIds(${table})`));
    }
    return (
      rows
        .map(r => this.mapId(r))
        // Seems like datastore .get() method doesn't return items properly sorted by input ids, so we gonna sort them here
        // same ids are not expected here
        .sort((a, b) => (a.id > b.id ? 1 : -1))
    );
  }

  // getQueryKind(q: Query): string {
  //   if (!q?.kinds?.length) return '' // should never be the case, but
  //   return q.kinds[0]!
  // }

  /**
   * Runs the query and returns `{ rows, ...queryResult }`.
   *
   * If the query has a filter on `id`, the rows are loaded via `getByIds` instead
   * (in that path only the `id` filter is used, and only `rows` is returned).
   */
  async runQuery(dbQuery, opt = {}) {
    const idFilter = dbQuery._filters.find(f => f.name === 'id');
    if (idFilter) {
      // '==' carries a single id, otherwise `val` is used as the list of ids
      const ids = idFilter.op === '==' ? [idFilter.val] : idFilter.val;
      return {
        rows: await this.getByIds(dbQuery.table, ids, opt),
      };
    }
    const ds = await this.ds();
    const q = dbQueryToDatastoreQuery(
      dbQuery,
      ds.createQuery(dbQuery.table),
      await this.getPropertyFilter(),
    );
    const dsOpt = this.getRunQueryOptions(opt);
    const qr = await this.runDatastoreQuery(q, dsOpt);
    // Special case when projection query didn't specify 'id'
    if (dbQuery._selectedFieldNames && !dbQuery._selectedFieldNames.includes('id')) {
      qr.rows = qr.rows.map(r => _omit(r, ['id']));
    }
    return qr;
  }

  /**
   * Runs a `count` aggregation over the query and returns the `count` of the first
   * aggregation result (undefined if there is none).
   */
  async runQueryCount(dbQuery, opt = {}) {
    const ds = await this.ds();
    const q = dbQueryToDatastoreQuery(
      dbQuery.select([]),
      ds.createQuery(dbQuery.table),
      await this.getPropertyFilter(),
    );
    const aq = ds.createAggregationQuery(q).count('count');
    const dsOpt = this.getRunQueryOptions(opt);
    const [entities] = await ds.runAggregationQuery(aq, dsOpt);
    return entities[0]?.count;
  }

  /**
   * Executes an already-built Datastore query and returns the query result info
   * merged with `rows` (entities passed through `mapId`).
   */
  async runDatastoreQuery(q, dsOpt) {
    const ds = await this.ds();
    const [entities, queryResult] = await ds.runQuery(q, dsOpt);
    const rows = entities.map(e => this.mapId(e));
    return {
      ...queryResult,
      rows,
    };
  }

  /**
   * Returns an object-mode Transform stream that emits query results passed through `mapId`.
   *
   * The source stream is created asynchronously (after the Datastore client is resolved) and
   * piped into the returned stream. Errors of the source stream are re-emitted on the returned
   * stream. If setting up the source fails (client creation, query building), the returned
   * stream is destroyed with that error, instead of leaving an unhandled promise rejection
   * and a stream that never ends.
   */
  streamQuery(dbQuery, _opt) {
    const transform = new Transform({
      objectMode: true,
      transform: (chunk, _, cb) => {
        cb(null, this.mapId(chunk));
      },
    });
    void this.ds()
      .then(async ds => {
        const q = dbQueryToDatastoreQuery(
          dbQuery,
          ds.createQuery(dbQuery.table),
          await this.getPropertyFilter(),
        );
        // Per-call options override cfg.streamOptions
        const opt = {
          ...this.cfg.streamOptions,
          ..._opt,
        };
        const source = opt.experimentalCursorStream
          ? new DatastoreStreamReadable(
              q,
              opt,
              commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'),
            )
          : ds.runQueryStream(q, this.getRunQueryOptions(opt));
        source.on('error', err => transform.emit('error', err)).pipe(transform);
      })
      .catch(err => {
        // Setup failed before any source stream existed: surface the error to the consumer
        transform.destroy(err);
      });
    return transform;
  }

  // https://github.com/GoogleCloudPlatform/nodejs-getting-started/blob/master/2-structured-data/books/model-datastore.js
  /**
   * Saves rows, in chunks of MAX_ITEMS, using the method selected by `opt.saveMethod`
   * (`upsert` => `save` by default). Executed on `opt.tx.tx` when a transaction is given.
   * Input rows are not mutated. Resolves with no value.
   *
   * Each chunk is retried according to `getPRetryOptions`. On failure the error is logged
   * and re-thrown; a TimeoutError gets the DATASTORE_TIMEOUT fingerprint appended.
   */
  async saveBatch(table, rows, opt = {}) {
    const ds = await this.ds();
    const entities = rows.map(obj => this.toDatastoreEntity(ds, table, obj, opt.excludeFromIndexes));
    const method = methodMap[opt.saveMethod || 'upsert'] || 'save';
    const save = pRetryFn(async batch => {
      await (opt.tx?.tx || ds)[method](batch);
    }, this.getPRetryOptions(`DatastoreLib.saveBatch(${table})`));
    try {
      const chunks = _chunk(entities, MAX_ITEMS);
      if (chunks.length === 1) {
        // Not using pMap in hope to preserve stack trace
        await save(chunks[0]);
      } else {
        await pMap(chunks, async batch => await save(batch), {
          concurrency: DATASTORE_RECOMMENDED_CONCURRENCY,
        });
      }
    } catch (err) {
      if (err instanceof TimeoutError) {
        _errorDataAppend(err, {
          fingerprint: [DATASTORE_TIMEOUT],
        });
      }
      // console.log(`datastore.save ${kind}`, { obj, entity })
      this.cfg.logger.error(
        `error in DatastoreLib.saveBatch for ${table} (${rows.length} rows)`,
        err,
      );
      throw err;
    }
  }

  /**
   * Deletes rows matching the query and returns the result of `deleteByIds`.
   *
   * If the query has a filter on `id`, those ids are deleted directly.
   * Otherwise the query is run (with an empty field selection) to obtain the ids to delete.
   */
  async deleteByQuery(q, opt = {}) {
    const idFilter = q._filters.find(f => f.name === 'id');
    if (idFilter) {
      const ids = idFilter.op === '==' ? [idFilter.val] : idFilter.val;
      return await this.deleteByIds(q.table, ids, opt);
    }
    const ds = await this.ds();
    const datastoreQuery = dbQueryToDatastoreQuery(
      q.select([]),
      ds.createQuery(q.table),
      await this.getPropertyFilter(),
    );
    const dsOpt = this.getRunQueryOptions(opt);
    const { rows } = await this.runDatastoreQuery(datastoreQuery, dsOpt);
    return await this.deleteByIds(
      q.table,
      rows.map(obj => obj.id),
      opt,
    );
  }

  /**
   * Deletes entities by id, in chunks of MAX_ITEMS, each chunk with retries.
   * Executed on `opt.tx.tx` when a transaction is given.
   *
   * Limitation: Datastore's delete returns void, so we always return all ids here as "deleted"
   * regardless if they were actually deleted or not.
   *
   * @returns `ids.length`
   */
  async deleteByIds(table, ids, opt = {}) {
    const ds = await this.ds();
    const keys = ids.map(id => this.key(ds, table, id));
    const retryOptions = this.getPRetryOptions(`DatastoreLib.deleteByIds(${table})`);
    await pMap(
      _chunk(keys, MAX_ITEMS),
      // async batch => await doDelete(batch),
      async batchOfKeys => {
        await pRetry(async () => {
          await (opt.tx?.tx || ds).delete(batchOfKeys);
        }, retryOptions);
      },
      {
        concurrency: DATASTORE_RECOMMENDED_CONCURRENCY,
      },
    );
    return ids.length;
  }

  /**
   * Starts (`run()`) a Datastore transaction and returns it wrapped in DatastoreDBTransaction.
   * The caller is responsible for commit/rollback.
   */
  async createTransaction(opt = {}) {
    const ds = await this.ds();
    const { readOnly } = opt;
    const datastoreTx = ds.transaction({
      readOnly,
    });
    await datastoreTx.run();
    return new DatastoreDBTransaction(this, datastoreTx);
  }

  /**
   * Runs `fn(tx)` inside a Datastore transaction and commits afterwards.
   * On any error (in run, `fn` or commit) the transaction is rolled back (silently)
   * and the original error is re-thrown.
   */
  async runInTransaction(fn, opt = {}) {
    const ds = await this.ds();
    const { readOnly } = opt;
    const datastoreTx = ds.transaction({
      readOnly,
    });
    try {
      await datastoreTx.run();
      const tx = new DatastoreDBTransaction(this, datastoreTx);
      await fn(tx);
      await datastoreTx.commit();
    } catch (err) {
      await this.rollback(datastoreTx);
      throw err;
    }
  }

  /**
   * Returns all entities of the `__Stat_Kind__` kind (or an empty array).
   */
  async getAllStats() {
    const ds = await this.ds();
    const q = ds.createQuery('__Stat_Kind__');
    const [statsArray] = await ds.runQuery(q);
    return statsArray || [];
  }

  /**
   * Returns the `__Stat_Kind__` entity for the given table.
   * Returns undefined e.g when Table is non-existing
   */
  async getStats(table) {
    const ds = await this.ds();
    const propertyFilter = await this.getPropertyFilter();
    const q = ds
      .createQuery('__Stat_Kind__')
      // .filter('kind_name', table)
      .filter(new propertyFilter('kind_name', '=', table))
      .limit(1);
    const [statsArray] = await ds.runQuery(q);
    const [stats] = statsArray;
    return stats;
  }

  /**
   * Returns `count` from the table stats, or undefined when there are no stats.
   */
  async getStatsCount(table) {
    const stats = await this.getStats(table);
    return stats?.count;
  }

  /**
   * Returns the `__Stat_PropertyType_PropertyName_Kind__` entities for the given table.
   */
  async getTableProperties(table) {
    const ds = await this.ds();
    const q = ds
      .createQuery('__Stat_PropertyType_PropertyName_Kind__')
      // .filter('kind_name', table)
      .filter(new (await this.getPropertyFilter())('kind_name', '=', table));
    const [stats] = await ds.runQuery(q);
    return stats;
  }

  /**
   * Returns a shallow copy of the entity with `id` taken from its Datastore key.
   * The key property (`this.KEY`) is removed from the copy unless `preserveKey` is true.
   * Falsy input is returned as-is.
   */
  mapId(o, preserveKey = false) {
    if (!o) return o;
    const r = {
      ...o,
      id: this.getKey(this.getDsKey(o)),
    };
    if (!preserveKey) delete r[this.KEY];
    return r;
  }

  /**
   * Converts a row into the `{ key, data, excludeFromIndexes }` shape passed to the
   * Datastore save methods. `data` is a shallow copy without `id` and without the key property.
   */
  // if key field exists on entity, it will be used as key (prevent to duplication of numeric keyed entities)
  toDatastoreEntity(ds, kind, o, excludeFromIndexes = []) {
    const key = this.getDsKey(o) || this.key(ds, kind, o.id);
    const data = Object.assign({}, o);
    delete data.id;
    delete data[this.KEY];
    return {
      key,
      data,
      excludeFromIndexes,
    };
  }

  /**
   * Builds a Datastore key for `[kind, id]`. Throws (via `_assert`) when `id` is falsy.
   */
  key(ds, kind, id) {
    _assert(id, `Cannot save "${kind}" entity without "id"`);
    return ds.key([kind, id]);
  }

  /**
   * Returns the Datastore key stored on the entity under `this.KEY` (undefined if absent).
   */
  getDsKey(o) {
    return o?.[this.KEY];
  }

  /**
   * Returns the key's `id` (or `name`, when `id` is falsy) as a string.
   */
  getKey(key) {
    const id = key.id || key.name;
    return id?.toString();
  }

  /**
   * No-op in this implementation.
   */
  async createTable(_table, _schema) {}

  /**
   * Returns the `kind_name` of every kind found in stats.
   */
  async getTables() {
    const statsArray = await this.getAllStats();
    // Filter out tables starting with `_` by default (internal Datastore tables)
    return statsArray.map(stats => stats.kind_name).filter(table => table && !table.startsWith('_'));
  }

  /**
   * Builds a JSON-schema-like object for the table from its property stats.
   * Throws an Error on a property type that is not handled below.
   */
  async getTableSchema(table) {
    const stats = await this.getTableProperties(table);
    const s = {
      $id: `${table}.schema.json`,
      type: 'object',
      properties: {
        id: { type: 'string' },
      },
      additionalProperties: true,
      required: [],
    };
    stats
      .filter(stat => !stat.property_name.includes('.') && stat.property_name !== 'id') // filter out objectify's "virtual properties"
      .forEach(stat => {
        const { property_type: dtype } = stat;
        const name = stat.property_name;
        if (dtype === DatastoreType.Blob) {
          s.properties[name] = {
            instanceof: 'Buffer',
          };
        } else if (dtype === DatastoreType.Text || dtype === DatastoreType.String) {
          s.properties[name] = {
            type: 'string',
          };
        } else if (dtype === DatastoreType.EmbeddedEntity) {
          s.properties[name] = {
            type: 'object',
            additionalProperties: true,
            properties: {},
            required: [],
          };
        } else if (dtype === DatastoreType.Integer) {
          s.properties[name] = {
            type: 'integer',
          };
        } else if (dtype === DatastoreType.Float) {
          s.properties[name] = {
            type: 'number',
          };
        } else if (dtype === DatastoreType.Boolean) {
          s.properties[name] = {
            type: 'boolean',
          };
        } else if (dtype === DatastoreType.DATE_TIME) {
          // Don't know how to map it properly
          s.properties[name] = {};
        } else if (dtype === DatastoreType.NULL) {
          // check, maybe we can just skip this type and do nothing?
          // `||=`: keeps a type that was already assigned to this property name
          s.properties[name] ||= {
            type: 'null',
          };
        } else {
          throw new Error(`Unknown Datastore Type '${stat.property_type}' for ${table}.${name}`);
        }
      });
    return s;
  }

  /**
   * Returns the retry options used for Datastore calls.
   * An error is retried when its lower-cased message includes one of the RETRY_ON substrings.
   */
  getPRetryOptions(name) {
    return {
      predicate: err => RETRY_ON.some(s => err?.message?.toLowerCase()?.includes(s)),
      name,
      timeout: 20_000,
      maxAttempts: 5,
      delay: 5000,
      delayMultiplier: 1.5,
      logFirstAttempt: false,
      logFailures: true,
      // logAll: true,
      logger: this.cfg.logger,
      // not appending fingerprint here, otherwise it would just group all kinds of errors, not just Timeout errors
      // errorData: {
      //   fingerprint: [DATASTORE_TIMEOUT],
      // },
    };
  }

  /**
   * Silently rollback the transaction.
   * It may happen that transaction is already committed/rolled back, so we don't want to throw an error here.
   */
  async rollback(datastoreTx) {
    try {
      await datastoreTx.rollback();
    } catch (err) {
      // log the error, but don't re-throw, as this should be a graceful rollback
      this.cfg.logger.error(err);
    }
  }

  /**
   * Translates CommonDB read options into Datastore run-query options.
   * Returns an empty object when `opt.readAt` is not set.
   */
  getRunQueryOptions(opt) {
    if (!opt.readAt) return {};
    return {
      // Datastore expects UnixTimestamp in milliseconds
      // Datastore requires the timestamp to be rounded to the whole minutes
      // NOTE(review): presumably `readAt` is a unix timestamp in seconds - confirm against callers
      readTime: _round(opt.readAt, 60) * 1000,
    };
  }
}

/**
 * Thin wrapper around a Datastore transaction (`tx`): read/save/delete calls are delegated
 * to the owning DatastoreDB (`db`) with `tx: this` added to the options.
 *
 * https://cloud.google.com/datastore/docs/concepts/transactions#datastore-datastore-transactional-update-nodejs
 */
export class DatastoreDBTransaction {
  db;

  tx;

  /**
   * @param db the DatastoreDB that created this transaction
   * @param tx the underlying Datastore transaction object
   */
  constructor(db, tx) {
    this.db = db;
    this.tx = tx;
  }

  /**
   * Commits the underlying transaction; rejects if the commit fails.
   */
  async commit() {
    await this.tx.commit();
  }

  /**
   * Rolls back the underlying transaction. Never throws: a rollback error is only logged.
   */
  async rollback() {
    try {
      await this.tx.rollback();
    } catch (err) {
      // log the error, but don't re-throw, as this should be a graceful rollback
      this.db.cfg.logger.error(err);
    }
  }

  /**
   * Same as DatastoreDB.getByIds, with this transaction passed in the options.
   */
  async getByIds(table, ids, opt) {
    return await this.db.getByIds(table, ids, { ...opt, tx: this });
  }

  /**
   * Same as DatastoreDB.saveBatch, with this transaction passed in the options.
   */
  async saveBatch(table, rows, opt) {
    await this.db.saveBatch(table, rows, { ...opt, tx: this });
  }

  /**
   * Same as DatastoreDB.deleteByIds, with this transaction passed in the options.
   */
  async deleteByIds(table, ids, opt) {
    return await this.db.deleteByIds(table, ids, { ...opt, tx: this });
  }
}