/*
 * @naturalcycles/datastore-lib
 * Version:
 * Opinionated library to work with Google Datastore, implements CommonDB
 * 512 lines (511 loc) • 19.2 kB
 * JavaScript
 */
import { Transform } from 'node:stream';
import { BaseCommonDB, commonDBFullSupport } from '@naturalcycles/db-lib';
import { _round } from '@naturalcycles/js-lib';
import { _chunk } from '@naturalcycles/js-lib/array/array.util.js';
import { _assert } from '@naturalcycles/js-lib/error/assert.js';
import { _errorDataAppend, TimeoutError } from '@naturalcycles/js-lib/error/error.util.js';
import { commonLoggerMinLevel } from '@naturalcycles/js-lib/log';
import { _omit } from '@naturalcycles/js-lib/object/object.util.js';
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
import { pRetry, pRetryFn } from '@naturalcycles/js-lib/promise/pRetry.js';
import { pTimeout } from '@naturalcycles/js-lib/promise/pTimeout.js';
import { boldWhite } from '@naturalcycles/nodejs-lib/colors';
import { DatastoreType } from './datastore.model.js';
import { DatastoreStreamReadable } from './DatastoreStreamReadable.js';
import { dbQueryToDatastoreQuery } from './query.util.js';
// Datastore (also Firestore and other Google APIs) supports max 500 of items when saving/deleting, etc.
const MAX_ITEMS = 500;
// It's an empyrical value, but anything less than infinity is better than infinity
const DATASTORE_RECOMMENDED_CONCURRENCY = 8;
const RETRY_ON = [
'GOAWAY',
'UNAVAILABLE',
'UNKNOWN',
'DEADLINE_EXCEEDED',
'ABORTED',
'much contention',
'try again',
'timeout',
].map(s => s.toLowerCase());
// Examples of errors:
// UNKNOWN: Stream removed
const DATASTORE_TIMEOUT = 'DATASTORE_TIMEOUT';
const methodMap = {
insert: 'insert',
update: 'update',
upsert: 'save',
};
/**
 * CommonDB implementation on top of Google Cloud Datastore.
 *
 * The Datastore client library is lazy-loaded, and the client instance is created
 * (and cached) on the first call to `ds()`.
 *
 * Datastore API:
 * https://googlecloudplatform.github.io/google-cloud-node/#/docs/datastore/1.0.3/datastore
 * https://cloud.google.com/datastore/docs/datastore-api-tutorial
 */
export class DatastoreDB extends BaseCommonDB {
  support = {
    ...commonDBFullSupport,
    patchByQuery: false,
    increment: false,
  };

  /**
   * @param cfg configuration; it is also passed as-is to the Datastore client constructor.
   * `logger` defaults to `console` when not provided.
   */
  constructor(cfg = {}) {
    super();
    this.cfg = {
      logger: console,
      ...cfg,
    };
  }

  cfg;
  cachedDatastore;
  /**
   * Datastore.KEY
   */
  KEY;

  /**
   * Returns the Datastore client, creating and caching it on first use.
   * Refuses to run when APP_ENV is `test`.
   */
  // @memo() // not used to be able to connect to many DBs in the same server instance
  async ds() {
    if (!this.cachedDatastore) {
      _assert(process.env['APP_ENV'] !== 'test', 'DatastoreDB cannot be used in Test env, please use InMemoryDB');
      const DS = (await this.getDatastoreLib()).Datastore;
      // Resolve projectId from cfg, then from credentials, then from the environment
      this.cfg.projectId ||= this.cfg.credentials?.project_id || process.env['GOOGLE_CLOUD_PROJECT'];
      if (this.cfg.projectId) {
        this.cfg.logger.log(`DatastoreDB connected to ${boldWhite(this.cfg.projectId)}`);
      }
      else if (process.env['GOOGLE_APPLICATION_CREDENTIALS']) {
        this.cfg.logger.log(`DatastoreDB connected via GOOGLE_APPLICATION_CREDENTIALS`);
      }
      if (this.cfg.grpc) {
        this.cfg.logger.log('!!! DatastoreDB using custom grpc !!!');
      }
      this.cachedDatastore = new DS(this.cfg);
      this.KEY = this.cachedDatastore.KEY;
    }
    return this.cachedDatastore;
  }

  /**
   * Returns the `PropertyFilter` class of the (lazy-loaded) Datastore library.
   */
  async getPropertyFilter() {
    return (await this.getDatastoreLib()).PropertyFilter;
  }

  /**
   * Dynamically imports `@google-cloud/datastore`.
   */
  async getDatastoreLib() {
    // Lazy-loading
    const lib = await import('@google-cloud/datastore');
    return lib;
  }

  /**
   * Checks connectivity by fetching the stats of all kinds.
   */
  async ping() {
    await this.getAllStats();
  }

  /**
   * Loads rows by ids. Returned rows are sorted by `id` (ascending).
   *
   * When `opt.tx` is provided, the read goes through that transaction.
   * When `cfg.timeout` is set, the first attempt is time-limited; on timeout the Datastore client
   * is re-created and the read is retried (up to 3 attempts).
   * Without `cfg.timeout` the read is retried according to `getPRetryOptions`.
   *
   * @returns an empty array when `ids` is empty
   * @throws the underlying error; a TimeoutError of the second try gets the DATASTORE_TIMEOUT fingerprint appended
   */
  async getByIds(table, ids, opt = {}) {
    if (!ids.length)
      return [];
    let ds = await this.ds();
    const keys = ids.map(id => this.key(ds, table, id));
    let rows;
    const dsOpt = this.getRunQueryOptions(opt);
    if (this.cfg.timeout) {
      // First try
      try {
        const r = await pTimeout(() => (opt.tx?.tx || ds).get(keys, dsOpt), {
          timeout: this.cfg.timeout,
          name: `datastore.getByIds(${table})`,
        });
        rows = r[0];
      }
      catch (err) {
        if (!(err instanceof TimeoutError)) {
          // Not a timeout error, re-throw
          throw err;
        }
        this.cfg.logger.log('datastore recreated on error');
        // This is to debug "GCP Datastore Timeout issue"
        const datastoreLib = await this.getDatastoreLib();
        const DS = datastoreLib.Datastore;
        ds = this.cachedDatastore = new DS(this.cfg);
        // Second try (will throw)
        try {
          const r = await pRetry(() => (opt.tx?.tx || ds).get(keys, dsOpt), {
            ...this.getPRetryOptions(`datastore.getByIds(${table}) second try`),
            maxAttempts: 3,
            timeout: this.cfg.timeout,
          });
          rows = r[0];
        }
        catch (err2) {
          if (err2 instanceof TimeoutError) {
            _errorDataAppend(err2, {
              fingerprint: [DATASTORE_TIMEOUT],
            });
          }
          throw err2;
        }
      }
    }
    else {
      rows = await pRetry(async () => {
        // Read through the transaction (when provided), same as the timeout branch does
        return (await (opt.tx?.tx || ds).get(keys, dsOpt))[0];
      }, this.getPRetryOptions(`datastore.getByIds(${table})`));
    }
    return (rows
      .map(r => this.mapId(r))
      // Seems like datastore .get() method doesn't return items properly sorted by input ids, so we gonna sort them here
      // same ids are not expected here, but the comparator still returns 0 for them to stay consistent
      .sort((a, b) => (a.id > b.id ? 1 : a.id < b.id ? -1 : 0)));
  }

  // getQueryKind(q: Query): string {
  //   if (!q?.kinds?.length) return '' // should never be the case, but
  //   return q.kinds[0]!
  // }

  /**
   * Runs the query and returns `{ rows, ... }`.
   *
   * If the query contains a filter on `id`, it is served by `getByIds` instead:
   * with the `==` operator the filter value is the single id, with any other operator
   * the filter value is used as the list of ids. Other parts of the query are not applied in that case.
   */
  async runQuery(dbQuery, opt = {}) {
    const idFilter = dbQuery._filters.find(f => f.name === 'id');
    if (idFilter) {
      const ids = idFilter.op === '==' ? [idFilter.val] : idFilter.val;
      return {
        rows: await this.getByIds(dbQuery.table, ids, opt),
      };
    }
    const ds = await this.ds();
    const q = dbQueryToDatastoreQuery(dbQuery, ds.createQuery(dbQuery.table), await this.getPropertyFilter());
    const dsOpt = this.getRunQueryOptions(opt);
    const qr = await this.runDatastoreQuery(q, dsOpt);
    // Special case when projection query didn't specify 'id'
    if (dbQuery._selectedFieldNames && !dbQuery._selectedFieldNames.includes('id')) {
      qr.rows = qr.rows.map(r => _omit(r, ['id']));
    }
    return qr;
  }

  /**
   * Counts the rows matching the query by running an aggregation query.
   */
  async runQueryCount(dbQuery, opt = {}) {
    const ds = await this.ds();
    const q = dbQueryToDatastoreQuery(dbQuery.select([]), ds.createQuery(dbQuery.table), await this.getPropertyFilter());
    const aq = ds.createAggregationQuery(q).count('count');
    const dsOpt = this.getRunQueryOptions(opt);
    const [entities] = await ds.runAggregationQuery(aq, dsOpt);
    return entities[0]?.count;
  }

  /**
   * Runs an already-built Datastore query.
   *
   * @returns the query result info returned by Datastore, plus `rows` (entities mapped with `mapId`)
   */
  async runDatastoreQuery(q, dsOpt) {
    const ds = await this.ds();
    const [entities, queryResult] = await ds.runQuery(q, dsOpt);
    const rows = entities.map(e => this.mapId(e));
    return {
      ...queryResult,
      rows,
    };
  }

  /**
   * Returns an object-mode stream of rows (each entity is passed through `mapId`).
   *
   * The stream is returned synchronously; the underlying Datastore stream is attached
   * asynchronously, once the client is ready. Any failure while setting it up
   * (or an error of the underlying stream) is surfaced as an error of the returned stream.
   *
   * Options are `cfg.streamOptions` overridden by `_opt`.
   */
  streamQuery(dbQuery, _opt) {
    const transform = new Transform({
      objectMode: true,
      transform: (chunk, _, cb) => {
        cb(null, this.mapId(chunk));
      },
    });
    void this.ds()
      .then(async (ds) => {
        const q = dbQueryToDatastoreQuery(dbQuery, ds.createQuery(dbQuery.table), await this.getPropertyFilter());
        const opt = {
          ...this.cfg.streamOptions,
          ..._opt,
        };
        (opt.experimentalCursorStream
          ? new DatastoreStreamReadable(q, opt, commonLoggerMinLevel(this.cfg.logger, opt.debug ? 'log' : 'warn'))
          : ds.runQueryStream(q, this.getRunQueryOptions(opt)))
          .on('error', err => transform.emit('error', err))
          .pipe(transform);
      })
      .catch(err => {
        // Without this handler a setup failure would be an unhandled rejection,
        // and the consumer of the returned stream would wait forever
        transform.destroy(err);
      });
    return transform;
  }

  // https://github.com/GoogleCloudPlatform/nodejs-getting-started/blob/master/2-structured-data/books/model-datastore.js
  /**
   * Saves rows in chunks of MAX_ITEMS, using the Datastore method that corresponds to `opt.saveMethod`
   * (`upsert` by default). Chunks are saved with limited concurrency, each save is retried
   * according to `getPRetryOptions`. When `opt.tx` is provided, saving goes through that transaction.
   *
   * Input rows are not mutated.
   *
   * @throws the underlying error (after logging it); TimeoutError gets the DATASTORE_TIMEOUT fingerprint appended
   */
  async saveBatch(table, rows, opt = {}) {
    const ds = await this.ds();
    const entities = rows.map(obj => this.toDatastoreEntity(ds, table, obj, opt.excludeFromIndexes));
    const method = methodMap[opt.saveMethod || 'upsert'] || 'save';
    const save = pRetryFn(async (batch) => {
      await (opt.tx?.tx || ds)[method](batch);
    }, this.getPRetryOptions(`DatastoreLib.saveBatch(${table})`));
    try {
      const chunks = _chunk(entities, MAX_ITEMS);
      if (chunks.length === 1) {
        // Not using pMap in hope to preserve stack trace
        await save(chunks[0]);
      }
      else {
        await pMap(chunks, async (batch) => await save(batch), {
          concurrency: DATASTORE_RECOMMENDED_CONCURRENCY,
        });
      }
    }
    catch (err) {
      if (err instanceof TimeoutError) {
        _errorDataAppend(err, {
          fingerprint: [DATASTORE_TIMEOUT],
        });
      }
      // console.log(`datastore.save ${kind}`, { obj, entity })
      this.cfg.logger.error(`error in DatastoreLib.saveBatch for ${table} (${rows.length} rows)`, err);
      throw err;
    }
  }

  /**
   * Deletes the rows matching the query.
   *
   * If the query contains a filter on `id`, the ids are taken from that filter
   * (same rules as in `runQuery`) and deleted directly. Otherwise the query is executed
   * with an empty selection to obtain the ids, which are then deleted.
   *
   * @returns the result of `deleteByIds`
   */
  async deleteByQuery(q, opt = {}) {
    const idFilter = q._filters.find(f => f.name === 'id');
    if (idFilter) {
      const ids = idFilter.op === '==' ? [idFilter.val] : idFilter.val;
      return await this.deleteByIds(q.table, ids, opt);
    }
    const ds = await this.ds();
    const datastoreQuery = dbQueryToDatastoreQuery(q.select([]), ds.createQuery(q.table), await this.getPropertyFilter());
    const dsOpt = this.getRunQueryOptions(opt);
    const { rows } = await this.runDatastoreQuery(datastoreQuery, dsOpt);
    return await this.deleteByIds(q.table, rows.map(obj => obj.id), opt);
  }

  /**
   * Deletes rows by ids, in chunks of MAX_ITEMS, with limited concurrency and retries.
   * When `opt.tx` is provided, deletion goes through that transaction.
   *
   * Limitation: Datastore's delete returns void, so we always return all ids here as "deleted"
   * regardless if they were actually deleted or not.
   *
   * @returns `ids.length`
   */
  async deleteByIds(table, ids, opt = {}) {
    const ds = await this.ds();
    const keys = ids.map(id => this.key(ds, table, id));
    const retryOptions = this.getPRetryOptions(`DatastoreLib.deleteByIds(${table})`);
    await pMap(_chunk(keys, MAX_ITEMS),
      // async batch => await doDelete(batch),
      async (batchOfKeys) => {
        await pRetry(async () => {
          await (opt.tx?.tx || ds).delete(batchOfKeys);
        }, retryOptions);
      }, {
        concurrency: DATASTORE_RECOMMENDED_CONCURRENCY,
      });
    return ids.length;
  }

  /**
   * Starts a Datastore transaction and returns it wrapped in DatastoreDBTransaction.
   * The caller is responsible for committing it or rolling it back.
   */
  async createTransaction(opt = {}) {
    const ds = await this.ds();
    const { readOnly } = opt;
    const datastoreTx = ds.transaction({
      readOnly,
    });
    await datastoreTx.run();
    return new DatastoreDBTransaction(this, datastoreTx);
  }

  /**
   * Runs `fn` inside a transaction: commits when `fn` resolves,
   * otherwise rolls back (silently) and re-throws the error.
   */
  async runInTransaction(fn, opt = {}) {
    const ds = await this.ds();
    const { readOnly } = opt;
    const datastoreTx = ds.transaction({
      readOnly,
    });
    try {
      await datastoreTx.run();
      const tx = new DatastoreDBTransaction(this, datastoreTx);
      await fn(tx);
      await datastoreTx.commit();
    }
    catch (err) {
      await this.rollback(datastoreTx);
      throw err;
    }
  }

  /**
   * Returns the entities of the `__Stat_Kind__` kind (empty array if there are none).
   */
  async getAllStats() {
    const ds = await this.ds();
    const q = ds.createQuery('__Stat_Kind__');
    const [statsArray] = await ds.runQuery(q);
    return statsArray || [];
  }

  /**
   * Returns the `__Stat_Kind__` entity of the given table.
   * Returns undefined e.g when Table is non-existing
   */
  async getStats(table) {
    const ds = await this.ds();
    const propertyFilter = await this.getPropertyFilter();
    const q = ds
      .createQuery('__Stat_Kind__')
      // .filter('kind_name', table)
      .filter(new propertyFilter('kind_name', '=', table))
      .limit(1);
    const [statsArray] = await ds.runQuery(q);
    const [stats] = statsArray;
    return stats;
  }

  /**
   * Returns `count` from the stats of the given table, or undefined when there are no stats.
   */
  async getStatsCount(table) {
    const stats = await this.getStats(table);
    return stats?.count;
  }

  /**
   * Returns the `__Stat_PropertyType_PropertyName_Kind__` entities of the given table.
   */
  async getTableProperties(table) {
    const ds = await this.ds();
    const q = ds
      .createQuery('__Stat_PropertyType_PropertyName_Kind__')
      // .filter('kind_name', table)
      .filter(new (await this.getPropertyFilter())('kind_name', '=', table));
    const [stats] = await ds.runQuery(q);
    return stats;
  }

  /**
   * Returns a shallow copy of the entity with `id` taken from its Datastore key.
   * The `KEY` property is removed from the copy, unless `preserveKey` is true.
   * Falsy input is returned as-is.
   */
  mapId(o, preserveKey = false) {
    if (!o)
      return o;
    const r = {
      ...o,
      id: this.getKey(this.getDsKey(o)),
    };
    if (!preserveKey)
      delete r[this.KEY];
    return r;
  }

  /**
   * Converts a row into `{ key, data, excludeFromIndexes }`.
   * `data` is a shallow copy of the row without `id` and without the `KEY` property.
   */
  // if key field exists on entity, it will be used as key (prevent to duplication of numeric keyed entities)
  toDatastoreEntity(ds, kind, o, excludeFromIndexes = []) {
    const key = this.getDsKey(o) || this.key(ds, kind, o.id);
    const data = Object.assign({}, o);
    delete data.id;
    delete data[this.KEY];
    return {
      key,
      data,
      excludeFromIndexes,
    };
  }

  /**
   * Builds a Datastore key from kind and id.
   *
   * @throws when `id` is falsy
   */
  key(ds, kind, id) {
    _assert(id, `Cannot save "${kind}" entity without "id"`);
    return ds.key([kind, id]);
  }

  /**
   * Returns the Datastore key stored on the entity under the `KEY` property (undefined if absent).
   */
  getDsKey(o) {
    return o?.[this.KEY];
  }

  /**
   * Returns the key's `id` (or `name`, when `id` is falsy) as a string.
   */
  getKey(key) {
    const id = key.id || key.name;
    return id?.toString();
  }

  /**
   * No-op.
   */
  async createTable(_table, _schema) { }

  /**
   * Returns the kind names found in the stats.
   */
  async getTables() {
    const statsArray = await this.getAllStats();
    // Filter out tables starting with `_` by default (internal Datastore tables)
    return statsArray.map(stats => stats.kind_name).filter(table => table && !table.startsWith('_'));
  }

  /**
   * Builds a JSON-schema-like description of the table from its property stats.
   *
   * @throws on a property type that has no mapping
   */
  async getTableSchema(table) {
    const propertyStats = await this.getTableProperties(table);
    const schema = {
      $id: `${table}.schema.json`,
      type: 'object',
      properties: {
        id: { type: 'string' },
      },
      additionalProperties: true,
      required: [],
    };
    propertyStats
      .filter(p => !p.property_name.includes('.') && p.property_name !== 'id') // filter out objectify's "virtual properties"
      .forEach(p => {
        const { property_type: dtype } = p;
        const name = p.property_name;
        if (dtype === DatastoreType.Blob) {
          schema.properties[name] = {
            instanceof: 'Buffer',
          };
        }
        else if (dtype === DatastoreType.Text || dtype === DatastoreType.String) {
          schema.properties[name] = {
            type: 'string',
          };
        }
        else if (dtype === DatastoreType.EmbeddedEntity) {
          schema.properties[name] = {
            type: 'object',
            additionalProperties: true,
            properties: {},
            required: [],
          };
        }
        else if (dtype === DatastoreType.Integer) {
          schema.properties[name] = {
            type: 'integer',
          };
        }
        else if (dtype === DatastoreType.Float) {
          schema.properties[name] = {
            type: 'number',
          };
        }
        else if (dtype === DatastoreType.Boolean) {
          schema.properties[name] = {
            type: 'boolean',
          };
        }
        else if (dtype === DatastoreType.DATE_TIME) {
          // Don't know how to map it properly
          schema.properties[name] = {};
        }
        else if (dtype === DatastoreType.NULL) {
          // check, maybe we can just skip this type and do nothing?
          // Only set when the property has no mapping yet (from a stats entry with another type)
          schema.properties[name] ||= {
            type: 'null',
          };
        }
        else {
          throw new Error(`Unknown Datastore Type '${p.property_type}' for ${table}.${name}`);
        }
      });
    return schema;
  }

  /**
   * Returns the retry options used for Datastore calls.
   * An error is retried when its message contains (case-insensitively) one of the RETRY_ON fragments.
   */
  getPRetryOptions(name) {
    return {
      predicate: err => RETRY_ON.some(s => err?.message?.toLowerCase()?.includes(s)),
      name,
      timeout: 20_000,
      maxAttempts: 5,
      delay: 5000,
      delayMultiplier: 1.5,
      logFirstAttempt: false,
      logFailures: true,
      // logAll: true,
      logger: this.cfg.logger,
      // not appending fingerprint here, otherwise it would just group all kinds of errors, not just Timeout errors
      // errorData: {
      //   fingerprint: [DATASTORE_TIMEOUT],
      // },
    };
  }

  /**
   * Silently rollback the transaction.
   * It may happen that transaction is already committed/rolled back, so we don't want to throw an error here.
   */
  async rollback(datastoreTx) {
    try {
      await datastoreTx.rollback();
    }
    catch (err) {
      // log the error, but don't re-throw, as this should be a graceful rollback
      this.cfg.logger.error(err);
    }
  }

  /**
   * Converts read options into Datastore run-query options.
   * Returns an empty object when `opt.readAt` is not set.
   */
  getRunQueryOptions(opt) {
    if (!opt.readAt)
      return {};
    return {
      // Datastore expects UnixTimestamp in milliseconds
      // Datastore requires the timestamp to be rounded to the whole minutes
      readTime: _round(opt.readAt, 60) * 1000,
    };
  }
}
/**
* https://cloud.google.com/datastore/docs/concepts/transactions#datastore-datastore-transactional-update-nodejs
*/
export class DatastoreDBTransaction {
db;
tx;
constructor(db, tx) {
this.db = db;
this.tx = tx;
}
async commit() {
await this.tx.commit();
}
async rollback() {
try {
await this.tx.rollback();
}
catch (err) {
// log the error, but don't re-throw, as this should be a graceful rollback
this.db.cfg.logger.error(err);
}
}
async getByIds(table, ids, opt) {
return await this.db.getByIds(table, ids, { ...opt, tx: this });
}
async saveBatch(table, rows, opt) {
await this.db.saveBatch(table, rows, { ...opt, tx: this });
}
async deleteByIds(table, ids, opt) {
return await this.db.deleteByIds(table, ids, { ...opt, tx: this });
}
}