UNPKG

@naturalcycles/datastore-lib

Version:

Opinionated library to work with Google Datastore, implements CommonDB

194 lines (193 loc) 6.83 kB
import { Readable } from 'node:stream';
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
import { TimeoutError } from '@naturalcycles/js-lib/error';
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
import { getRunQueryOptions } from './query.util.js';

/**
 * Object-mode Readable that pages through a query in batches.
 *
 * Each batch is fetched by re-running the query with `limit(batchSize)` and,
 * once a cursor is known, `start(endCursor)`. Rows of a batch are pushed into
 * the stream one by one; the next batch is fetched immediately while `push()`
 * keeps returning true, otherwise the stream marks itself as paused and waits
 * for the next `_read()` call.
 */
export class DatastoreStreamReadable extends Readable {
    q;
    table;
    /**
     * `q.limitVal` as it was at construction time; a falsy value means "no limit".
     */
    originalLimit;
    rowsRetrieved = 0;
    /**
     * Counts how many times _read was called.
     * For debugging.
     */
    countReads = 0;
    endCursor;
    queryIsRunning = false;
    paused = false;
    done = false;
    /**
     * Timestamp (ms) at which the previous batch finished; used to accumulate `totalWait`.
     */
    lastQueryDone;
    totalWait = 0;
    /**
     * Used to support maxWait
     */
    lastReadTimestamp = 0;
    maxWaitInterval;
    opt;
    logger;
    dsOpt;

    /**
     * @param q query to stream; this class reads `q.limitVal` and `q.kinds[0]`
     *   and calls `q.limit()`, `q.start()` and `q.run()`.
     * @param opt options. Read here: `batchSize` (default 1000),
     *   `highWaterMark` (default `batchSize * 3`), `maxWait` (seconds, optional),
     *   `logger`, `logLevel`. The whole object is also passed to `getRunQueryOptions`.
     */
    constructor(q, opt) {
        // 1_000 was optimal in benchmarks
        const { batchSize = 1000 } = opt;
        // Defaulting highWaterMark to 3x batchSize
        const { highWaterMark = batchSize * 3 } = opt;
        super({ objectMode: true, highWaterMark });
        this.q = q;
        this.opt = {
            ...opt,
            batchSize,
            highWaterMark,
        };
        this.dsOpt = getRunQueryOptions(opt);
        const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
        this.logger = logger;
        this.originalLimit = q.limitVal;
        this.table = q.kinds[0];
        logger.log(`!! using experimentalCursorStream`, {
            table: this.table,
            batchSize,
            highWaterMark,
        });
        const { maxWait } = this.opt;
        if (maxWait) {
            logger.log(`!! ${this.table} maxWait ${maxWait}`);
            // Watchdog: checks twice per maxWait period whether _read has been
            // called recently, and force-triggers it if not.
            this.maxWaitInterval = setInterval(() => {
                const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
                if (millisSinceLastRead < maxWait * 1000) {
                    logger.log(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
                    return;
                }
                const { queryIsRunning, rowsRetrieved } = this;
                logger.log(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
                    running: queryIsRunning,
                    rowsRetrieved,
                });
                // force-trigger _read
                // regardless of `running` status
                this._read();
            }, (maxWait * 1000) / 2);
        }
    }

    /**
     * Readable hook: records the call, un-pauses the stream if it was paused,
     * and starts the next query unless one is already in flight or the stream is done.
     */
    _read() {
        this.lastReadTimestamp = localTime.nowUnixMillis();
        this.countReads++;
        if (this.done) {
            this.logger.warn(`!!! ${this.table} _read was called, but done==true`);
            return;
        }
        if (this.paused) {
            this.logger.debug(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
            this.paused = false;
        }
        if (this.queryIsRunning) {
            this.logger.debug(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
            return;
        }
        void this.runNextQuery().catch(err => {
            this.logger.error('error in runNextQuery', err);
            this.destroy(err);
        });
    }

    /**
     * Readable teardown hook. Runs for every destroy path (explicit `destroy()`,
     * error, or auto-destroy), so the maxWait timer can never outlive the stream
     * and keep force-triggering `_read` on it.
     *
     * @param err error passed to `destroy()`, if any
     * @param callback completion callback supplied by the stream machinery
     */
    _destroy(err, callback) {
        clearInterval(this.maxWaitInterval);
        super._destroy(err, callback);
    }

    /**
     * Fetches one batch, pushes its rows, then either ends the stream,
     * chains the next batch, or marks the stream as paused.
     *
     * @returns {Promise<void>}
     */
    async runNextQuery() {
        if (this.done || this.destroyed) return;
        const { logger, table } = this;
        if (this.lastQueryDone) {
            const now = Date.now();
            this.totalWait += now - this.lastQueryDone;
        }
        this.queryIsRunning = true;
        let limit = this.opt.batchSize;
        if (this.originalLimit) {
            // Never request more rows than remain under the caller's limit
            limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
        }
        let q = this.q.limit(limit);
        if (this.endCursor) {
            q = q.start(this.endCursor);
        }
        const started = localTime.nowUnixMillis();
        const res = await this.runQuery(q);
        const queryTook = Date.now() - started;
        if (!res) {
            // error already emitted in runQuery
            return;
        }
        if (this.destroyed) {
            // Stream was torn down while the query was in flight: nothing left to feed
            return;
        }
        const [rows, info] = res;
        this.rowsRetrieved += rows.length;
        logger.debug(`${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
        this.endCursor = info.endCursor;
        this.queryIsRunning = false; // ready to take more _reads
        this.lastQueryDone = Date.now();
        // Result of the LAST push decides whether to keep going; stays false for an empty batch
        let shouldContinue = false;
        for (const row of rows) {
            shouldContinue = this.push(row);
        }
        if (!info.endCursor ||
            info.moreResults === 'NO_MORE_RESULTS' ||
            (this.originalLimit && this.rowsRetrieved >= this.originalLimit)) {
            logger.log(`${table} stream is done, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
            this.push(null);
            this.done = true;
            this.paused = false;
            clearInterval(this.maxWaitInterval);
            return;
        }
        if (shouldContinue) {
            // Keep the stream flowing
            logger.debug(`${table} continuing the stream`);
            // Fire-and-forget, but never let a rejection go unhandled
            void this.runNextQuery().catch(err => {
                logger.error('error in runNextQuery', err);
                this.destroy(err);
            });
        }
        else {
            // Not starting the next query
            if (this.paused) {
                logger.debug(`${table} stream is already paused`);
            }
            else {
                logger.debug(`${table} pausing the stream`);
                this.paused = true;
            }
        }
    }

    /**
     * Runs `q` through `pRetry` (up to 5 attempts, 5s initial delay doubling
     * each time, 2 minute timeout), retrying on `TimeoutError` or on an error
     * whose lowercased message contains one of the `RETRY_ON` substrings.
     *
     * @param q query to run
     * @returns the result of `q.run()` (consumed by the caller as `[rows, info]`),
     *   or `undefined` if all attempts failed, in which case the stream has
     *   already been destroyed with the error.
     */
    async runQuery(q) {
        const { table, logger } = this;
        try {
            return await pRetry(async () => {
                return await q.run(this.dsOpt);
            }, {
                name: `DatastoreStreamReadable.query(${table})`,
                predicate: err => err instanceof TimeoutError ||
                    RETRY_ON.some(s => err?.message?.toLowerCase()?.includes(s)),
                maxAttempts: 5,
                delay: 5000,
                delayMultiplier: 2,
                logger,
                timeout: 120_000, // 2 minutes
            });
        }
        catch (err) {
            logger.error(`DatastoreStreamReadable error!\n`, {
                table,
                rowsRetrieved: this.rowsRetrieved,
            }, err);
            // destroy() invokes _destroy, which clears the maxWait timer
            this.destroy(err);
        }
    }
}

// Substrings (lowercased) that mark an error message as retryable.
// Examples of errors:
// UNKNOWN: Stream removed
const RETRY_ON = [
    'GOAWAY',
    'UNAVAILABLE',
    'UNKNOWN',
    'DEADLINE_EXCEEDED',
    'ABORTED',
    'much contention',
    'try again',
    'timeout',
].map(s => s.toLowerCase());