@naturalcycles/datastore-lib
Version:
Opinionated library to work with Google Datastore, implements CommonDB
194 lines (193 loc) • 6.83 kB
JavaScript
import { Readable } from 'node:stream';
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
import { TimeoutError } from '@naturalcycles/js-lib/error';
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
import { getRunQueryOptions } from './query.util.js';
/**
 * Object-mode Readable that streams the rows of a query by running it in
 * consecutive batches: each batch is `q.limit(batchSize)`, started from the
 * `endCursor` reported by the previous batch.
 *
 * Options read from `opt` by this class:
 * - `batchSize` - rows requested per query, defaults to 1000.
 * - `highWaterMark` - stream buffer size in rows, defaults to 3x `batchSize`.
 * - `maxWait` - seconds; when set, a timer force-triggers `_read()` if no
 *   `_read()` happened for that long.
 * - `logger` / `logLevel` - passed to `createCommonLoggerAtLevel`.
 * `opt` is also passed as a whole to `getRunQueryOptions` to build the options
 * given to `q.run()`.
 */
export class DatastoreStreamReadable extends Readable {
  /** The query being streamed; batch queries are derived from it via `limit()`/`start()`. */
  q;
  /** First kind of the query, used in log messages. */
  table;
  /** `limitVal` of the original query; when truthy it caps the total number of rows. */
  originalLimit;
  /** Total number of rows received from all batches so far. */
  rowsRetrieved = 0;
  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  countReads = 0;
  /** Cursor returned by the last completed batch; the next batch starts from it. */
  endCursor;
  /** True while a batch query is in flight, so that `_read()` doesn't start a second one. */
  queryIsRunning = false;
  /** True when the last `push()` reported back-pressure and no follow-up query was started. */
  paused = false;
  /** True once the end of the stream (`push(null)`) has been signalled. */
  done = false;
  /** Timestamp (ms) at which the last batch finished; used to accumulate `totalWait`. */
  lastQueryDone;
  /** Accumulated milliseconds between the end of one batch and the start of the next. */
  totalWait = 0;
  /**
   * Used to support maxWait
   */
  lastReadTimestamp = 0;
  /** Handle of the `maxWait` timer, `undefined` when there is none (or it was cleared). */
  maxWaitInterval;
  /** Options with `batchSize` and `highWaterMark` defaults applied. */
  opt;
  logger;
  /** Options passed to `q.run()`, produced by `getRunQueryOptions(opt)`. */
  dsOpt;

  /**
   * @param q query to stream; this class reads `limitVal` and `kinds[0]` and calls
   *   `limit()`, `start()` and `run()` on it.
   * @param opt stream options, see the class description.
   */
  constructor(q, opt) {
    // 1_000 was optimal in benchmarks
    const { batchSize = 1000 } = opt;
    // Defaulting highWaterMark to 3x batchSize
    const { highWaterMark = batchSize * 3 } = opt;
    super({ objectMode: true, highWaterMark });
    this.q = q;
    this.opt = {
      ...opt,
      batchSize,
      highWaterMark,
    };
    this.dsOpt = getRunQueryOptions(opt);
    const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
    this.logger = logger;
    this.originalLimit = q.limitVal;
    this.table = q.kinds[0];
    logger.log(`!! using experimentalCursorStream`, {
      table: this.table,
      batchSize,
      highWaterMark,
    });
    const { maxWait } = this.opt;
    if (maxWait) {
      logger.log(`!! ${this.table} maxWait ${maxWait}`);
      // Checks twice per maxWait period whether _read has been silent for too long.
      this.maxWaitInterval = setInterval(() => {
        const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
        if (millisSinceLastRead < maxWait * 1000) {
          logger.log(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
          return;
        }
        const { queryIsRunning, rowsRetrieved } = this;
        logger.log(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
          running: queryIsRunning,
          rowsRetrieved,
        });
        // force-trigger _read
        // regardless of `running` status
        this._read();
      }, (maxWait * 1000) / 2);
    }
  }

  /**
   * Readable hook: called when the consumer wants more data (and by the maxWait timer).
   * Starts the next batch query unless the stream is done or a query is already in flight.
   */
  _read() {
    this.lastReadTimestamp = localTime.nowUnixMillis();
    this.countReads++;
    if (this.done) {
      this.logger.warn(`!!! ${this.table} _read was called, but done==true`);
      return;
    }
    if (this.paused) {
      this.logger.debug(`_read #${this.countReads}, queryIsRunning: ${this.queryIsRunning}, unpausing stream`);
      this.paused = false;
    }
    if (this.queryIsRunning) {
      this.logger.debug(`_read #${this.countReads}, queryIsRunning: true, doing nothing`);
      return;
    }
    this.startNextQuery();
  }

  /**
   * Readable hook: runs on every destroy (normal end, error, or consumer-initiated).
   * Clears the maxWait timer so that it can neither keep the process alive
   * nor call `_read()` on a destroyed stream.
   */
  _destroy(err, cb) {
    this.clearMaxWaitInterval();
    super._destroy(err, cb);
  }

  /** Stops the maxWait timer, if any. Safe to call repeatedly. */
  clearMaxWaitInterval() {
    clearInterval(this.maxWaitInterval);
    this.maxWaitInterval = undefined;
  }

  /**
   * Fire-and-forget wrapper around `runNextQuery()`.
   * Any rejection is logged and turned into a stream error, so it never
   * surfaces as an unhandled promise rejection.
   */
  startNextQuery() {
    void this.runNextQuery().catch(err => {
      this.logger.error('error in runNextQuery', err);
      this.destroy(err);
    });
  }

  /**
   * Runs one batch query, pushes its rows into the stream and then either
   * ends the stream, starts the next batch, or pauses until the next `_read()`.
   *
   * @returns {Promise<void>}
   */
  async runNextQuery() {
    if (this.done || this.destroyed) {
      return;
    }
    const { logger, table } = this;
    if (this.lastQueryDone) {
      this.totalWait += Date.now() - this.lastQueryDone;
    }
    this.queryIsRunning = true;
    let limit = this.opt.batchSize;
    if (this.originalLimit) {
      // Never request more rows than what remains of the original limit
      limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
    }
    let q = this.q.limit(limit);
    if (this.endCursor) {
      q = q.start(this.endCursor);
    }
    const started = localTime.nowUnixMillis();
    const res = await this.runQuery(q);
    const queryTook = Date.now() - started;
    if (!res || this.destroyed) {
      // Either the query failed (runQuery already destroyed the stream with the error),
      // or the stream was destroyed while the query was in flight: nothing to push.
      this.queryIsRunning = false;
      return;
    }
    const [rows, info] = res;
    this.rowsRetrieved += rows.length;
    logger.debug(`${table} got ${rows.length} rows in ${_ms(queryTook)}, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
    this.endCursor = info.endCursor;
    this.queryIsRunning = false; // ready to take more _reads
    this.lastQueryDone = Date.now();
    // An empty batch pushes nothing, and Readable only calls _read() again after a push(),
    // so an empty non-final batch must continue right away, otherwise the stream would stall.
    let shouldContinue = rows.length === 0;
    for (const row of rows) {
      shouldContinue = this.push(row);
    }
    const limitReached = this.originalLimit && this.rowsRetrieved >= this.originalLimit;
    if (!info.endCursor || info.moreResults === 'NO_MORE_RESULTS' || limitReached) {
      logger.log(`${table} stream is done, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
      this.push(null);
      this.done = true;
      this.paused = false;
      this.clearMaxWaitInterval();
      return;
    }
    if (shouldContinue) {
      // Keep the stream flowing
      logger.debug(`${table} continuing the stream`);
      this.startNextQuery();
      return;
    }
    // Not starting the next query: the buffer is full, wait for the next _read()
    if (this.paused) {
      logger.debug(`${table} stream is already paused`);
    }
    else {
      logger.debug(`${table} pausing the stream`);
      this.paused = true;
    }
  }

  /**
   * Executes `q.run()` through `pRetry`: up to 5 attempts, starting at a 5 second delay
   * that doubles each time, with a 2 minute timeout. Retries happen on `TimeoutError`
   * or when the lower-cased error message contains one of the `RETRY_ON` markers.
   *
   * @param q the batch query to run.
   * @returns the value resolved by `q.run()` (consumed by the caller as `[rows, info]`),
   *   or `undefined` if the query ultimately failed, in which case the stream
   *   has already been destroyed with the error.
   */
  async runQuery(q) {
    const { table, logger } = this;
    try {
      return await pRetry(async () => {
        return await q.run(this.dsOpt);
      }, {
        name: `DatastoreStreamReadable.query(${table})`,
        predicate: err => err instanceof TimeoutError ||
          RETRY_ON.some(s => err?.message?.toLowerCase()?.includes(s)),
        maxAttempts: 5,
        delay: 5000,
        delayMultiplier: 2,
        logger,
        timeout: 120_000, // 2 minutes
      });
    }
    catch (err) {
      logger.error(`DatastoreStreamReadable error!\n`, {
        table,
        rowsRetrieved: this.rowsRetrieved,
      }, err);
      // destroy() goes through _destroy(), which also clears the maxWait timer
      this.destroy(err);
    }
  }
}
// Lower-cased substrings that mark a query error as retryable: the retry predicate
// of DatastoreStreamReadable.runQuery checks whether the lower-cased error message
// contains any of them.
// Example of such an error:
// UNKNOWN: Stream removed
const RETRY_ON = Array.from([
  'GOAWAY',
  'UNAVAILABLE',
  'UNKNOWN',
  'DEADLINE_EXCEEDED',
  'ABORTED',
  'much contention',
  'try again',
  'timeout',
], marker => marker.toLowerCase());