@naturalcycles/datastore-lib
Version:
Opinionated library to work with Google Datastore, implements CommonDB
157 lines (156 loc) • 5.64 kB
JavaScript
import { Readable } from 'node:stream';
import { _ms } from '@naturalcycles/js-lib/datetime/time.util.js';
import { pRetry } from '@naturalcycles/js-lib/promise/pRetry.js';
/**
 * Object-mode Readable that streams the rows of a Datastore query by running
 * it in consecutive batches, each one resuming from the `endCursor` reported
 * by the previous batch.
 *
 * Options read from `opt`:
 * - `batchSize` (default 1000): limit applied to every batch query.
 * - `rssLimitMB` (default 1000): after a batch, the next one is started eagerly
 *   only while process RSS is at or below this value; otherwise the stream
 *   waits for the next `_read()` call.
 * - `singleBatchBuffer`: when truthy, the next batch is never started eagerly,
 *   only in response to `_read()`.
 * - `maxWait` (seconds): when set, a timer periodically calls `_read()` if no
 *   `_read()` happened for that long.
 * - `readAt`: multiplied by 1000 and passed to the query as `readTime`
 *   (presumably a unix timestamp in seconds - confirm against callers).
 */
export class DatastoreStreamReadable extends Readable {
  /** Base query; every batch is derived from it via `limit()` / `start()`. */
  q;
  logger;
  /** `limitVal` of the base query; falsy means "no overall limit". */
  originalLimit;
  /** Total number of rows received from Datastore so far. */
  rowsRetrieved = 0;
  /** Cursor returned by the most recent batch; the next batch starts from it. */
  endCursor;
  /** True while a batch query is in flight. */
  running = false;
  /** True once the stream has ended, failed or been destroyed. */
  done = false;
  /** Timestamp (ms) at which the most recent batch completed. */
  lastQueryDone;
  /** Accumulated time (ms) between a batch completing and the next one starting. */
  totalWait = 0;
  /** First kind of the query, used only in log messages. */
  table;
  /**
   * Used to support maxWait
   */
  lastReadTimestamp = 0;
  maxWaitInterval;
  /** Caller options merged over the defaults. */
  opt;
  /** Options object passed to every `q.run()` call. */
  dsOpt;
  /**
   * Counts how many times _read was called.
   * For debugging.
   */
  count = 0;

  /**
   * @param q query object exposing `limitVal`, `kinds`, `limit()`, `start()` and `run()`
   * @param opt stream options (see class description); may be omitted
   * @param logger object exposing `log()` and `warn()`
   */
  constructor(q, opt = {}, logger) {
    super({ objectMode: true });
    this.q = q;
    this.logger = logger;
    this.opt = {
      rssLimitMB: 1000,
      batchSize: 1000,
      ...opt,
    };
    this.dsOpt = {};
    if (opt.readAt) {
      // Datastore expects UnixTimestamp in milliseconds
      this.dsOpt.readTime = opt.readAt * 1000;
    }
    this.originalLimit = q.limitVal;
    this.table = q.kinds[0];
    // Log the resolved batchSize (defaults applied), not the raw argument.
    logger.log(`!! using experimentalCursorStream !! ${this.table}, batchSize: ${this.opt.batchSize}`);
    const { maxWait } = this.opt;
    if (maxWait) {
      this.logger.warn(`!! ${this.table} maxWait ${maxWait}`);
      this.maxWaitInterval = setInterval(() => {
        const millisSinceLastRead = Date.now() - this.lastReadTimestamp;
        if (millisSinceLastRead < maxWait * 1000) {
          this.logger.warn(`!! ${this.table} millisSinceLastRead(${millisSinceLastRead}) < maxWait*1000`);
          return;
        }
        const { running, rowsRetrieved } = this;
        this.logger.warn(`maxWait of ${maxWait} seconds reached, force-triggering _read`, {
          running,
          rowsRetrieved,
        });
        // Force-trigger _read. Note that _read() itself still refuses to start
        // a second query while one is in flight.
        this._read();
      }, (maxWait * 1000) / 2);
    }
  }

  /**
   * Runs one batch query, pushes its rows downstream and then either ends the
   * stream, waits for the next `_read()`, or eagerly starts the next batch.
   *
   * A failed query (after retries) destroys the stream with the error.
   */
  async runNextQuery() {
    if (this.done) {
      return;
    }
    if (this.lastQueryDone) {
      this.totalWait += Date.now() - this.lastQueryDone;
    }
    this.running = true;
    let limit = this.opt.batchSize;
    if (this.originalLimit) {
      // Never request more rows than what is left of the overall limit.
      limit = Math.min(this.opt.batchSize, this.originalLimit - this.rowsRetrieved);
    }
    let q = this.q.limit(limit);
    if (this.endCursor) {
      q = q.start(this.endCursor);
    }
    let rows = [];
    let info = {};
    try {
      await pRetry(async () => {
        const res = await q.run(this.dsOpt);
        rows = res[0];
        info = res[1];
      }, {
        name: `DatastoreStreamReadable.query(${this.table})`,
        maxAttempts: 5,
        delay: 5000,
        delayMultiplier: 2,
        logger: this.logger,
        timeout: 120_000, // 2 minutes
      });
    }
    catch (err) {
      console.log(`DatastoreStreamReadable error!\n`, {
        table: this.table,
        rowsRetrieved: this.rowsRetrieved,
      }, err);
      // destroy(err) emits 'error' and 'close' and, via _destroy, stops the
      // maxWait timer. Manually emitting 'error' would leave the stream
      // neither ended nor closed.
      this.destroy(err);
      return;
    }
    if (this.done) {
      // The stream was destroyed (or ended) while the query was in flight:
      // drop the batch instead of pushing into a dead stream.
      return;
    }
    this.rowsRetrieved += rows.length;
    this.logger.log(`${this.table} got ${rows.length} rows, ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
    if (!rows.length) {
      this.logger.warn(`${this.table} got 0 rows, totalWait: ${_ms(this.totalWait)}`, info.moreResults);
    }
    this.endCursor = info.endCursor;
    this.running = false; // ready to take more _reads
    this.lastQueryDone = Date.now();
    for (const row of rows) {
      this.push(row);
    }
    const limitReached = Boolean(this.originalLimit) && this.rowsRetrieved >= this.originalLimit;
    if (!info.endCursor || info.moreResults === 'NO_MORE_RESULTS' || limitReached) {
      this.logger.log(`!!!! DONE! ${this.rowsRetrieved} rowsRetrieved, totalWait: ${_ms(this.totalWait)}`);
      this.done = true;
      this.push(null);
      clearInterval(this.maxWaitInterval);
      return;
    }
    if (this.opt.singleBatchBuffer) {
      // Here we don't start the next query until we're asked (via the next _read call).
      return;
    }
    if (!this.opt.rssLimitMB) {
      return;
    }
    const rssMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
    if (rssMB > this.opt.rssLimitMB) {
      this.logger.warn(`${this.table} rssLimitMB reached ${rssMB} > ${this.opt.rssLimitMB}, pausing stream`);
      return;
    }
    if (!this.running) {
      // Skip if a query was already started re-entrantly while rows were pushed;
      // two queries from the same cursor would yield duplicate rows.
      this.runNextQuery().catch(err => this.destroy(err));
    }
  }

  /**
   * Readable hook: starts the next batch query unless one is already in
   * flight or the stream is done. Also records the call time for maxWait.
   */
  _read() {
    this.lastReadTimestamp = Date.now();
    this.count++;
    if (this.done) {
      this.logger.warn(`!!! _read was called, but done==true`);
      return;
    }
    if (this.running) {
      this.logger.log(`_read ${this.count}, wasRunning: true`);
      return;
    }
    this.runNextQuery().catch(err => {
      console.log('error in runNextQuery', err);
      this.destroy(err);
    });
  }

  /**
   * Readable hook invoked on destroy (explicit, after an error, or after the
   * stream ended): marks the stream as done and stops the maxWait timer so it
   * can neither leak nor trigger further queries.
   */
  _destroy(err, cb) {
    this.done = true;
    clearInterval(this.maxWaitInterval);
    this.maxWaitInterval = undefined;
    cb(err);
  }
}