/*
 * @clickup/ent-framework
 * Version: (not specified)
 * A PostgreSQL graph-database-alike library with microsharding and row-level security.
 * 449 lines, 21.9 kB, JavaScript.
 */
"use strict";
/**
 * Decorator application helper (reuses `this.__decorate` when one is already
 * present on `this`).
 *
 * Called with 2 arguments it applies class decorators to `target`; with 3
 * arguments it applies decorators to a member without a descriptor; with 4
 * arguments it applies decorators to the member's property descriptor (when
 * `desc` is null, the descriptor is looked up on `target`) and redefines the
 * property with the resulting descriptor. Decorators are applied from the last
 * one to the first one. Returns the final decorated value (or descriptor).
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else if (desc === null) {
        desc = Object.getOwnPropertyDescriptor(target, key);
        result = desc;
    }
    else {
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        // Delegate to a Reflect.decorate() implementation when one is installed.
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (let index = decorators.length - 1; index >= 0; index--) {
            const decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            let produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A decorator returning a falsy value keeps the previous result.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
/**
 * Interop helper for default imports: a module flagged with `__esModule` is
 * returned as is; any other value is wrapped as `{ default: value }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Cluster = void 0;
const util_1 = require("util");
const delay_1 = __importDefault(require("delay"));
const fast_typescript_memoize_1 = require("fast-typescript-memoize");
const defaults_1 = __importDefault(require("lodash/defaults"));
const random_1 = __importDefault(require("lodash/random"));
const p_timeout_1 = __importDefault(require("p-timeout"));
const CachedRefreshedValue_1 = require("../internal/CachedRefreshedValue");
const misc_1 = require("../internal/misc");
const Registry_1 = require("../internal/Registry");
const ClientError_1 = require("./ClientError");
const Island_1 = require("./Island");
const Shard_1 = require("./Shard");
const ShardIsNotDiscoverableError_1 = require("./ShardIsNotDiscoverableError");
/**
 * A delay() built via createWithTimers(): every timer it schedules is
 * unref()ed right after creation.
 */
const delay = delay_1.default.createWithTimers({
    setTimeout: (...timerArgs) => {
        const handle = setTimeout(...timerArgs);
        return handle.unref();
    },
    clearTimeout: (...timerArgs) => clearTimeout(...timerArgs),
});
/**
* Cluster is a collection of Islands and an orchestration of shardNo -> Island
* resolution.
*
* It's unknown beforehand, which Island some particular Shard belongs to; the
* resolution is done asynchronously and lazily.
*
* Shard 0 is a special "global" Shard.
*/
class Cluster {
    /**
     * Initializes the Cluster, but doesn't send any queries yet, even discovery
     * queries (also, no implicit prewarming).
     *
     * @param options Cluster options; the ones which are not provided are
     *   filled from Cluster.DEFAULT_OPTIONS.
     */
    constructor(options) {
        /** Once set to true, Clients for newly appearing nodes will be pre-warmed. */
        this.prewarmEnabled = false;
        this.options = (0, defaults_1.default)({}, options, Cluster.DEFAULT_OPTIONS);
        // NaN is the default value of reloadIslandsIntervalMs (see
        // DEFAULT_OPTIONS). In that case, pick the interval depending on whether
        // `options.islands` is an async function or not.
        if (Number.isNaN(this.options.reloadIslandsIntervalMs)) {
            this.options.reloadIslandsIntervalMs = util_1.types.isAsyncFunction(this.options.islands)
                ? (0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs)
                : 500;
        }
        // Clients are keyed by the hash of their node config.
        this.clientRegistry = new Registry_1.Registry({
            key: (node) => (0, misc_1.jsonHash)(node),
            create: (node) => {
                const client = this.options.createClient(node);
                client.options.shardNamer ??= this.options.shardNamer;
                // Chain the loggers: Cluster-level loggers are called first, then
                // the loggers originally defined on the Client itself.
                const loggers = { ...client.options.loggers };
                client.options.loggers = {
                    swallowedErrorLogger: (props) => {
                        this.options.loggers.swallowedErrorLogger(props);
                        loggers.swallowedErrorLogger?.(props);
                    },
                    clientQueryLogger: (props) => {
                        this.options.loggers.clientQueryLogger?.(props);
                        loggers.clientQueryLogger?.(props);
                    },
                };
                return client;
            },
            end: async (client) => {
                // Errors of Client#end() are logged and swallowed.
                const startTime = performance.now();
                await client.end().catch((error) => this.options.loggers.swallowedErrorLogger({
                    where: `${this.constructor.name}.clientRegistry`,
                    error,
                    elapsed: Math.round(performance.now() - startTime),
                    importance: "normal",
                }));
            },
        });
        // Islands are keyed by the hash of their number and the list of nodes.
        this.islandRegistry = new Registry_1.Registry({
            key: ({ no, nodes }) => (0, misc_1.jsonHash)({ no, nodes }),
            create: ({ no, clients }) => new Island_1.Island({
                no,
                clients,
                createShard: (no) => this.shardByNo(no),
                localCache: this.options.localCache ?? undefined,
            }),
        });
        this.islandsCache = new CachedRefreshedValue_1.CachedRefreshedValue({
            delayMs: () => Math.round((0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs) *
                (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter))),
            warningTimeoutMs: () => (0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs),
            deps: {
                // If `options.islands` is reassigned externally (e.g. in a unit test),
                // then it will be reflected in `await this.islandsCache.cached()`
                // within `deps.delayMs` (not immediately). To expedite this (and Shards
                // map) recheck, call Cluster#rediscover().
                delayMs: 50,
                // We use the value of `options.islands` itself as a dependency, so if
                // `options.islands` is reassigned externally, then we'll catch the
                // change quickly, within `deps.delayMs`. We do NOT call
                // `options.islands()` intentionally, we use its value - since we just
                // want to check for reassignment (e.g. in unit tests).
                handler: () => this.options.islands,
            },
            resolverName: "Cluster#options.islands",
            resolverFn: async () => (0, misc_1.maybeAsyncCall)(this.options.islands),
            delay,
            onError: (error, elapsed) => this.options.loggers.swallowedErrorLogger({
                where: `${this.constructor.name}.islandsCache`,
                error,
                elapsed,
                importance: "normal",
            }),
        });
        this.shardsDiscoverCache = new CachedRefreshedValue_1.CachedRefreshedValue({
            delayMs: () => Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) *
                (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter))),
            warningTimeoutMs: () => (0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs),
            deps: {
                // The dependency here is the hash of the current list of Islands.
                delayMs: () => (0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs),
                handler: async () => (0, misc_1.jsonHash)(await (0, misc_1.maybeAsyncCall)(this.islandsCache.cached())),
            },
            resolverName: "Cluster#shardsDiscoverExpensive",
            resolverFn: async () => this.shardsDiscoverExpensive(),
            delay,
            onError: (error, elapsed) => this.options.loggers.swallowedErrorLogger({
                where: `${this.constructor.name}.shardsDiscoverCache`,
                error,
                elapsed,
                importance: "normal",
            }),
        });
    }
    /**
     * Signals the Cluster to keep the Clients pre-warmed, e.g. open. (It's up to
     * the particular Client's implementation, what does a "pre-warmed Client"
     * mean; typically, it's keeping some minimal number of pooled connections.)
     *
     * Except when `randomizedDelayMs` is passed as 0, the actual prewarm (and
     * Islands discovery) queries will run with a randomized delay between N/2 and
     * N ms. It is better to operate in such mode: if multiple Node processes
     * start simultaneously in the cluster, then the randomization helps to avoid
     * new connections burst (new connections establishment is expensive for e.g.
     * pgbouncer or when DB is accessed over SSL).
     *
     * Calling this method more than once has no effect.
     *
     * @param randomizedDelayMs Upper bound (N) of the randomized initial delay.
     * @param onInitialPrewarm Optional callback which receives the actually
     *   chosen initial delay right before the initial prewarm starts.
     */
    prewarm(randomizedDelayMs = 5000, onInitialPrewarm) {
        if (this.prewarmEnabled) {
            return;
        }
        this.prewarmEnabled = true;
        const initialDelayMs = randomizedDelayMs
            ? Math.round((0, random_1.default)(randomizedDelayMs / 2, randomizedDelayMs))
            : 0;
        // The timer is unref()ed.
        setTimeout(() => (0, misc_1.runInVoid)(async () => {
            onInitialPrewarm?.(initialDelayMs);
            for (const island of await this.islands()) {
                for (const client of island.clients) {
                    client.prewarm();
                }
            }
        }), initialDelayMs).unref();
    }
    /**
     * Returns a global Shard of the Cluster. This method is made synchronous
     * intentionally, to defer the I/O and possible errors to the moment of the
     * actual query.
     */
    globalShard() {
        return this.shardByNo(0);
    }
    /**
     * Returns all currently known (discovered) non-global Shards in the Cluster.
     */
    async nonGlobalShards() {
        const { nonGlobalShardNos } = await this.shardsDiscoverCache.cached();
        return nonGlobalShardNos.map((shardNo) => this.shardByNo(shardNo));
    }
    /**
     * Returns Shard of a particular id. This method is made synchronous
     * intentionally, to defer the I/O and possible errors to the moment of the
     * actual query.
     *
     * Why is it important? Because Shards may go up and down temporarily at
     * random moments of time. Imagine we made this method async and asserted that
     * the Shard is actually available at the moment when the method is called.
     * What would happen if the Shard object was stored somewhere as "successful"
     * by the caller, then the Island went down, and then a query is sent to the
     * Shard in, say, 20 seconds? We'd get an absolutely different exception, at
     * the moment of the query. We don't want this to happen: we want all of the
     * exceptions to be thrown with a consistent call stack (e.g. at the moment of
     * the query), no matter whether it was an immediate call or a deferred one.
     *
     * If no `shardNamer` option is configured, the global Shard (number 0) is
     * returned.
     */
    shard(id) {
        return this.shardByNo(this.options.shardNamer ? this.options.shardNamer.shardNoByID(id) : 0);
    }
    /**
     * Returns a Shard if we know its number. The idea: for each Shard number
     * (even for non-discovered yet Shards), we keep the corresponding Shard
     * object in a Memoize cache, so Shards with the same number always resolve
     * into the same Shard object. Then, an actual Island locating process happens
     * when the caller wants to get a Client of that Shard (and it throws if such
     * Shard hasn't been discovered actually).
     */
    shardByNo(shardNo) {
        return new Shard_1.Shard(shardNo, this.runOnShard.bind(this));
    }
    /**
     * Returns a random Shard among the ones which are currently known
     * (discovered) in the Cluster. If `seed` is passed, then the Shard is chosen
     * based on the hash of that seed and not randomly. Throws if there are no
     * non-global Shards.
     */
    async randomShard(seed) {
        const { nonGlobalShardNos } = await this.shardsDiscoverCache.cached();
        let index;
        if (seed !== undefined) {
            const numHash = (0, misc_1.objectHash)(seed).readUInt32BE();
            index = numHash % nonGlobalShardNos.length;
        }
        else {
            // TODO: implement power-of-two algorithm to pick the Shard which is
            // smallest in size.
            index = (0, random_1.default)(0, nonGlobalShardNos.length - 1);
        }
        return this.shardByNo((0, misc_1.nullthrows)(nonGlobalShardNos[index], () => "There are no non-global Shards in the Cluster"));
    }
    /**
     * Returns an Island by its number. Throws if there is no such Island.
     */
    async island(islandNo) {
        const { islandNoToIsland } = await this.shardsDiscoverCache.cached();
        return (0, misc_1.nullthrows)(islandNoToIsland.get(islandNo), () => `No such Island: ${islandNo}`);
    }
    /**
     * Returns all Islands in the Cluster.
     */
    async islands() {
        const { islandNoToIsland } = await this.shardsDiscoverCache.cached();
        return [...islandNoToIsland.values()];
    }
    /**
     * Triggers shards rediscovery and finishes as soon as it's done. To be used
     * in unit tests mostly, because in real life, it's enough to just modify the
     * cluster configuration.
     *
     * @param what Pass "islands" or "shards" to refresh only that cache; when
     *   omitted, both are refreshed (Islands first, then Shards).
     */
    async rediscover(what) {
        if (!what || what === "islands") {
            await this.islandsCache.refreshAndWait();
        }
        if (!what || what === "shards") {
            await this.shardsDiscoverCache.refreshAndWait();
        }
    }
    /**
     * Runs the body function with retries. The Island injected into the body
     * function is located automatically by the Shard number. In case of an error
     * after any run attempt, calls onAttemptError().
     *
     * Only ClientError errors are retried, and no more than
     * `options.runOnShardErrorRetryCount` times; what happens before the retry
     * is defined by the error's `postAction`.
     *
     * @param shardNo Number of the Shard to locate the Island for.
     * @param body Function called with the located Island and the attempt number.
     * @param onAttemptError Optional callback called with the error and the
     *   attempt number after each failed attempt.
     */
    async runOnShard(shardNo, body, onAttemptError) {
        for (let attempt = 0;; attempt++) {
            let island;
            try {
                // Re-read Islands map on every retry, because it might change.
                const startTime = performance.now();
                const { shardNoToIslandNo, islandNoToIsland, errors } = await this.shardsDiscoverCache.cached();
                const islandNo = shardNoToIslandNo.get(shardNo);
                if (islandNo === undefined) {
                    // Notice that we don't retry ShardIsNotDiscoverableError below (it's
                    // not a ClientError) to avoid DoS, since it could be e.g. a fake ID
                    // passed to us in some URL or something else. We still want to log it
                    // through runOnShardErrorLogger() though.
                    throw new ShardIsNotDiscoverableError_1.ShardIsNotDiscoverableError(shardNo, errors, [...islandNoToIsland.values()], Math.round(performance.now() - startTime));
                }
                // Retry the entire call to body(), to let it re-elect Client if needed.
                island = (0, misc_1.nullthrows)(islandNoToIsland.get(islandNo));
                return await body(island, attempt);
            }
            catch (cause) {
                const error = cause;
                // Append the attempts count to the stack (once per distinct suffix).
                if (typeof error?.stack === "string") {
                    const suffix = `\n after ${attempt + 1} attempt${attempt > 0 ? "s" : ""}`;
                    if (!error.stack.endsWith(suffix)) {
                        error.stack = error.stack.trimEnd() + suffix;
                    }
                }
                onAttemptError?.(error, attempt);
                this.options.loggers.runOnShardErrorLogger?.({ error, attempt });
                if (!(error instanceof ClientError_1.ClientError) ||
                    attempt >= (0, misc_1.maybeCall)(this.options.runOnShardErrorRetryCount)) {
                    throw error;
                }
                switch (error.postAction) {
                    case "rediscover-cluster":
                        await this.rediscoverCluster();
                        continue;
                    case "rediscover-island":
                        if (island) {
                            await this.rediscoverIsland(island);
                        }
                        else {
                            // The error happened before the Island was located (e.g.
                            // while reading the Shards map), so there is no Island to
                            // rediscover; fall back to the whole-Cluster rediscovery
                            // instead of failing with an unrelated TypeError.
                            await this.rediscoverCluster();
                        }
                        continue;
                    case "choose-another-client":
                        continue;
                    case "fail":
                        throw error;
                }
            }
        }
    }
    /**
     * Runs the whole-cluster rediscover after a delay, hoping that we'll load the
     * new Shards-to-Island mapping.
     *
     * Multiple concurrent calls to this method will be coalesced into one
     * (including the delay period):
     * 1. This protects against the burst of rediscover requests caused by
     *    multiple failing concurrent queries.
     * 2. It also allows to keep the queries batched when they are retried (i.e.
     *    the whole batch will be retried, not individual queries).
     *
     * Never rejects because of a discovery failure or a timeout: such errors
     * are passed to swallowedErrorLogger().
     */
    async rediscoverCluster() {
        await delay((0, misc_1.maybeCall)(this.options.runOnShardErrorRediscoverClusterDelayMs));
        // We don't want to wait forever if some Island is completely down.
        const startTime = performance.now();
        await (0, p_timeout_1.default)(
        // Notice that we intentionally DO NOT call `islandsCache#refreshAndWait()
        // here: changes in the list of Islands never reveal moved Shards.
        this.shardsDiscoverCache.refreshAndWait(), Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) *
            (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter)) *
            2), "Timed out while waiting for whole-Cluster Shards discovery.").catch((error) => this.options.loggers.swallowedErrorLogger({
            where: `${this.constructor.name}.rediscoverCluster`,
            error,
            elapsed: Math.round(performance.now() - startTime),
            importance: "normal",
        }));
    }
    /**
     * Runs Island#rediscover() after a delay.
     *
     * Multiple concurrent calls to this method will be coalesced into one
     * (including the delay period):
     * 1. This protects against the burst of rediscover requests caused by
     *    multiple failing concurrent queries.
     * 2. It also allows to keep the queries batched when they are retried (i.e.
     *    the whole batch will be retried, not individual queries).
     *
     * Never rejects because of a discovery failure or a timeout: such errors
     * are passed to swallowedErrorLogger().
     *
     * @param island The Island to rediscover; must be defined.
     */
    async rediscoverIsland(island) {
        await delay((0, misc_1.maybeCall)(this.options.runOnShardErrorRediscoverIslandDelayMs));
        // We don't want to wait forever if the Island is completely down.
        const startTime = performance.now();
        await (0, p_timeout_1.default)(island.rediscover(), Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) *
            (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter)) *
            2), `Timed out while waiting for Island ${island.no} Shards discovery.`).catch((error) => this.options.loggers.swallowedErrorLogger({
            where: `${this.constructor.name}.rediscoverIsland(${island.no})`,
            error,
            elapsed: Math.round(performance.now() - startTime),
            importance: "normal",
        }));
    }
    /**
     * Runs the actual Shards discovery queries over all Islands and updates the
     * mapping from each Shard number to an Island where it lives. These queries
     * may be expensive, so it's expected that the returned Promise is heavily
     * cached by the caller code.
     *
     * Throws if the same Shard number is found on more than one Island.
     *
     * @returns A snapshot with `islandNoToIsland`, `shardNoToIslandNo`,
     *   `nonGlobalShardNos` (sorted ascending) and `errors` (collected from
     *   Island#rediscover() calls).
     */
    async shardsDiscoverExpensive() {
        const islands = await this.islandsCache.cached();
        // Registry keys of all Clients and Islands which are still in use.
        const seenKeys = new Set();
        const islandNoToIsland = new Map(islands.map(({ no, nodes }) => {
            const clients = nodes.map((node) => {
                const [client, key] = this.clientRegistry.getOrCreate(node);
                seenKeys.add(key);
                this.prewarmEnabled && client.prewarm();
                return client;
            });
            const [island, key] = this.islandRegistry.getOrCreate({
                no,
                nodes,
                clients,
            });
            seenKeys.add(key);
            return [no, island];
        }));
        const shardNoToIslandNo = new Map();
        const nonGlobalShardNos = [];
        const errors = [];
        const shards = [];
        await (0, misc_1.mapJoin)([...islandNoToIsland.entries()], async ([islandNo, island]) => {
            errors.push(...(await island.rediscover()));
            for (const shard of island.shards()) {
                shards.push(shard);
                const otherIslandNo = shardNoToIslandNo.get(shard.no);
                if (otherIslandNo !== undefined) {
                    throw Error(`Shard #${shard.no} exists in more than one island: ` +
                        islandNoToIsland.get(otherIslandNo)?.master().options.name +
                        `(${otherIslandNo})` +
                        " and " +
                        island.master().options.name +
                        `(${islandNo})`);
                }
                shardNoToIslandNo.set(shard.no, islandNo);
                if (shard.no !== 0) {
                    nonGlobalShardNos.push(shard.no);
                }
            }
        });
        // Assign the last known Island number to all Shards synchronously.
        for (const shard of shards) {
            shard.lastKnownIslandNo =
                shardNoToIslandNo.get(shard.no) ?? null;
        }
        // Gracefully delete and disconnect the Clients which didn't correspond to
        // the list of nodes mentioned in this.options.islands, and also, delete
        // leftover Islands which are not used anymore. In case we don't reach this
        // point and threw earlier, it will eventually be reached on the next Shards
        // discovery iterations.
        for (const registry of [this.clientRegistry, this.islandRegistry]) {
            (0, misc_1.runInVoid)(registry.deleteExcept(seenKeys));
        }
        // Return the updated ENTIRE snapshot.
        return {
            islandNoToIsland,
            shardNoToIslandNo,
            nonGlobalShardNos: nonGlobalShardNos.sort((a, b) => a - b),
            errors,
        };
    }
}
exports.Cluster = Cluster;
/**
 * Default values for the constructor options. `reloadIslandsIntervalMs` is NaN
 * here on purpose: the constructor replaces a NaN value with a concrete
 * interval.
 */
Cluster.DEFAULT_OPTIONS = {
    localCache: null,
    shardNamer: null,
    shardsDiscoverIntervalMs: 10000,
    shardsDiscoverIntervalJitter: 0.2,
    reloadIslandsIntervalMs: NaN,
    runOnShardErrorRetryCount: 2,
    runOnShardErrorRediscoverClusterDelayMs: 1000,
    runOnShardErrorRediscoverIslandDelayMs: 5000,
};
// Apply the Memoize decorators to the prototype methods: shardByNo() is
// memoized with the default settings; rediscoverCluster() and
// rediscoverIsland() (the latter keyed by the Island number) are memoized with
// `clearOnResolve: true`.
for (const [methodName, memoizeDecorator] of [
    ["shardByNo", (0, fast_typescript_memoize_1.Memoize)()],
    ["rediscoverCluster", (0, fast_typescript_memoize_1.Memoize)({ clearOnResolve: true })],
    ["rediscoverIsland", (0, fast_typescript_memoize_1.Memoize)((island) => island.no, { clearOnResolve: true })],
]) {
    __decorate([memoizeDecorator], Cluster.prototype, methodName, null);
}
//# sourceMappingURL=Cluster.js.map