UNPKG

@clickup/ent-framework

Version:

A PostgreSQL graph-database-alike library with microsharding and row-level security

449 lines 21.9 kB
"use strict"; var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; return c > 3 && r && Object.defineProperty(target, key, r), r; }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Cluster = void 0; const util_1 = require("util"); const delay_1 = __importDefault(require("delay")); const fast_typescript_memoize_1 = require("fast-typescript-memoize"); const defaults_1 = __importDefault(require("lodash/defaults")); const random_1 = __importDefault(require("lodash/random")); const p_timeout_1 = __importDefault(require("p-timeout")); const CachedRefreshedValue_1 = require("../internal/CachedRefreshedValue"); const misc_1 = require("../internal/misc"); const Registry_1 = require("../internal/Registry"); const ClientError_1 = require("./ClientError"); const Island_1 = require("./Island"); const Shard_1 = require("./Shard"); const ShardIsNotDiscoverableError_1 = require("./ShardIsNotDiscoverableError"); /** Same as vanilla delay(), but with unref()ed timers. */ const delay = delay_1.default.createWithTimers({ setTimeout: (...args) => setTimeout(...args).unref(), clearTimeout: (...args) => clearTimeout(...args), }); /** * Cluster is a collection of Islands and an orchestration of shardNo -> Island * resolution. * * It's unknown beforehand, which Island some particular Shard belongs to; the * resolution is done asynchronously and lazily. * * Shard 0 is a special "global" Shard. */ class Cluster { /** * Initializes the Cluster, but doesn't send any queries yet, even discovery * queries (also, no implicit prewarming). */ constructor(options) { /** Once set to true, Clients for newly appearing nodes will be pre-warmed. */ this.prewarmEnabled = false; this.options = (0, defaults_1.default)({}, options, Cluster.DEFAULT_OPTIONS); if (typeof this.options.reloadIslandsIntervalMs === "number" && isNaN(this.options.reloadIslandsIntervalMs)) { this.options.reloadIslandsIntervalMs = util_1.types.isAsyncFunction(this.options.islands) ? (0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) : 500; } this.clientRegistry = new Registry_1.Registry({ key: (node) => (0, misc_1.jsonHash)(node), create: (node) => { const client = this.options.createClient(node); client.options.shardNamer ??= this.options.shardNamer; const loggers = { ...client.options.loggers }; client.options.loggers = { swallowedErrorLogger: (props) => { this.options.loggers.swallowedErrorLogger(props); loggers.swallowedErrorLogger?.(props); }, clientQueryLogger: (props) => { this.options.loggers.clientQueryLogger?.(props); loggers.clientQueryLogger?.(props); }, }; return client; }, end: async (client) => { const startTime = performance.now(); await client.end().catch((error) => this.options.loggers.swallowedErrorLogger({ where: `${this.constructor.name}.clientRegistry`, error, elapsed: Math.round(performance.now() - startTime), importance: "normal", })); }, }); this.islandRegistry = new Registry_1.Registry({ key: ({ no, nodes }) => (0, misc_1.jsonHash)({ no, nodes }), create: ({ no, clients }) => new Island_1.Island({ no, clients, createShard: (no) => this.shardByNo(no), localCache: this.options.localCache ?? undefined, }), }); this.islandsCache = new CachedRefreshedValue_1.CachedRefreshedValue({ delayMs: () => Math.round((0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs) * (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter))), warningTimeoutMs: () => (0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs), deps: { // If `options.islands` is reassigned externally (e.g. in a unit test), // then it will be reflected in `await this.islandsCache.cached()` // within `deps.delayMs` (not immediately). To expedite this (and Shards // map) recheck, call Cluster#rediscover(). delayMs: 50, // We use the value of `options.islands` itself as a dependency, so if // `options.islands` is reassigned externally, then we'll catch the // change quickly, within `deps.delayMs`. We do NOT call // `options.islands()` intentionally, we use its value - since we just // want to check for reassignment (e.g. in unit tests). handler: () => this.options.islands, }, resolverName: "Cluster#options.islands", resolverFn: async () => (0, misc_1.maybeAsyncCall)(this.options.islands), delay, onError: (error, elapsed) => this.options.loggers.swallowedErrorLogger({ where: `${this.constructor.name}.islandsCache`, error, elapsed, importance: "normal", }), }); this.shardsDiscoverCache = new CachedRefreshedValue_1.CachedRefreshedValue({ delayMs: () => Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) * (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter))), warningTimeoutMs: () => (0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs), deps: { delayMs: () => (0, misc_1.maybeCall)(this.options.reloadIslandsIntervalMs), handler: async () => (0, misc_1.jsonHash)(await (0, misc_1.maybeAsyncCall)(this.islandsCache.cached())), }, resolverName: "Cluster#shardsDiscoverExpensive", resolverFn: async () => this.shardsDiscoverExpensive(), delay, onError: (error, elapsed) => this.options.loggers.swallowedErrorLogger({ where: `${this.constructor.name}.shardsDiscoverCache`, error, elapsed, importance: "normal", }), }); } /** * Signals the Cluster to keep the Clients pre-warmed, e.g. open. (It's up to * the particular Client's implementation, what does a "pre-warmed Client" * mean; typically, it's keeping some minimal number of pooled connections.) * * Except when `randomizedDelayMs` is passed as 0, the actual prewarm (and * Islands discovery) queries will run with a randomized delay between N/2 and * N ms. It is better to operate in such mode: if multiple Node processes * start simultaneously in the cluster, then the randomization helps to avoid * new connections burst (new connections establishment is expensive for e.g. * pgbouncer or when DB is accessed over SSL). */ prewarm(randomizedDelayMs = 5000, onInitialPrewarm) { if (this.prewarmEnabled) { return; } this.prewarmEnabled = true; const initialDelayMs = randomizedDelayMs ? Math.round((0, random_1.default)(randomizedDelayMs / 2, randomizedDelayMs)) : 0; setTimeout(() => (0, misc_1.runInVoid)(async () => { onInitialPrewarm?.(initialDelayMs); for (const island of await this.islands()) { for (const client of island.clients) { client.prewarm(); } } }), initialDelayMs).unref(); } /** * Returns a global Shard of the Cluster. This method is made synchronous * intentionally, to defer the I/O and possible errors to the moment of the * actual query. */ globalShard() { return this.shardByNo(0); } /** * Returns all currently known (discovered) non-global Shards in the Cluster. */ async nonGlobalShards() { const { nonGlobalShardNos } = await this.shardsDiscoverCache.cached(); return nonGlobalShardNos.map((shardNo) => this.shardByNo(shardNo)); } /** * Returns Shard of a particular id. This method is made synchronous * intentionally, to defer the I/O and possible errors to the moment of the * actual query. * * Why is it important? Because Shards may go up and down temporarily at * random moments of time. Imagine we made this method async and asserted that * the Shard is actually available at the moment when the method is called. * What would happen if the Shard object was stored somewhere as "successful" * by the caller, then the Island went down, and then a query is sent to the * Shard in, say, 20 seconds? We'd get an absolutely different exception, at * the moment of the query. We don't want this to happen: we want all of the * exceptions to be thrown with a consistent call stack (e.g. at the moment of * the query), no matter whether it was an immediate call or a deferred one. */ shard(id) { return this.shardByNo(this.options.shardNamer ? this.options.shardNamer.shardNoByID(id) : 0); } /** * Returns a Shard if we know its number. The idea: for each Shard number * (even for non-discovered yet Shards), we keep the corresponding Shard * object in a Memoize cache, so Shards with the same number always resolve * into the same Shard object. Then, an actual Island locating process happens * when the caller wants to get a Client of that Shard (and it throws if such * Shard hasn't been discovered actually). */ shardByNo(shardNo) { return new Shard_1.Shard(shardNo, this.runOnShard.bind(this)); } /** * Returns a random Shard among the ones which are currently known * (discovered) in the Cluster. */ async randomShard(seed) { const { nonGlobalShardNos } = await this.shardsDiscoverCache.cached(); let index; if (seed !== undefined) { const numHash = (0, misc_1.objectHash)(seed).readUInt32BE(); index = numHash % nonGlobalShardNos.length; } else { // TODO: implement power-of-two algorithm to pick the Shard which is // smallest in size. index = (0, random_1.default)(0, nonGlobalShardNos.length - 1); } return this.shardByNo((0, misc_1.nullthrows)(nonGlobalShardNos[index], () => "There are no non-global Shards in the Cluster")); } /** * Returns an Island by its number. */ async island(islandNo) { const { islandNoToIsland } = await this.shardsDiscoverCache.cached(); return (0, misc_1.nullthrows)(islandNoToIsland.get(islandNo), () => `No such Island: ${islandNo}`); } /** * Returns all Islands in the Cluster. */ async islands() { const { islandNoToIsland } = await this.shardsDiscoverCache.cached(); return [...islandNoToIsland.values()]; } /** * Triggers shards rediscovery and finishes as soon as it's done. To be used * in unit tests mostly, because in real life, it's enough to just modify the * cluster configuration. */ async rediscover(what) { if (!what || what === "islands") { await this.islandsCache.refreshAndWait(); } if (!what || what === "shards") { await this.shardsDiscoverCache.refreshAndWait(); } } /** * Runs the body function with retries. The Island injected into the body * function is located automatically by the Shard number. In case of an error * after any run attempt, calls onAttemptError(). */ async runOnShard(shardNo, body, onAttemptError) { for (let attempt = 0;; attempt++) { let island; try { // Re-read Islands map on every retry, because it might change. const startTime = performance.now(); const { shardNoToIslandNo, islandNoToIsland, errors } = await this.shardsDiscoverCache.cached(); const islandNo = shardNoToIslandNo.get(shardNo); if (islandNo === undefined) { // Notice that we don't retry ShardIsNotDiscoverableError below (it's // not a ClientError) to avoid DoS, since it could be e.g. a fake ID // passed to us in some URL or something else. We still want to log it // through runOnShardErrorLogger() though. throw new ShardIsNotDiscoverableError_1.ShardIsNotDiscoverableError(shardNo, errors, [...islandNoToIsland.values()], Math.round(performance.now() - startTime)); } // Retry the entire call to body(), to let it re-elect Client if needed. island = (0, misc_1.nullthrows)(islandNoToIsland.get(islandNo)); return await body(island, attempt); } catch (cause) { const error = cause; if (typeof error?.stack === "string") { const suffix = `\n after ${attempt + 1} attempt${attempt > 0 ? "s" : ""}`; if (!error.stack.endsWith(suffix)) { error.stack = error.stack.trimEnd() + suffix; } } onAttemptError?.(error, attempt); this.options.loggers.runOnShardErrorLogger?.({ error, attempt }); if (!(error instanceof ClientError_1.ClientError) || attempt >= (0, misc_1.maybeCall)(this.options.runOnShardErrorRetryCount)) { throw error; } switch (error.postAction) { case "rediscover-cluster": await this.rediscoverCluster(); continue; case "rediscover-island": await this.rediscoverIsland(island); continue; case "choose-another-client": continue; case "fail": throw error; } } } } /** * Runs the whole-cluster rediscover after a delay, hoping that we'll load the * new Shards-to-Island mapping. * * Multiple concurrent calls to this method will be coalesced into one * (including the delay period): * 1. This protects against the burst of rediscover requests caused by * multiple failing concurrent queries. * 2. It also allows to keep the queries batched when they are retried (i.e. * the whole batch will be retried, not individual queries). */ async rediscoverCluster() { await delay((0, misc_1.maybeCall)(this.options.runOnShardErrorRediscoverClusterDelayMs)); // We don't want to wait forever if some Island is completely down. const startTime = performance.now(); await (0, p_timeout_1.default)( // Notice that we intentionally DO NOT call `islandsCache#refreshAndWait() // here: changes in the list of Islands never reveal moved Shards. this.shardsDiscoverCache.refreshAndWait(), Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) * (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter)) * 2), "Timed out while waiting for whole-Cluster Shards discovery.").catch((error) => this.options.loggers.swallowedErrorLogger({ where: `${this.constructor.name}.rediscoverCluster`, error, elapsed: Math.round(performance.now() - startTime), importance: "normal", })); } /** * Runs Island#rediscover() after a delay. * * Multiple concurrent calls to this method will be coalesced into one * (including the delay period): * 1. This protects against the burst of rediscover requests caused by * multiple failing concurrent queries. * 2. It also allows to keep the queries batched when they are retried (i.e. * the whole batch will be retried, not individual queries). */ async rediscoverIsland(island) { await delay((0, misc_1.maybeCall)(this.options.runOnShardErrorRediscoverIslandDelayMs)); // We don't want to wait forever if the Island is completely down. const startTime = performance.now(); await (0, p_timeout_1.default)(island.rediscover(), Math.round((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalMs) * (0, misc_1.jitter)((0, misc_1.maybeCall)(this.options.shardsDiscoverIntervalJitter)) * 2), `Timed out while waiting for Island ${island.no} Shards discovery.`).catch((error) => this.options.loggers.swallowedErrorLogger({ where: `${this.constructor.name}.rediscoverIsland(${island.no})`, error, elapsed: Math.round(performance.now() - startTime), importance: "normal", })); } /** * Runs the actual Shards discovery queries over all Islands and updates the * mapping from each Shard number to an Island where it lives. These queries * may be expensive, so it's expected that the returned Promise is heavily * cached by the caller code. */ async shardsDiscoverExpensive() { const islands = await this.islandsCache.cached(); const seenKeys = new Set(); const islandNoToIsland = new Map(islands.map(({ no, nodes }) => { const clients = nodes.map((node) => { const [client, key] = this.clientRegistry.getOrCreate(node); seenKeys.add(key); this.prewarmEnabled && client.prewarm(); return client; }); const [island, key] = this.islandRegistry.getOrCreate({ no, nodes, clients, }); seenKeys.add(key); return [no, island]; })); const shardNoToIslandNo = new Map(); const nonGlobalShardNos = []; const errors = []; const shards = []; await (0, misc_1.mapJoin)([...islandNoToIsland.entries()], async ([islandNo, island]) => { errors.push(...(await island.rediscover())); for (const shard of island.shards()) { shards.push(shard); const otherIslandNo = shardNoToIslandNo.get(shard.no); if (otherIslandNo !== undefined) { throw Error(`Shard #${shard.no} exists in more than one island: ` + islandNoToIsland.get(otherIslandNo)?.master().options.name + `(${otherIslandNo})` + " and " + island.master().options.name + `(${islandNo})`); } shardNoToIslandNo.set(shard.no, islandNo); if (shard.no !== 0) { nonGlobalShardNos.push(shard.no); } } }); // Assign the last known Island number to all Shards synchronously. for (const shard of shards) { shard.lastKnownIslandNo = shardNoToIslandNo.get(shard.no) ?? null; } // Gracefully delete and disconnect the Clients which didn't correspond to // the list of nodes mentioned in this.options.islands, and also, delete // leftover Islands which are not used anymore. In case we don't reach this // point and threw earlier, it will eventually be reached on the next Shards // discovery iterations. for (const registry of [this.clientRegistry, this.islandRegistry]) { (0, misc_1.runInVoid)(registry.deleteExcept(seenKeys)); } // Return the updated ENTIRE snapshot. return { islandNoToIsland, shardNoToIslandNo, nonGlobalShardNos: nonGlobalShardNos.sort((a, b) => a - b), errors, }; } } exports.Cluster = Cluster; /** Default values for the constructor options. */ Cluster.DEFAULT_OPTIONS = { localCache: null, shardNamer: null, shardsDiscoverIntervalMs: 10000, shardsDiscoverIntervalJitter: 0.2, reloadIslandsIntervalMs: NaN, runOnShardErrorRetryCount: 2, runOnShardErrorRediscoverClusterDelayMs: 1000, runOnShardErrorRediscoverIslandDelayMs: 5000, }; __decorate([ (0, fast_typescript_memoize_1.Memoize)() ], Cluster.prototype, "shardByNo", null); __decorate([ (0, fast_typescript_memoize_1.Memoize)({ clearOnResolve: true }) ], Cluster.prototype, "rediscoverCluster", null); __decorate([ (0, fast_typescript_memoize_1.Memoize)((island) => island.no, { clearOnResolve: true }) ], Cluster.prototype, "rediscoverIsland", null); //# sourceMappingURL=Cluster.js.map