/*
 * @clickup/ent-framework
 * A PostgreSQL graph-database-alike library with microsharding and row-level security
 * Listing metadata: 650 lines (609 loc), 25 kB, text/typescript (version not captured).
 */
import { types } from "util";
import delayMod from "delay";
import { Memoize } from "fast-typescript-memoize";
import defaults from "lodash/defaults";
import random from "lodash/random";
import pTimeout from "p-timeout";
import { CachedRefreshedValue } from "../internal/CachedRefreshedValue";
import type {
Writeable,
DesperateAny,
MaybeCallable,
MaybeError,
PickPartial,
MaybeAsyncCallable,
} from "../internal/misc";
import {
nullthrows,
mapJoin,
runInVoid,
objectHash,
maybeCall,
jsonHash,
jitter,
maybeAsyncCall,
} from "../internal/misc";
import { Registry } from "../internal/Registry";
import type { Client } from "./Client";
import { ClientError } from "./ClientError";
import { Island } from "./Island";
import type { LocalCache } from "./LocalCache";
import type { Loggers, SwallowedErrorLoggerProps } from "./Loggers";
import { Shard } from "./Shard";
import { ShardIsNotDiscoverableError } from "./ShardIsNotDiscoverableError";
import type { ShardNamer } from "./ShardNamer";
/**
 * A delay() variant built on unref()ed timers: apart from that, it behaves
 * like the stock function from the "delay" package.
 */
const delay = delayMod.createWithTimers({
  setTimeout: (...timerArgs) => {
    const timer = setTimeout(...timerArgs);
    return timer.unref();
  },
  clearTimeout: (...timerArgs) => clearTimeout(...timerArgs),
});
/**
 * The set of options accepted by the Cluster constructor.
 */
export interface ClusterOptions<TClient extends Client, TNode> {
  /** Configuration of the Cluster's Islands (the value itself, or a sync/async
   * function returning it). */
  islands: MaybeAsyncCallable<ClusterIslands<TNode>>;
  /** Builds a Client for the given node of some Island. Invoked whenever a new
   * node shows up in the Cluster, be it statically or dynamically. */
  createClient: (node: TNode) => TClient;
  /** Loggers which get injected into every Client produced by
   * createClient(). */
  loggers: Loggers;
  /** A LocalCache instance which may serve auxiliary purposes during
   * Shards/Clients discovery. */
  localCache?: LocalCache | null;
  /** The period of rechecking `options.islands` for changes. When it's SYNC,
   * the default is frequent (about every 500 ms), since calculating
   * `options.islands` is assumed to be cheap. When it's ASYNC, the default is
   * less frequent: every `shardsDiscoverIntervalMs`, because obtaining the
   * list of Island nodes may be costly (e.g. a fetch from AWS API or
   * similar). Once a change in the Islands list is noticed, Shards
   * rediscovery and Clients recreation are triggered ASAP. */
  reloadIslandsIntervalMs?: MaybeCallable<number>;
  /** Describes how Shard names are built and parsed. */
  shardNamer?: ShardNamer | null;
  /** The period of Shards rediscovery under normal circumstances. */
  shardsDiscoverIntervalMs?: MaybeCallable<number>;
  /** Jitter applied to both shardsDiscoverIntervalMs and
   * reloadIslandsIntervalMs. */
  shardsDiscoverIntervalJitter?: MaybeCallable<number>;
  /** The number of retries performed in these cases:
   * 1. We believe we know the Island of some Shard, but accessing it fails.
   *    Possibly, the Shard is being migrated to another Island, so we wait a
   *    little and retry up to that many times. The number should stay small,
   *    because all DB requests are blocked while the resolution is pending.
   * 2. A WRITE request was sent to a Client which turned out to be a replica,
   *    with the master having moved to another Client. Then, we wait a little
   *    and ping all Clients of the Island to refresh the knowledge about who
   *    is master and who is replica. */
  runOnShardErrorRetryCount?: MaybeCallable<number>;
  /** The pause before retrying the whole-Cluster rediscovery after a
   * Shard-to-Island resolution error. It should be just long enough for a
   * Shard to switch from one Island to another (which is typically quick). */
  runOnShardErrorRediscoverClusterDelayMs?: MaybeCallable<number>;
  /** The pause before sending discover requests to all Clients of the Island
   * when looking for the new master (or reconnecting). It may be as long as
   * several seconds, because some DBs don't shut down the old master and
   * promote a replica at the same moment. */
  runOnShardErrorRediscoverIslandDelayMs?: MaybeCallable<number>;
}
/**
 * The type of the `ClusterOptions#islands` property: the full list of Islands,
 * each with its number and its Nodes (masters and replicas).
 */
export type ClusterIslands<TNode> = ReadonlyArray<{
  no: number;
  nodes: ReadonlyArray<TNode>;
}>;
/**
 * A complete, auto-discovered and non-contradictory snapshot of the Islands,
 * together with the map telling which Island every Shard lives on. It also
 * carries all the errors due to which some Islands became completely
 * undiscoverable (i.e. Shards could be located neither on the master nor on
 * any replica, so that Island was given up on till the next rediscovery).
 */
interface ShardsDiscovered<TClient extends Client> {
  islandNoToIsland: Map<number, Island<TClient>>;
  shardNoToIslandNo: ReadonlyMap<number, number>;
  nonGlobalShardNos: ReadonlyArray<number>;
  errors: Array<SwallowedErrorLoggerProps>;
}
/**
* Cluster is a collection of Islands and an orchestration of shardNo -> Island
* resolution.
*
* It's unknown beforehand, which Island some particular Shard belongs to; the
* resolution is done asynchronously and lazily.
*
* Shard 0 is a special "global" Shard.
*/
export class Cluster<TClient extends Client, TNode = DesperateAny> {
/** Default values for the constructor options. They are applied by the
 * constructor to every option which the caller did not provide. */
static readonly DEFAULT_OPTIONS: Required<
PickPartial<ClusterOptions<Client, never>>
> = {
localCache: null,
shardNamer: null,
shardsDiscoverIntervalMs: 10000,
shardsDiscoverIntervalJitter: 0.2,
// NaN is a sentinel meaning "not configured": the constructor replaces it
// with 500 when `options.islands` is not an async function, or with the value
// of `shardsDiscoverIntervalMs` when it is.
reloadIslandsIntervalMs: NaN,
runOnShardErrorRetryCount: 2,
runOnShardErrorRediscoverClusterDelayMs: 1000,
runOnShardErrorRediscoverIslandDelayMs: 5000,
};
/** The complete registry of all initialized Clients. Cluster nodes may change
 * at runtime, so once a new node appears, its Client is added to the
 * registry. Also, the Clients of disappeared nodes are eventually removed
 * from the registry on the next Shards discovery. */
private clientRegistry: Registry<TNode, Client>;
/** The complete registry of all Islands ever created. If some Island changes
 * configuration, its old version is eventually removed from the registry
 * during the next Shards discovery. */
private islandRegistry: Registry<
{ no: number; nodes: readonly TNode[]; clients: readonly Client[] },
Island<Client>
>;
/** Represents the result of the recent successful call to
 * `options.islands()`. */
private islandsCache: CachedRefreshedValue<ClusterIslands<TNode>>;
/** Represents the result of the recent successful Shards discovery. */
private shardsDiscoverCache: CachedRefreshedValue<ShardsDiscovered<TClient>>;
/** Once set to true (by prewarm()), Clients for newly appearing nodes will be
 * pre-warmed during Shards discovery. */
private prewarmEnabled = false;
/** Cluster configuration options, with the defaults already applied. */
readonly options: Required<ClusterOptions<TClient, TNode>>;
/**
 * Initializes the Cluster, but doesn't send any queries yet, even discovery
 * queries (also, no implicit prewarming).
 *
 * The constructor only wires things together:
 * - merges the passed options with DEFAULT_OPTIONS;
 * - creates the registries of Clients and Islands;
 * - creates the two refreshed caches: one for the Islands list and one for
 *   the Shards discovery result.
 */
constructor(options: ClusterOptions<TClient, TNode>) {
this.options = defaults({}, options, Cluster.DEFAULT_OPTIONS);
// NaN is the "not configured" sentinel coming from DEFAULT_OPTIONS (or from
// the caller): choose the default depending on whether `options.islands` is
// an async function or not.
if (
typeof this.options.reloadIslandsIntervalMs === "number" &&
isNaN(this.options.reloadIslandsIntervalMs)
) {
this.options.reloadIslandsIntervalMs = types.isAsyncFunction(
this.options.islands,
)
? maybeCall(this.options.shardsDiscoverIntervalMs)
: 500;
}
this.clientRegistry = new Registry<TNode, Client>({
// Clients are keyed by the JSON hash of their node.
key: (node) => jsonHash(node),
create: (node) => {
const client = this.options.createClient(node);
// The Cluster-level shardNamer is used only if the Client has none.
client.options.shardNamer ??= this.options.shardNamer;
// Remember the Client's own loggers, then replace them with wrappers which
// call the Cluster-level logger first and the Client's own logger next.
const loggers = { ...client.options.loggers };
client.options.loggers = {
swallowedErrorLogger: (props) => {
this.options.loggers.swallowedErrorLogger(props);
loggers.swallowedErrorLogger?.(props);
},
clientQueryLogger: (props) => {
this.options.loggers.clientQueryLogger?.(props);
loggers.clientQueryLogger?.(props);
},
};
return client;
},
// Ends the Client; a failure of client.end() is logged, not thrown.
end: async (client) => {
const startTime = performance.now();
await client.end().catch((error) =>
this.options.loggers.swallowedErrorLogger({
where: `${this.constructor.name}.clientRegistry`,
error,
elapsed: Math.round(performance.now() - startTime),
importance: "normal",
}),
);
},
});
this.islandRegistry = new Registry({
// Islands are keyed by their number and nodes; `clients` is intentionally
// not a part of the key.
key: ({ no, nodes }) => jsonHash({ no, nodes }),
create: ({ no, clients }) =>
new Island({
no,
clients,
createShard: (no) => this.shardByNo(no),
localCache: this.options.localCache ?? undefined,
}),
});
this.islandsCache = new CachedRefreshedValue({
// The refresh period is reloadIslandsIntervalMs with the jitter applied.
delayMs: () =>
Math.round(
maybeCall(this.options.reloadIslandsIntervalMs) *
jitter(maybeCall(this.options.shardsDiscoverIntervalJitter)),
),
warningTimeoutMs: () => maybeCall(this.options.reloadIslandsIntervalMs),
deps: {
// If `options.islands` is reassigned externally (e.g. in a unit test),
// then it will be reflected in `await this.islandsCache.cached()`
// within `deps.delayMs` (not immediately). To expedite this (and Shards
// map) recheck, call Cluster#rediscover().
delayMs: 50,
// We use the value of `options.islands` itself as a dependency, so if
// `options.islands` is reassigned externally, then we'll catch the
// change quickly, within `deps.delayMs`. We do NOT call
// `options.islands()` intentionally, we use its value - since we just
// want to check for reassignment (e.g. in unit tests).
handler: () => this.options.islands,
},
resolverName: "Cluster#options.islands",
resolverFn: async () => maybeAsyncCall(this.options.islands),
delay,
onError: (error, elapsed) =>
this.options.loggers.swallowedErrorLogger({
where: `${this.constructor.name}.islandsCache`,
error,
elapsed,
importance: "normal",
}),
});
this.shardsDiscoverCache = new CachedRefreshedValue({
// The refresh period is shardsDiscoverIntervalMs with the jitter applied.
delayMs: () =>
Math.round(
maybeCall(this.options.shardsDiscoverIntervalMs) *
jitter(maybeCall(this.options.shardsDiscoverIntervalJitter)),
),
warningTimeoutMs: () => maybeCall(this.options.shardsDiscoverIntervalMs),
deps: {
// The dependency is the JSON hash of the cached Islands list, rechecked
// every reloadIslandsIntervalMs.
delayMs: () => maybeCall(this.options.reloadIslandsIntervalMs),
handler: async () =>
jsonHash(await maybeAsyncCall(this.islandsCache.cached())),
},
resolverName: "Cluster#shardsDiscoverExpensive",
resolverFn: async () => this.shardsDiscoverExpensive(),
delay,
onError: (error, elapsed) =>
this.options.loggers.swallowedErrorLogger({
where: `${this.constructor.name}.shardsDiscoverCache`,
error,
elapsed,
importance: "normal",
}),
});
}
/**
 * Tells the Cluster to keep its Clients pre-warmed (e.g. open). What exactly
 * "pre-warmed" means is decided by the particular Client implementation;
 * typically, it's about maintaining some minimal number of pooled
 * connections. Only the first call has an effect.
 *
 * Unless `randomizedDelayMs` is passed as 0, the actual prewarm (and Islands
 * discovery) queries start after a random delay between N/2 and N ms. Such
 * mode is preferable: when multiple Node processes start in the cluster
 * simultaneously, the randomization smooths the burst of new connections
 * (establishing them is expensive for e.g. pgbouncer or when the DB is
 * accessed over SSL).
 *
 * @param randomizedDelayMs Upper bound N of the randomized initial delay.
 * @param onInitialPrewarm Called with the chosen delay right before the
 *   initial prewarm begins.
 */
prewarm(
  randomizedDelayMs: number = 5000,
  onInitialPrewarm?: (delayMs: number) => void,
): void {
  if (!this.prewarmEnabled) {
    this.prewarmEnabled = true;
    // A falsy value (like 0) means "no delay at all".
    const initialDelayMs = !randomizedDelayMs
      ? 0
      : Math.round(random(randomizedDelayMs / 2, randomizedDelayMs));
    const prewarmAllClients = async (): Promise<void> => {
      onInitialPrewarm?.(initialDelayMs);
      const discoveredIslands = await this.islands();
      for (const { clients } of discoveredIslands) {
        for (const client of clients) {
          client.prewarm();
        }
      }
    };
    const timer = setTimeout(
      () => runInVoid(prewarmAllClients),
      initialDelayMs,
    );
    timer.unref();
  }
}
/**
 * Returns the global Shard of the Cluster (the one with number 0). The method
 * is synchronous on purpose: the I/O and the possible errors are postponed
 * till the moment of the actual query.
 */
globalShard(): Shard<TClient> {
  const globalShardNo = 0;
  return this.shardByNo(globalShardNo);
}
/**
 * Returns the Shard objects for all non-global Shards which are currently
 * known (discovered) in the Cluster.
 */
async nonGlobalShards(): Promise<ReadonlyArray<Shard<TClient>>> {
  const discovered = await this.shardsDiscoverCache.cached();
  return discovered.nonGlobalShardNos.map((no) => this.shardByNo(no));
}
/**
 * Returns the Shard which the given id belongs to. When no shardNamer is
 * configured, the global Shard (number 0) is returned. The method is
 * synchronous on purpose: the I/O and the possible errors are postponed till
 * the moment of the actual query.
 *
 * Why does it matter? Shards may temporarily go up and down at arbitrary
 * moments. Suppose this method were async and verified that the Shard is
 * available right at the time of the call. If the caller then kept the Shard
 * object as "successful", the Island went down, and a query was sent to the
 * Shard some 20 seconds later, the caller would face a completely different
 * exception thrown at the query time. We'd rather have all exceptions thrown
 * with a consistent call stack (i.e. at the moment of the query), regardless
 * of whether the call was immediate or deferred.
 */
shard(id: string): Shard<TClient> {
  const { shardNamer } = this.options;
  if (shardNamer) {
    return this.shardByNo(shardNamer.shardNoByID(id));
  }
  return this.shardByNo(0);
}
/**
 * Returns a Shard if we know its number. The idea: for each Shard number
 * (even for non-discovered yet Shards), we keep the corresponding Shard
 * object in a Memoize cache, so Shards with the same number always resolve
 * into the same Shard object. Then, an actual Island locating process happens
 * when the caller wants to get a Client of that Shard (and it throws if such
 * Shard hasn't been discovered actually).
 *
 * @param shardNo The Shard number; 0 denotes the global Shard.
 * @returns The Shard object for that number. No I/O is performed here.
 */
@Memoize()
shardByNo(shardNo: number): Shard<TClient> {
  // The Shard gets a bound runOnShard(), so it locates its Island lazily, at
  // the moment of the query.
  return new Shard(shardNo, this.runOnShard.bind(this));
}
/**
 * Picks a Shard among the non-global Shards which are currently known
 * (discovered) in the Cluster. If `seed` is passed, the choice is derived
 * from the seed's hash; otherwise, it's random. Throws when there are no
 * non-global Shards.
 */
async randomShard(seed?: object): Promise<Shard<TClient>> {
  const { nonGlobalShardNos: shardNos } =
    await this.shardsDiscoverCache.cached();
  // TODO: implement power-of-two algorithm to pick the Shard which is
  // smallest in size.
  const index =
    seed === undefined
      ? random(0, shardNos.length - 1)
      : objectHash(seed).readUInt32BE() % shardNos.length;
  const shardNo = nullthrows(
    shardNos[index],
    () => "There are no non-global Shards in the Cluster",
  );
  return this.shardByNo(shardNo);
}
/**
 * Looks up an Island by its number in the recent Shards discovery result.
 * Throws if there is no Island with such number.
 */
async island(islandNo: number): Promise<Island<TClient>> {
  const discovered = await this.shardsDiscoverCache.cached();
  const found = discovered.islandNoToIsland.get(islandNo);
  return nullthrows(found, () => `No such Island: ${islandNo}`);
}
/**
 * Returns all Islands of the Cluster, as of the recent Shards discovery.
 */
async islands(): Promise<Array<Island<TClient>>> {
  const discovered = await this.shardsDiscoverCache.cached();
  return Array.from(discovered.islandNoToIsland.values());
}
/**
 * Forces a rediscovery and resolves once it's finished. With no argument,
 * the Islands list is refreshed first and the Shards map next; otherwise,
 * only the requested part is refreshed. Mostly useful in unit tests, since in
 * real life, modifying the cluster configuration is enough.
 */
async rediscover(what?: "islands" | "shards"): Promise<void> {
  const everything = !what;
  if (everything || what === "islands") {
    await this.islandsCache.refreshAndWait();
  }
  if (everything || what === "shards") {
    await this.shardsDiscoverCache.refreshAndWait();
  }
}
/**
 * Runs the body function with retries. The Island injected into the body
 * function is located automatically by the Shard number. In case of an error
 * after any run attempt, calls onAttemptError().
 *
 * The error of an attempt is rethrown (i.e. no more retries happen) when it
 * is not a ClientError, when `attempt` has reached
 * `options.runOnShardErrorRetryCount`, or when its `postAction` is "fail".
 * Otherwise, `postAction` defines what is done before the next attempt.
 *
 * @param shardNo The number of the Shard to locate the Island for.
 * @param body The function to run; it receives the Island and the 0-based
 *   attempt number.
 * @param onAttemptError Called with the error and the attempt number after
 *   each failed attempt, before the decision about retrying is made.
 * @returns Whatever body() resolved with.
 */
private async runOnShard<TRes>(
shardNo: number,
body: (island: Island<TClient>, attempt: number) => Promise<TRes>,
onAttemptError?: (error: unknown, attempt: number) => void,
): Promise<TRes> {
// The loop has no exit condition: it is left only by `return` or `throw`.
for (let attempt = 0; ; attempt++) {
let island: Island<TClient>;
try {
// Re-read Islands map on every retry, because it might change.
const startTime = performance.now();
const { shardNoToIslandNo, islandNoToIsland, errors } =
await this.shardsDiscoverCache.cached();
const islandNo = shardNoToIslandNo.get(shardNo);
if (islandNo === undefined) {
// Notice that we don't retry ShardIsNotDiscoverableError below (it's
// not a ClientError) to avoid DoS, since it could be e.g. a fake ID
// passed to us in some URL or something else. We still want to log it
// through runOnShardErrorLogger() though.
throw new ShardIsNotDiscoverableError(
shardNo,
errors,
[...islandNoToIsland.values()],
Math.round(performance.now() - startTime),
);
}
// Retry the entire call to body(), to let it re-elect Client if needed.
island = nullthrows(islandNoToIsland.get(islandNo));
return await body(island, attempt);
} catch (cause: unknown) {
const error = cause as MaybeError | ClientError;
// Append the number of attempts made so far to the error's stack, unless
// the stack already ends with exactly that suffix.
if (typeof error?.stack === "string") {
const suffix = `\n after ${attempt + 1} attempt${attempt > 0 ? "s" : ""}`;
if (!error.stack.endsWith(suffix)) {
error.stack = error.stack.trimEnd() + suffix;
}
}
// Both callbacks are invoked for every failed attempt, including the last.
onAttemptError?.(error, attempt);
this.options.loggers.runOnShardErrorLogger?.({ error, attempt });
if (
!(error instanceof ClientError) ||
attempt >= maybeCall(this.options.runOnShardErrorRetryCount)
) {
throw error;
}
switch (error.postAction) {
case "rediscover-cluster":
await this.rediscoverCluster();
continue;
case "rediscover-island":
// NOTE(review): `island` is unassigned if the ClientError was thrown
// before body() got called (e.g. by shardsDiscoverCache.cached()), so the
// non-null assertion would pass undefined here - confirm that such an
// error can never carry the "rediscover-island" postAction.
await this.rediscoverIsland(island!);
continue;
case "choose-another-client":
continue;
case "fail":
throw error;
}
}
}
}
/**
 * Runs the whole-cluster rediscover after a delay, hoping that we'll load the
 * new Shards-to-Island mapping. Never throws because of the rediscovery
 * itself: its failure or timeout is passed to swallowedErrorLogger.
 *
 * Multiple concurrent calls to this method will be coalesced into one
 * (including the delay period):
 * 1. This protects against the burst of rediscover requests caused by
 *    multiple failing concurrent queries.
 * 2. It also allows to keep the queries batched when they are retried (i.e.
 *    the whole batch will be retried, not individual queries).
 *
 * The coalescing is done by the Memoize decorator; `clearOnResolve` makes it
 * forget the Promise once it settles, so a later call starts a new round.
 */
@Memoize({ clearOnResolve: true })
private async rediscoverCluster(): Promise<void> {
  await delay(
    maybeCall(this.options.runOnShardErrorRediscoverClusterDelayMs),
  );
  // We don't want to wait forever if some Island is completely down.
  const startTime = performance.now();
  await pTimeout(
    // Notice that we intentionally DO NOT call `islandsCache#refreshAndWait()`
    // here: changes in the list of Islands never reveal moved Shards.
    this.shardsDiscoverCache.refreshAndWait(),
    // The timeout is twice the (jittered) regular Shards discovery interval.
    Math.round(
      maybeCall(this.options.shardsDiscoverIntervalMs) *
        jitter(maybeCall(this.options.shardsDiscoverIntervalJitter)) *
        2,
    ),
    "Timed out while waiting for whole-Cluster Shards discovery.",
  ).catch((error) =>
    this.options.loggers.swallowedErrorLogger({
      where: `${this.constructor.name}.rediscoverCluster`,
      error,
      elapsed: Math.round(performance.now() - startTime),
      importance: "normal",
    }),
  );
}
/**
 * Runs Island#rediscover() after a delay. Never throws because of the
 * rediscovery itself: its failure or timeout is passed to
 * swallowedErrorLogger.
 *
 * Multiple concurrent calls to this method will be coalesced into one
 * (including the delay period):
 * 1. This protects against the burst of rediscover requests caused by
 *    multiple failing concurrent queries.
 * 2. It also allows to keep the queries batched when they are retried (i.e.
 *    the whole batch will be retried, not individual queries).
 *
 * The coalescing is done by the Memoize decorator keyed by the Island number,
 * so calls for different Islands are not merged; `clearOnResolve` makes it
 * forget the Promise once it settles, so a later call starts a new round.
 *
 * @param island The Island whose Clients should be rediscovered.
 */
@Memoize((island) => island.no, { clearOnResolve: true })
private async rediscoverIsland(island: Island<TClient>): Promise<void> {
  await delay(maybeCall(this.options.runOnShardErrorRediscoverIslandDelayMs));
  // We don't want to wait forever if the Island is completely down.
  const startTime = performance.now();
  await pTimeout(
    island.rediscover(),
    // The timeout is twice the (jittered) regular Shards discovery interval.
    Math.round(
      maybeCall(this.options.shardsDiscoverIntervalMs) *
        jitter(maybeCall(this.options.shardsDiscoverIntervalJitter)) *
        2,
    ),
    `Timed out while waiting for Island ${island.no} Shards discovery.`,
  ).catch((error) =>
    this.options.loggers.swallowedErrorLogger({
      where: `${this.constructor.name}.rediscoverIsland(${island.no})`,
      error,
      elapsed: Math.round(performance.now() - startTime),
      importance: "normal",
    }),
  );
}
/**
 * Runs the actual Shards discovery queries over all Islands and updates the
 * mapping from each Shard number to an Island where it lives. These queries
 * may be expensive, so it's expected that the returned Promise is heavily
 * cached by the caller code.
 *
 * Throws if the same Shard number is found on more than one Island.
 *
 * @returns The new snapshot: Islands by number, Shard-to-Island map, sorted
 *   non-global Shard numbers, and the errors returned by Island#rediscover().
 */
private async shardsDiscoverExpensive(): Promise<ShardsDiscovered<TClient>> {
const islands = await this.islandsCache.cached();
// Registry keys of all Clients and Islands mentioned in the current
// configuration; everything else is deleted from the registries at the end.
const seenKeys = new Set<string>();
const islandNoToIsland = new Map<number, Island<TClient>>(
islands.map(({ no, nodes }) => {
const clients = nodes.map((node) => {
const [client, key] = this.clientRegistry.getOrCreate(node);
seenKeys.add(key);
// Prewarm the Client only if prewarm() has been called on the Cluster.
this.prewarmEnabled && client.prewarm();
return client;
});
const [island, key] = this.islandRegistry.getOrCreate({
no,
nodes,
clients,
});
seenKeys.add(key);
return [no, island as Island<TClient>];
}),
);
const shardNoToIslandNo = new Map<number, number>();
const nonGlobalShardNos: number[] = [];
const errors: SwallowedErrorLoggerProps[] = [];
const shards: Array<Shard<TClient>> = [];
await mapJoin(
[...islandNoToIsland.entries()],
async ([islandNo, island]) => {
// Island#rediscover() resolves with the errors it encountered; they are
// accumulated into the snapshot.
errors.push(...(await island.rediscover()));
for (const shard of island.shards()) {
shards.push(shard);
// A Shard number which is already mapped to an Island is a contradiction.
const otherIslandNo = shardNoToIslandNo.get(shard.no);
if (otherIslandNo !== undefined) {
throw Error(
`Shard #${shard.no} exists in more than one island: ` +
islandNoToIsland.get(otherIslandNo)?.master().options.name +
`(${otherIslandNo})` +
" and " +
island.master().options.name +
`(${islandNo})`,
);
}
shardNoToIslandNo.set(shard.no, islandNo);
// Shard 0 is the global Shard, so it is excluded from the non-global list.
if (shard.no !== 0) {
nonGlobalShardNos.push(shard.no);
}
}
},
);
// Assign the last known Island number to all Shards synchronously.
for (const shard of shards) {
(shard as Writeable<Shard<TClient>>).lastKnownIslandNo =
shardNoToIslandNo.get(shard.no) ?? null;
}
// Gracefully delete and disconnect the Clients which didn't correspond to
// the list of nodes mentioned in this.options.islands, and also, delete
// leftover Islands which are not used anymore. In case we don't reach this
// point and threw earlier, it will eventually be reached on the next Shards
// discovery iterations.
for (const registry of [this.clientRegistry, this.islandRegistry]) {
// Not awaited: the cleanup runs in background via runInVoid().
runInVoid(registry.deleteExcept(seenKeys));
}
// Return the updated ENTIRE snapshot.
return {
islandNoToIsland,
shardNoToIslandNo,
nonGlobalShardNos: nonGlobalShardNos.sort((a, b) => a - b),
errors,
};
}
}