// @clickup/ent-framework
// A PostgreSQL graph-database-alike library with microsharding and row-level security.
import compact from "lodash/compact";
import defaults from "lodash/defaults";
import first from "lodash/first";
import sample from "lodash/sample";
import sortBy from "lodash/sortBy";
import pTimeout from "p-timeout";
import type { PickPartial } from "../internal/misc";
import { mapJoin, nullthrows } from "../internal/misc";
import type { Client, ClientRole } from "./Client";
import type { LocalCache } from "./LocalCache";
import type { SwallowedErrorLoggerProps } from "./Loggers";
import type { Shard } from "./Shard";
/**
* The list of Clients grouped into master, replica and unknown groups. In each
* group, there are healthy and unhealthy Clients.
*/
type ClassifiedClients<TClient extends Client> = Record<
ClientRole,
{ healthy: TClient[]; unhealthy: TClient[] }
>;
/**
* Options for Island constructor.
*/
export interface IslandOptions<TClient extends Client> {
/** Island number. */
no: number;
/** Clients of that Island (the order is arbitrary). */
clients: readonly TClient[];
/** Should return a Memoize'd Shard object by its number. */
createShard: (no: number) => Shard<TClient>;
/** An auxiliary LocalCache used as a fallback to infer the master/replica role
* in case some Client is unavailable right now. */
localCache?: LocalCache<{
address: string;
role: ClientRole;
// We must not put non-deterministic fields (like timestamps) here,
// otherwise the cache file will be overwritten over and over even when
// writing the same data.
}> | null;
/** If nonzero, runs the second shardNos() call attempt on a Client if the 1st
* call on that Client gets stuck for longer than the provided number of ms.
*
* This option is used to detect an unhealthy DB connection more quickly, and
* thus, exit from rediscover() faster (the Shards map can likely still be
* loaded from a replica, so a down DB is not the end of the world). The idea
* is that the 1st shardNos() could get stuck due to the load balancer trying
* to wait until the DB goes back up again (e.g. for PgBouncer, that is the
* query_wait_timeout situation; "pause_client" is printed to PgBouncer debug
* logs, and then the Client gets frozen for up to query_wait_timeout; other
* engines may have similar behavior). But for NEW connections/queries, after
* a small delay, the load balancer may realize that the DB is really down
* (the load balancer can typically get "connection refused" while connecting
* to the DB server really quickly), and the 2nd shardNos() call will reject
* almost immediately ("fast fail" workflow), way before the 1st call rejects
* (e.g. for PgBouncer and query_wait_timeout=15s, the 1st call may get stuck
* for up to 15 seconds!). So, we don't have to wait that long to figure out
* that the DB is down, and can detect that situation quicker.
*
* Typically, the connection attempt from the load balancer to an unhealthy DB
* server ends quickly with a "connection refused" TCP error (e.g. when the
* load balancer and the DB server run on the same host), so the value in this
* option can be small. But not always. Sometimes, the new connection from the
* load balancer to the DB server gets stuck in the "connecting..." state
* (e.g. this happens when the load balancer runs in a Docker container, and
* the DB container gets killed; the connection attempt will eventually fail,
* but in 1+ minutes and with a "no route to host" error). In this case, the
* value of this option must be greater than e.g. server_connect_timeout (a
* PgBouncer example; basically, server_connect_timeout is PgBouncer's tool
* to detect "stuck" connection attempts, i.e. connections which don't get
* "connection refused" quickly). */
shardNosConcurrentRetryDelayMs?: number;
}
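/* Minimal construction sketch (hedged: `newPgClient` and `memoizedShardByNo`
* are hypothetical helpers, not defined in this file; in a real setup, Islands
* are typically created by the Cluster from its configuration):
*
*   const island = new Island({
*     no: 0,
*     clients: [newPgClient("db-master"), newPgClient("db-replica-1")],
*     createShard: (no) => memoizedShardByNo(no),
*     localCache: null, // or a LocalCache persisting { address, role }
*     shardNosConcurrentRetryDelayMs: 3500,
*   });
*/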
/**
* Island is a moderately short-lived collection of DB connections (represented
* as Clients) that contains a single master Client and any number of replicas.
*
* - In normal situations, you don't likely need to work with Islands directly,
* you can rely on higher level abstractions which support automatic
* rediscovery and retries: Ent (or lower level Shard and Schema).
* - Islands are helpful mostly when working with cross-Shards logic.
* - Island is somewhat temporary: if the Cluster is reconfigured in real-time,
* then its Island objects may be recycled and re-created, and the
* corresponding Clients may be ended. This also applies to any given Client
* instance. Don't retain and reuse those objects for too long. The reliable
* abstractions (resilient to disconnects, shards migration, failover etc.)
* start from Shard level.
* - There is no guarantee that the data returned by shards(), master() or
* replica() will be up to date. Shards may be just migrated to another
* Island. Master may become a replica, or vice versa.
*/
export class Island<TClient extends Client> {
/** Default values for the constructor options. */
static readonly DEFAULT_OPTIONS: Required<
PickPartial<IslandOptions<Client>>
> = {
localCache: null,
// https://willbryant.net/overriding_the_default_linux_kernel_20_second_tcp_socket_connect_timeout
// Convenient when tcp_syn_retries=1 is set in the docker-compose.yml file:
// "Linux ... sends 5 SYNs ... the retries are after 3s, 6s, 12s, 24s"
shardNosConcurrentRetryDelayMs: 3500,
};
/** Clients grouped based on their roles and health. */
private classifiedClients!: ClassifiedClients<TClient>;
/** In case shardNos discovery for some Client hasn't succeeded yet (and thus
* we are not sure about the role of that Client), we try to load the role
* from the fallback cache into this map and use it instead of "unknown". */
private fallbackRoles = new WeakMap<TClient, ClientRole>();
/** Recently discovered Shard numbers. */
private shardNos: number[] | null = null;
/** Island configuration options. */
readonly options: Required<IslandOptions<TClient>>;
/**
* Initializes the Island by copying the Client references into it.
*/
constructor(options: IslandOptions<TClient>) {
this.options = defaults({}, options, Island.DEFAULT_OPTIONS);
if (options.clients.length === 0) {
throw Error("Island does not have nodes");
}
this.reclassifyClients();
}
/**
* Island number.
*/
get no(): number {
return this.options.no;
}
/**
* The list of Clients in this Island. No assumptions about the order.
*/
get clients(): readonly TClient[] {
return this.options.clients;
}
/**
* Queries for Shards on the best available Client (preferably master, then
* replicas) and stores the result internally, available for further shards()
* calls.
* - If some Clients are unavailable, tries its best to infer the data from
* other Clients.
* - The method queries ALL Clients in parallel, because the caller logic
* needs to know anyway who's master and who's replica, as a side effect of
* the very 1st query after the Client creation. We infer that as a piggyback
* after calling Client#shardNos().
* - In case we could not discover Shards, returns the list of errors that
* happened during the discovery.
*/
async rediscover(): Promise<SwallowedErrorLoggerProps[]> {
// Load fallback roles as early as possible (since shardNos() queries below
// may take a lot of time in case they time out).
await mapJoin(this.clients, async (client) => {
if (!this.fallbackRoles.has(client)) {
const fallback = await this.options.localCache?.get(client.address());
if (fallback) {
this.fallbackRoles.set(client, fallback.role);
}
}
});
// We don't use Promise.race() here! We really want to wait until ALL
// Clients either respond or reject, which is what mapJoin() is doing. If we
// used Promise.race(), then timing-out Clients could be requested by the
// caller logic concurrently over and over, so the number of pending
// requests to them would grow. We want to control that parallelism.
const errors: SwallowedErrorLoggerProps[] = [];
const res = sortBy(
compact(
await mapJoin(this.clients, async (client) => {
const startTime = performance.now();
try {
const shardNos = await this.clientShardNos(client);
const address = client.address();
const role = client.role();
await this.options.localCache?.set(address, { address, role });
this.fallbackRoles.set(client, role);
return { role, shardNos };
} catch (error: unknown) {
errors.push({
where: `${client.constructor.name}(${client.options.name}).shardNos`,
error,
elapsed: Math.round(performance.now() - startTime),
importance: "low",
});
client.options.loggers?.swallowedErrorLogger(
errors[errors.length - 1],
);
return null;
}
}),
),
({ role }) => (role === "master" ? 0 : role === "replica" ? 1 : 2),
({ shardNos }) => -1 * shardNos.length,
);
this.reclassifyClients();
if (res.length > 0) {
this.shardNos = [...res[0].shardNos].sort((a, b) => a - b);
return [];
} else {
// Being unable to access all DB Clients is not a critical error here:
// we'll just miss some Shards (and other Shards will work). DO NOT throw
// from here yet! This needs to be addressed holistically and with careful
// retries. Also, we run Shards rediscovery every N seconds, so a missing
// Island will self-heal eventually.
this.shardNos = [];
return errors;
}
}
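/* Discovery usage sketch (assumes an `island` constructed as in the sketch
* above; the error handling shown is illustrative only):
*
*   const errors = await island.rediscover();
*   if (errors.length > 0) {
*     // All Clients failed shardNos(); island.shards() will return [] now.
*     for (const e of errors) {
*       console.warn(e.where, e.elapsed, e.error);
*     }
*   }
*   const shards = island.shards(); // safe to call once rediscover() finished
*/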
/**
* Returns the currently best-known Shards on this Island. This method is
* needed only when working with cross-Shards logic; in normal situations, it
* is not called much.
*/
shards(): Array<Shard<TClient>> {
const shardNos = nullthrows(
this.shardNos,
"Before shards() can be used, rediscover() must finish",
);
return shardNos.map((no) => this.options.createShard(no));
}
/**
* Returns the currently best-known master Client among the Clients of this
* Island.
*
* - If all masters are unhealthy, we still return one of them and prefer not
* to fall back on a replica, because otherwise, we'd see non-obvious errors
* in logs ("can't write to a read-only Client" or so) and suspect that there
* is a bug in the logic, although there is really no bug; it's just that the
* master node went down. It's way better to throw a straightforward error
* like "Client is down".
* - If we can't find a master, but there is a list of Clients with unknown
* roles, prefer returning one of them vs. any known replica, since there is
* a chance that among those unknown Clients, there will be a master.
* - In case all Clients are read-only (replicas), still returns the 1st of
* them, assuming that it's better to throw at the caller side on a failed
* write (at worst) rather than here. It is not common to have an Island
* without a master Client; that happens only temporarily during
* failover/switchover, so the caller will likely rediscover and find a new
* master on the next retry.
*/
master(): TClient {
if (this.clients.length === 1) {
return first(this.clients)!;
}
const master = sample(this.classifiedClients.master.healthy);
if (master && master.role() === "master" && !master.connectionIssue()) {
// Fast and most frequent path.
return master;
}
this.reclassifyClients();
return (
sample(this.classifiedClients.master.healthy) ??
sample(this.classifiedClients.master.unhealthy) ?? // prefer unhealthy master
sample(this.classifiedClients.unknown.healthy) ??
sample(this.classifiedClients.unknown.unhealthy) ??
sample(this.classifiedClients.replica.healthy) ??
sample(this.classifiedClients.replica.unhealthy) ??
first(this.clients)! // should never reach here
);
}
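/* Failover behavior sketch: even if the master node is down, master() returns
* it (rather than a replica), so a write fails with a clear connectivity
* error. Hedged: `query` is a hypothetical Client method, not defined here.
*
*   const writer = island.master(); // may be an unhealthy master during failover
*   await writer.query("UPDATE ..."); // throws a "Client is down"-style error,
*                                     // not a confusing read-only error
*/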
/**
* Returns a currently best-known random replica Client. In case there are no
* replicas, returns the master Client.
*/
replica(): TClient {
if (this.clients.length === 1) {
return first(this.clients)!;
}
const replica = sample(this.classifiedClients.replica.healthy);
if (replica && replica.role() === "replica" && !replica.connectionIssue()) {
// Fast and most frequent path.
return replica;
}
this.reclassifyClients();
return (
sample(this.classifiedClients.replica.healthy) ??
sample(this.classifiedClients.unknown.healthy) ??
sample(this.classifiedClients.master.healthy) ??
sample(this.classifiedClients.replica.unhealthy) ??
sample(this.classifiedClients.unknown.unhealthy) ??
sample(this.classifiedClients.master.unhealthy) ??
first(this.clients)! // should never reach here
);
}
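/* Read routing sketch: replica() prefers a random healthy replica, but when
* the Island has no usable replicas, it may return the master. Hedged: `query`
* is a hypothetical Client method.
*
*   const reader = island.replica();
*   await reader.query("SELECT ...");
*/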
/**
* Updates the list of classified Clients. We try hard to not put Clients in
* the "unknown" group by falling back to fallbackRoles.
*/
private reclassifyClients(): void {
const classifiedClients: ClassifiedClients<TClient> = {
master: { healthy: [], unhealthy: [] },
unknown: { healthy: [], unhealthy: [] },
replica: { healthy: [], unhealthy: [] },
};
for (const client of this.clients) {
const health = client.connectionIssue() ? "unhealthy" : "healthy";
const role = client.role();
classifiedClients[
role === "unknown" ? this.fallbackRoles.get(client) ?? role : role
][health].push(client);
}
this.classifiedClients = classifiedClients;
}
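/* Illustrative shape of classifiedClients after this call (example values):
*
*   {
*     master:  { healthy: [clientA], unhealthy: [] },
*     unknown: { healthy: [],        unhealthy: [] },
*     replica: { healthy: [clientB], unhealthy: [clientC] },
*   }
*/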
/**
* Tries to pull shardNos() out of the Client and to fail fast if the DB is
* down. See details in the shardNosConcurrentRetryDelayMs option description.
*/
private async clientShardNos(client: TClient): Promise<readonly number[]> {
if (this.options.shardNosConcurrentRetryDelayMs === 0) {
return client.shardNos();
}
const promise = client.shardNos();
const maybeShardNos = await pTimeout(
promise,
this.options.shardNosConcurrentRetryDelayMs,
() => "TIMEOUT" as const,
);
return maybeShardNos instanceof Array
? maybeShardNos
: Promise.race([promise, client.shardNos()]); // try again once
}
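/* Timing sketch of the fast-fail behavior above (values are illustrative,
* assuming PgBouncer with query_wait_timeout=15s and the default 3.5s delay):
*
*   t=0s     1st shardNos() sent; PgBouncer prints "pause_client", call is stuck
*   t=3.5s   pTimeout fires; 2nd shardNos() is sent concurrently
*   t=~3.6s  2nd call gets "connection refused" and rejects, so Promise.race()
*            rejects too; rediscover() learns about the down DB in ~3.6s
*            instead of waiting the full 15s for the 1st call to reject
*/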
}