@clickup/ent-framework (version unspecified)
A PostgreSQL graph-database-alike library with microsharding and row-level security.
TypeScript declaration file (Island.d.ts) • 155 lines • 8.13 kB
import type { PickPartial } from "../internal/misc";
import type { Client, ClientRole } from "./Client";
import type { LocalCache } from "./LocalCache";
import type { SwallowedErrorLoggerProps } from "./Loggers";
import type { Shard } from "./Shard";
/**
 * Options for the Island constructor.
 */
export interface IslandOptions<TClient extends Client> {
  /** Island number. */
  no: number;
  /** Clients of that Island (the order is arbitrary). */
  clients: readonly TClient[];
  /** Should return a Memoize'd Shard object by its number. */
  createShard: (no: number) => Shard<TClient>;
  /**
   * An auxiliary LocalCache used to fallback-infer the master/replica role of
   * a Client (by its address) in case some Client is unavailable right now.
   */
  localCache?: LocalCache<{
    address: string;
    role: ClientRole;
  }> | null;
  /**
   * If nonzero, runs the second shardNos() call attempt on a Client if the 1st
   * call on that Client gets stuck for longer than the provided number of ms.
   *
   * This option is used to detect the unhealthy DB connection quicker, and
   * thus, exit from rediscover() faster (the Shards map can likely be loaded
   * from a replica still, so the down DB is not the end of the world). The
   * idea is that the 1st shardNos() could get stuck due to the load balancer
   * trying to wait until the DB goes back up again (e.g. for PgBouncer, that
   * is the query_wait_timeout situation; "pause_client" is printed to
   * PgBouncer debug logs, and then the Client gets frozen for up to
   * query_wait_timeout; other engines may have similar behavior). But for the
   * NEW connections/queries, after a small delay, the load balancer may
   * realize that the DB is really down (the load balancer typically can get
   * "connection refused" while connecting to the DB server really quickly),
   * and the 2nd shardNos() call will reject almost immediately ("fast fail"
   * workflow), way before the 1st call rejects (e.g. for PgBouncer and
   * query_wait_timeout=15s, the 1st call may get stuck for up to 15 seconds!).
   * So, we will not wait that long to figure out that the DB is down, and
   * will detect that situation quicker.
   *
   * Typically, the connection attempt from the load balancer to an unhealthy
   * DB server ends up quickly with a "connection refused" TCP error (e.g.
   * when the load balancer and the DB server run on the same host), so the
   * value in this option can be small. But not always. Sometimes, the new
   * connection from the load balancer to the DB server gets stuck in the
   * "connecting..." state (e.g. this happens when the load balancer runs in a
   * Docker container, and the DB container gets killed; the connection
   * attempt will eventually fail, but in 1+ minutes and with a "no route to
   * host" error). In this case, the value in the option must be greater than
   * e.g. server_connect_timeout (an example for PgBouncer; basically,
   * server_connect_timeout is PgBouncer's tool to detect "stuck" connection
   * attempts, i.e. the connections which don't get "connection refused"
   * quickly).
   */
  shardNosConcurrentRetryDelayMs?: number;
}
/**
 * Island is a moderately short-lived collection of DB connections (represented
 * as Clients) that contains a single master Client and any number of replicas.
 *
 * - In normal situations, you don't likely need to work with Islands directly;
 *   you can rely on higher level abstractions which support automatic
 *   rediscovery and retries: Ent (or lower level Shard and Schema).
 * - Islands are helpful mostly when working with cross-Shards logic.
 * - Island is somewhat temporary: if the Cluster is reconfigured in real-time,
 *   then its Island objects may be recycled and re-created, and the
 *   corresponding Clients may be ended. This also applies to any given Client
 *   instance. Don't retain and reuse those objects for too long. The reliable
 *   abstractions (resilient to disconnects, shards migration, failover etc.)
 *   start from the Shard level.
 * - There is no guarantee that the data returned by shards(), master() or
 *   replica() will be up to date. Shards may be just migrated to another
 *   Island. A master may become a replica, or vice versa.
 */
export declare class Island<TClient extends Client> {
  /** Default values for the optional constructor options (the properties of
   * IslandOptions that may be omitted). */
  static readonly DEFAULT_OPTIONS: Required<PickPartial<IslandOptions<Client>>>;
  /** Clients grouped based on their roles and health. */
  private classifiedClients;
  /** In case shardNos discovery for some Client hasn't succeeded yet, and
   * thus, we are not sure about the role of that Client, then we try to load
   * the role from the fallback cache in this map and use it further instead
   * of "unknown". */
  private fallbackRoles;
  /** Recently discovered Shard numbers. */
  private shardNos;
  /** Island configuration options, with defaults applied. */
  readonly options: Required<IslandOptions<TClient>>;
  /**
   * Initializes the Island by copying the Client references into it.
   */
  constructor(options: IslandOptions<TClient>);
  /**
   * Island number.
   */
  get no(): number;
  /**
   * The list of Clients in this Island. No assumptions about the order.
   */
  get clients(): readonly TClient[];
  /**
   * Queries for Shards on the best available Client (preferably master, then
   * replicas) and stores the result internally, available for the further
   * shards() call.
   * - If some Clients are unavailable, tries its best to infer the data from
   *   other Clients.
   * - The method queries ALL Clients in parallel, because the caller logic
   *   anyways needs to know who's master and who's replica, as a side effect
   *   of the very 1st query after the Client creation. We infer that as a
   *   piggyback after calling Client#shardNos().
   * - In case we could not discover Shards, returns the list of errors that
   *   happened during the discovery.
   */
  rediscover(): Promise<SwallowedErrorLoggerProps[]>;
  /**
   * Returns the currently best-known Shards on this Island. This method is
   * needed only when working with cross-Shards logic; in normal situations,
   * it is not called much.
   */
  shards(): Array<Shard<TClient>>;
  /**
   * Returns the currently best-known master Client among the Clients of this
   * Island.
   *
   * - If all masters are unhealthy, we still return one of them and prefer not
   *   to fall back on a replica, because otherwise, we'll see non-obvious
   *   errors in logs ("can't write in a read-only Client" or so) and suspect
   *   that there is a bug in logic, although there is really no bug, it's just
   *   that the master node went down. It's way better to throw a
   *   straightforward error like "Client is down".
   * - If we can't find a master, but there is a list of Clients with unknown
   *   roles, prefer returning one of them vs. any known replica, since there
   *   is a chance that among those unknown Clients, there will be a master.
   * - In case all Clients are read-only (replicas), still returns the 1st of
   *   them, assuming that it's better to throw at the caller side on a failed
   *   write (at worst) rather than here. It is not common to have an Island
   *   without a master Client; that happens only temporarily during
   *   failover/switchover, so the caller will likely rediscover and find a
   *   new master on a next retry.
   */
  master(): TClient;
  /**
   * Returns a currently best-known random replica Client. In case there are
   * no replicas, returns the master Client.
   */
  replica(): TClient;
  /**
   * Updates the list of classified Clients. We try hard to not put Clients in
   * the "unknown" group by falling back to fallbackRoles.
   */
  private reclassifyClients;
  /**
   * Tries to pull shardNos() out of the Client and fail fast if the DB is
   * down. See details in the shardNosConcurrentRetryDelayMs option
   * description.
   */
  private clientShardNos;
}
//# sourceMappingURL=Island.d.ts.map