UNPKG

@clickup/ent-framework

Version:

A PostgreSQL graph-database-alike library with microsharding and row-level security

github.com/clickup/ent-framework

clickup/ent-framework

239 lines • 11.7 kB

JavaScript

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Island = void 0;
const compact_1 = __importDefault(require("lodash/compact"));
const defaults_1 = __importDefault(require("lodash/defaults"));
const first_1 = __importDefault(require("lodash/first"));
const sample_1 = __importDefault(require("lodash/sample"));
const sortBy_1 = __importDefault(require("lodash/sortBy"));
const p_timeout_1 = __importDefault(require("p-timeout"));
const misc_1 = require("../internal/misc");
/**
 * Island is a moderately short-lived collection of DB connections (represented
 * as Clients) that contains a single master Client and any number of replicas.
 *
 * - In normal situations, you don't likely need to work with Islands directly,
 *   you can rely on higher level abstractions which support automatic
 *   rediscovery and retries: Ent (or lower level Shard and Schema).
 * - Islands are helpful mostly when working with cross-Shards logic.
 * - Island is somewhat temporary: if the Cluster is reconfigured in real-time,
 *   then its Island objects may be recycled and re-created, and the
 *   corresponding Clients may be ended. This also applies to any given Client
 *   instance. Don't retain and reuse those objects for too long. The reliable
 *   abstractions (resilient to disconnects, shards migration, failover etc.)
 *   start from Shard level.
 * - There is no guarantee that the data returned by shards(), master() or
 *   replica() will be up to date. Shards may be just migrated to another
 *   Island. Master may become a replica, or vice versa.
 */
class Island {
    /**
     * Initializes the Island by copying the Client references into it.
     *
     * @param options Island options; missing keys are filled in from
     *   Island.DEFAULT_OPTIONS. Must contain a non-empty `clients` array.
     * @throws {Error} If `options.clients` is empty.
     */
    constructor(options) {
        /** In case shardNos discovery for some Client hasn't succeeded yet, and thus,
         * we are not sure about the role of that Client, then we try to load the role
         * from fallback cache in this map and use further instead of "unknown". */
        this.fallbackRoles = new WeakMap();
        /** Recently discovered Shard numbers. */
        this.shardNos = null;
        this.options = (0, defaults_1.default)({}, options, Island.DEFAULT_OPTIONS);
        if (options.clients.length === 0) {
            throw new Error("Island does not have nodes");
        }
        this.reclassifyClients();
    }
    /**
     * Island number.
     */
    get no() {
        return this.options.no;
    }
    /**
     * The list of Clients in this Island. No assumptions about the order.
     */
    get clients() {
        return this.options.clients;
    }
    /**
     * Queries for Shards on the best available Client (preferably master, then
     * replicas) and stores the result internally, available for the further
     * shards() call.
     * - If some Clients are unavailable, tries its best to infer the data from
     *   other Clients.
     * - The method queries ALL clients in parallel, because the caller logic
     *   anyways needs to know, who's master and who's replica, as a side effect
     *   of the very 1st query after the Client creation. We infer that as a piggy
     *   back after calling Client#shardNos().
     * - In case we could not discover shards, returns the list of errors happened
     *   during the discovery.
     *
     * @returns An empty array when at least one Client responded; otherwise the
     *   list of per-Client error records collected during the discovery.
     */
    async rediscover() {
        // Load fallback roles as early as possible (since shardNo() queries below
        // may take a lot of time in case they time out).
        await (0, misc_1.mapJoin)(this.clients, async (client) => {
            if (!this.fallbackRoles.has(client)) {
                const fallback = await this.options.localCache?.get(client.address());
                if (fallback) {
                    this.fallbackRoles.set(client, fallback.role);
                }
            }
        });
        // We don't use Promise.race() here! We really want to wait until ALL
        // clients either respond or reject, which is what mapJoin() is doing. If we
        // used Promise.race(), then timing out Clients could've been requested by
        // the caller logic concurrently over and over, so the number of pending
        // requests to them would grow. We want to control that parallelism.
        const errors = [];
        const res = (0, sortBy_1.default)((0, compact_1.default)(await (0, misc_1.mapJoin)(this.clients, async (client) => {
            const startTime = performance.now();
            try {
                const shardNos = await this.clientShardNos(client);
                const address = client.address();
                const role = client.role();
                // Remember the freshly observed role, both in the optional local
                // cache and in the in-memory fallback map.
                await this.options.localCache?.set(address, { address, role });
                this.fallbackRoles.set(client, role);
                return { role, shardNos };
            }
            catch (error) {
                errors.push({
                    where: `${client.constructor.name}(${client.options.name}).shardNos`,
                    error,
                    elapsed: Math.round(performance.now() - startTime),
                    importance: "low",
                });
                // No await between push() and this read, so the last element is
                // the record that has just been pushed above.
                client.options.loggers?.swallowedErrorLogger(errors[errors.length - 1]);
                return null;
            }
        })), 
        // Sort order: master first, then replica, then anything else; within
        // the same role, the response with more Shards goes first.
        ({ role }) => (role === "master" ? 0 : role === "replica" ? 1 : 2), ({ shardNos }) => -1 * shardNos.length);
        this.reclassifyClients();
        if (res.length > 0) {
            // Copy before sorting to not mutate the array returned by the Client.
            this.shardNos = [...res[0].shardNos].sort((a, b) => a - b);
            return [];
        }
        else {
            // Being unable to access all DB Clients is not a critical error here,
            // we'll just miss some Shards (and other Shards will work). DO NOT throw
            // through here yet! This needs to be addressed holistically and with
            // careful retries. Also, we have Shards rediscovery every N seconds, so a
            // missing Island will self-heal eventually.
            this.shardNos = [];
            return errors;
        }
    }
    /**
     * Returns the currently best-known Shards on this Island. This method is
     * needed only when working with cross-Shards logic; in normal situations, it
     * is not called much.
     *
     * @returns One object per known Shard number, each produced by
     *   `options.createShard(no)`.
     * @throws If Shard numbers have not been stored yet (see the nullthrows()
     *   message below).
     */
    shards() {
        const shardNos = (0, misc_1.nullthrows)(this.shardNos, "Before shards() can be used, rediscover() must finish");
        return shardNos.map((no) => this.options.createShard(no));
    }
    /**
     * Returns the currently best-known master Client among the Clients of this
     * Island.
     *
     * - If all masters are unhealthy, we still return one of them and prefer not
     *   to fall back on a replica, because otherwise, we'll see non-obvious
     *   errors in logs ("can't write in a read-only Client" or so) and suspect
     *   that there is a bug in logic, although there is really no bug, it's just
     *   the master node went down. It's way better to throw a straightforward
     *   error like "Client is down".
     * - If we can't find a master, but there is a list of Clients with unknown
     *   roles, prefer returning one of them vs. any known replica, since there is
     *   a chance that among those unknown Clients, there will be a master.
     * - In case all Clients are read-only (replicas), still returns the 1st of
     *   them, assuming that it's better to throw at the caller side on a failed
     *   write (at worst) rather than here. It is not common to have an Island
     *   without a master Client, that happens only temporarily during
     *   failover/switchover, so the caller will likely rediscover and find a new
     *   master on a next retry.
     */
    master() {
        if (this.clients.length === 1) {
            return (0, first_1.default)(this.clients);
        }
        const master = (0, sample_1.default)(this.classifiedClients.master.healthy);
        if (master && master.role() === "master" && !master.connectionIssue()) {
            // Fast and most frequent path.
            return master;
        }
        // The cached classification did not yield a usable master; recompute it
        // from the Clients' current role/health and pick by priority.
        this.reclassifyClients();
        return ((0, sample_1.default)(this.classifiedClients.master.healthy) ??
            (0, sample_1.default)(this.classifiedClients.master.unhealthy) ?? // prefer unhealthy master
            (0, sample_1.default)(this.classifiedClients.unknown.healthy) ??
            (0, sample_1.default)(this.classifiedClients.unknown.unhealthy) ??
            (0, sample_1.default)(this.classifiedClients.replica.healthy) ??
            (0, sample_1.default)(this.classifiedClients.replica.unhealthy) ??
            (0, first_1.default)(this.clients) // should never reach here
        );
    }
    /**
     * Returns a currently best-known random replica Client. In case there are no
     * replicas, returns the master Client.
     */
    replica() {
        if (this.clients.length === 1) {
            return (0, first_1.default)(this.clients);
        }
        const replica = (0, sample_1.default)(this.classifiedClients.replica.healthy);
        if (replica && replica.role() === "replica" && !replica.connectionIssue()) {
            // Fast and most frequent path.
            return replica;
        }
        // The cached classification did not yield a usable replica; recompute it
        // and pick by priority: all healthy groups first, then unhealthy ones.
        this.reclassifyClients();
        return ((0, sample_1.default)(this.classifiedClients.replica.healthy) ??
            (0, sample_1.default)(this.classifiedClients.unknown.healthy) ??
            (0, sample_1.default)(this.classifiedClients.master.healthy) ??
            (0, sample_1.default)(this.classifiedClients.replica.unhealthy) ??
            (0, sample_1.default)(this.classifiedClients.unknown.unhealthy) ??
            (0, sample_1.default)(this.classifiedClients.master.unhealthy) ??
            (0, first_1.default)(this.clients) // should never reach here
        );
    }
    /**
     * Updates the list of classified Clients. We try hard to not put Clients in
     * "unknown" group by falling back to fallbackRoles.
     */
    reclassifyClients() {
        const classifiedClients = {
            master: { healthy: [], unhealthy: [] },
            unknown: { healthy: [], unhealthy: [] },
            replica: { healthy: [], unhealthy: [] },
        };
        for (const client of this.clients) {
            const health = client.connectionIssue() ? "unhealthy" : "healthy";
            const role = client.role();
            const effectiveRole = role === "unknown" ? this.fallbackRoles.get(client) ?? role : role;
            // Fallback roles originate from options.localCache, whose content is
            // not validated here. Treat any unrecognized role as "unknown" rather
            // than throwing a TypeError from master()/replica()/rediscover().
            const group = Object.prototype.hasOwnProperty.call(classifiedClients, effectiveRole)
                ? classifiedClients[effectiveRole]
                : classifiedClients.unknown;
            group[health].push(client);
        }
        this.classifiedClients = classifiedClients;
    }
    /**
     * Tries to pull shardNos() out of the Client and fail fast if the DB is down.
     * See details in shardNosConcurrentRetryDelayMs option description.
     *
     * @param client The Client to query.
     * @returns A promise of whatever `client.shardNos()` resolves with.
     */
    async clientShardNos(client) {
        if (this.options.shardNosConcurrentRetryDelayMs === 0) {
            // The concurrent retry is disabled: just do a single plain attempt.
            return client.shardNos();
        }
        const promise = client.shardNos();
        // If the 1st attempt doesn't settle within the delay, the fallback passed
        // to p-timeout yields the "TIMEOUT" marker instead of an array.
        const maybeShardNos = await (0, p_timeout_1.default)(promise, this.options.shardNosConcurrentRetryDelayMs, () => "TIMEOUT");
        return Array.isArray(maybeShardNos)
            ? maybeShardNos
            : // Keep the 1st attempt running and start a 2nd one concurrently;
                // whichever settles first (fulfills OR rejects) wins.
                Promise.race([promise, client.shardNos()]); // try again once
    }
}
exports.Island = Island;
/** Default values for the constructor options. */
Island.DEFAULT_OPTIONS = {
    localCache: null,
    // https://willbryant.net/overriding_the_default_linux_kernel_20_second_tcp_socket_connect_timeout
    // Convenient when tcp_syn_retries=1 is set in docker-compose.yml file:
    // "Linux ... sends 5 SYNs ... the retries are after 3s, 6s, 12s, 24s"
    shardNosConcurrentRetryDelayMs: 3500,
};
//# sourceMappingURL=Island.js.map