UNPKG

@hotmeshio/hotmesh

Version:

Permanent-Memory Workflows & AI Agents

717 lines (716 loc) 23.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.HotMesh = void 0;
const key_1 = require("../../modules/key");
const utils_1 = require("../../modules/utils");
const factory_1 = require("../connector/factory");
const engine_1 = require("../engine");
const logger_1 = require("../logger");
const quorum_1 = require("../quorum");
const router_1 = require("../router");
const worker_1 = require("../worker");
const enums_1 = require("../../modules/enums");
/**
 * Characters permitted in `config.namespace` and `config.appId`:
 * one or more ASCII letters, digits, or hyphens.
 */
const IDENTIFIER_PATTERN = /^[A-Za-z0-9-]+$/;
/**
 * HotMesh transforms Postgres into a durable workflow orchestration engine capable of running
 * fault-tolerant workflows across multiple services and systems.
 *
 * ## Key Features
 *
 * - **Fault Tolerance**: Automatic retry with exponential backoff and configurable policies
 * - **Distributed Execution**: No single point of failure
 * - **YAML-Driven**: Model-driven development with declarative workflow definitions
 * - **OpenTelemetry**: Built-in observability and tracing
 * - **Durable State**: Workflow state persists across system restarts
 * - **Pattern Matching**: Pub/sub with wildcard pattern support
 * - **Throttling**: Dynamic flow control and backpressure management
 * - **Retry Policies**: PostgreSQL-native retry configuration with exponential backoff
 *
 * ## Architecture
 *
 * HotMesh consists of several specialized modules:
 * - **HotMesh**: Core orchestration engine (this class)
 * - **MemFlow**: Temporal.io-compatible workflow framework
 * - **MeshCall**: Durable function execution (Temporal-like clone)
 *
 * ## Lifecycle Overview
 *
 * 1. **Initialize**: Create HotMesh instance with provider configuration
 * 2. **Deploy**: Upload YAML workflow definitions to the backend
 * 3. **Activate**: Coordinate quorum to enable the workflow version
 * 4. **Execute**: Publish events to trigger workflow execution
 * 5. **Monitor**: Track progress via OpenTelemetry and built-in observability
 *
 * ## Basic Usage
 *
 * @example
 * ```typescript
 * import { HotMesh } from '@hotmeshio/hotmesh';
 * import { Client as Postgres } from 'pg';
 *
 * // Initialize with Postgres backend
 * const hotMesh = await HotMesh.init({
 *   appId: 'my-app',
 *   engine: {
 *     connection: {
 *       class: Postgres,
 *       options: {
 *         connectionString: 'postgresql://user:pass@localhost:5432/db'
 *       }
 *     }
 *   }
 * });
 *
 * // Deploy workflow definition
 * await hotMesh.deploy(`
 * app:
 *   id: my-app
 *   version: '1'
 *   graphs:
 *     - subscribes: order.process
 *       activities:
 *         validate:
 *           type: worker
 *           topic: order.validate
 *         approve:
 *           type: hook
 *           topic: order.approve
 *         fulfill:
 *           type: worker
 *           topic: order.fulfill
 *       transitions:
 *         validate:
 *           - to: approve
 *         approve:
 *           - to: fulfill
 * `);
 *
 * // Activate the workflow version
 * await hotMesh.activate('1');
 *
 * // Execute workflow (fire-and-forget)
 * const jobId = await hotMesh.pub('order.process', {
 *   orderId: '12345',
 *   amount: 99.99
 * });
 *
 * // Execute workflow and wait for result
 * const result = await hotMesh.pubsub('order.process', {
 *   orderId: '12345',
 *   amount: 99.99
 * });
 * ```
 *
 * ## Postgres Backend Example
 *
 * @example
 * ```typescript
 * import { HotMesh } from '@hotmeshio/hotmesh';
 * import { Client as Postgres } from 'pg';
 *
 * const hotMesh = await HotMesh.init({
 *   appId: 'my-app',
 *   engine: {
 *     connection: {
 *       class: Postgres,
 *       options: {
 *         connectionString: 'postgresql://user:pass@localhost:5432/db'
 *       }
 *     }
 *   }
 * });
 * ```
 *
 * ## Advanced Features
 *
 * **Pattern Subscriptions**: Listen to multiple workflow topics
 * ```typescript
 * await hotMesh.psub('order.*', (topic, message) => {
 *   console.log(`Received ${topic}:`, message);
 * });
 * ```
 *
 * **Throttling**: Control processing rates
 * ```typescript
 * // Pause all processing for 5 seconds
 * await hotMesh.throttle({ throttle: 5000 });
 *
 * // Emergency stop (pause indefinitely)
 * await hotMesh.throttle({ throttle: -1 });
 * ```
 *
 * **Workflow Interruption**: Gracefully stop running workflows
 * ```typescript
 * await hotMesh.interrupt('order.process', jobId, {
 *   reason: 'User cancellation'
 * });
 * ```
 *
 * **State Inspection**: Query workflow state and progress
 * ```typescript
 * const state = await hotMesh.getState('order.process', jobId);
 * const status = await hotMesh.getStatus(jobId);
 * ```
 *
 * ## Distributed Coordination
 *
 * HotMesh automatically handles distributed coordination through its quorum system:
 *
 * ```typescript
 * // Check quorum health
 * const members = await hotMesh.rollCall();
 *
 * // Coordinate version activation across all instances
 * await hotMesh.activate('2', 1000); // 1 second delay for consensus
 * ```
 *
 * ## Integration with Higher-Level Modules
 *
 * For most use cases, consider using the higher-level modules:
 * - **MemFlow**: For Temporal.io-style workflows with TypeScript functions
 * - **MeshCall**: For durable function calls and RPC patterns
 *
 * ## Cleanup
 *
 * Always clean up resources when shutting down:
 * ```typescript
 * // Stop this instance
 * hotMesh.stop();
 *
 * // Stop all instances (typically in signal handlers)
 * await HotMesh.stop();
 * ```
 *
 * @see {@link https://docs.hotmesh.io/} - Complete documentation
 * @see {@link https://github.com/hotmeshio/samples-typescript} - Examples and tutorials
 * @see {@link https://zenodo.org/records/12168558} - White paper on the architecture
 */
class HotMesh {
    /**
     * Validates the configured namespace and stores it on the instance.
     * A falsy namespace falls back to the default (`HMNS`).
     *
     * @param namespace - optional namespace from the config
     * @throws {Error} if the namespace is provided but is not a string made
     * up solely of letters, digits, and hyphens
     * @private
     */
    verifyAndSetNamespace(namespace) {
        if (!namespace) {
            this.namespace = key_1.HMNS;
            return;
        }
        // The `typeof` guard ensures non-string values (e.g., numbers) produce
        // the descriptive error below rather than an opaque TypeError.
        if (typeof namespace !== 'string' || !IDENTIFIER_PATTERN.test(namespace)) {
            throw new Error(`config.namespace [${namespace}] is invalid`);
        }
        this.namespace = namespace;
    }
    /**
     * Validates the configured app id and stores it on the instance.
     *
     * @param appId - app id from the config (required)
     * @throws {Error} if the app id is missing, is not a string made up solely
     * of letters, digits, and hyphens, or is the reserved value `'a'`
     * @private
     */
    verifyAndSetAppId(appId) {
        // The `typeof` guard ensures non-string values (e.g., numbers) produce
        // the descriptive error below rather than an opaque TypeError.
        if (typeof appId !== 'string' || !IDENTIFIER_PATTERN.test(appId)) {
            throw new Error(`config.appId [${appId}] is invalid`);
        }
        if (appId === 'a') {
            throw new Error(`config.appId [${appId}] is reserved`);
        }
        this.appId = appId;
    }
    /**
     * Instance initializer. Workers are configured
     * similarly to the engine, but as an array with
     * multiple worker objects.
     *
     * Initialization order: guid, namespace, and appId are set; the logger
     * is created; then the engine, the quorum, and the workers are
     * initialized (each step is awaited before the next begins).
     *
     * ## Retry Policy Configuration
     *
     * HotMesh supports robust retry policies with exponential backoff for PostgreSQL.
     * Configure retry behavior at the stream level for automatic fault tolerance.
     *
     * @example Basic Configuration
     * ```typescript
     * const config: HotMeshConfig = {
     *   appId: 'myapp',
     *   engine: {
     *     connection: {
     *       class: Postgres,
     *       options: {
     *         connectionString: 'postgresql://usr:pwd@localhost:5432/db',
     *       }
     *     }
     *   },
     *   workers: [...]
     * };
     * const hotMesh = await HotMesh.init(config);
     * ```
     *
     * @example With Retry Policy (PostgreSQL)
     * ```typescript
     * import { HotMesh } from '@hotmeshio/hotmesh';
     * import { Client as Postgres } from 'pg';
     *
     * const hotMesh = await HotMesh.init({
     *   appId: 'my-app',
     *   engine: {
     *     connection: {
     *       class: Postgres,
     *       options: { connectionString: 'postgresql://...' }
     *     },
     *     // Default retry policy for engine streams
     *     retryPolicy: {
     *       maximumAttempts: 5,        // Retry up to 5 times
     *       backoffCoefficient: 2,     // Exponential: 2^0, 2^1, 2^2...
     *       maximumInterval: '300s'    // Cap delay at 5 minutes
     *     }
     *   },
     *   workers: [{
     *     topic: 'order.process',
     *     connection: {
     *       class: Postgres,
     *       options: { connectionString: 'postgresql://...' }
     *     },
     *     // Worker-specific retry policy
     *     retryPolicy: {
     *       maximumAttempts: 10,
     *       backoffCoefficient: 1.5,
     *       maximumInterval: '600s'
     *     },
     *     callback: async (data) => {
     *       // Your business logic here
     *       // Failures will automatically retry with exponential backoff
     *       return { status: 'success', data: processedData };
     *     }
     *   }]
     * });
     * ```
     *
     * **Retry Policy Options**:
     * - `maximumAttempts` - Maximum retry attempts before failure (default: 3)
     * - `backoffCoefficient` - Base for exponential backoff calculation (default: 10)
     * - `maximumInterval` - Maximum delay between retries in seconds or duration string (default: '120s')
     *
     * **Retry Delays**: For `backoffCoefficient: 2`, delays are: 2s, 4s, 8s, 16s, 32s...
     * capped at `maximumInterval`.
     *
     * **Note**: Retry policies are stored in PostgreSQL columns for efficient querying and
     * observability. Each retry creates a new message, preserving message immutability.
     *
     * @param config - HotMesh configuration; `appId` is required. NOTE: the
     * `engine` and `workers` entries of this object are mutated in place
     * (task queue, readonly flag, retry policy) during initialization.
     * @returns the initialized HotMesh instance
     * @throws {Error} if `config.namespace` or `config.appId` is invalid
     */
    static async init(config) {
        const instance = new HotMesh();
        instance.guid = config.guid ?? (0, utils_1.guid)();
        instance.verifyAndSetNamespace(config.namespace);
        instance.verifyAndSetAppId(config.appId);
        instance.logger = new logger_1.LoggerService(config.appId, instance.guid, config.name || '', config.logLevel);
        await instance.initEngine(config, instance.logger);
        await instance.initQuorum(config, instance.engine, instance.logger);
        await instance.doWork(config, instance.logger);
        return instance;
    }
    /**
     * returns a guid using the same core guid
     * generator used by the HotMesh (nanoid)
     */
    static guid() {
        return (0, utils_1.guid)();
    }
    /**
     * Initializes the engine when `config.engine` is present; otherwise
     * does nothing and `this.engine` remains `null`.
     *
     * @param config - HotMesh configuration (its `engine` entry is mutated)
     * @param logger - logger passed through to the engine service
     * @private
     */
    async initEngine(config, logger) {
        if (!config.engine) {
            return;
        }
        // connections that are 'readonly' transfer
        // this property directly to the engine,
        // and ALWAYS take precedence.
        if (config.engine.connection.readonly) {
            config.engine.readonly = true;
        }
        // Apply retry policy to stream connection if provided
        if (config.engine.retryPolicy) {
            this.applyRetryPolicy(config.engine.connection, config.engine.retryPolicy);
        }
        // Initialize task queue for engine
        config.engine.taskQueue = this.initTaskQueue(config.engine.taskQueue, config.taskQueue);
        await factory_1.ConnectorService.initClients(config.engine);
        this.engine = await engine_1.EngineService.init(this.namespace, this.appId, this.guid, config, logger);
    }
    /**
     * Initializes the quorum service, but only when an engine exists;
     * otherwise `this.quorum` remains `null`.
     *
     * @param config - HotMesh configuration
     * @param engine - the engine created by `initEngine` (may be `null`)
     * @param logger - logger passed through to the quorum service
     * @private
     */
    async initQuorum(config, engine, logger) {
        if (engine) {
            this.quorum = await quorum_1.QuorumService.init(this.namespace, this.appId, this.guid, config, engine, logger);
        }
    }
    /**
     * Use the static `HotMesh.init` factory instead of calling the
     * constructor directly; the constructor only sets empty defaults.
     * @private
     */
    constructor() {
        /**
         * @private
         */
        this.engine = null;
        /**
         * @private
         */
        this.quorum = null;
        /**
         * @private
         */
        this.workers = [];
    }
    /**
     * Prepares each configured worker (retry policy, task queue) and then
     * initializes the worker service, storing the result in `this.workers`.
     *
     * @param config - HotMesh configuration (its `workers` entries are mutated)
     * @param logger - logger passed through to the worker service
     * @private
     */
    async doWork(config, logger) {
        // Initialize task queues for workers
        if (config.workers) {
            for (const worker of config.workers) {
                // Apply retry policy to stream connection if provided
                if (worker.retryPolicy) {
                    this.applyRetryPolicy(worker.connection, worker.retryPolicy);
                }
                worker.taskQueue = this.initTaskQueue(worker.taskQueue, config.taskQueue);
            }
        }
        this.workers = await worker_1.WorkerService.init(this.namespace, this.appId, this.guid, config, logger);
    }
    /**
     * Initialize task queue with proper precedence:
     * 1. Use component-specific queue if set (engine/worker)
     * 2. Use global config queue if set
     * 3. Use default queue as fallback
     *
     * @param componentQueue - queue set on the engine or worker entry
     * @param globalQueue - queue set at the top level of the config
     * @returns the first truthy candidate, else `DEFAULT_TASK_QUEUE`
     * @private
     */
    initTaskQueue(componentQueue, globalQueue) {
        // Component-specific queue takes precedence
        if (componentQueue) {
            return componentQueue;
        }
        // Global config queue is next
        if (globalQueue) {
            return globalQueue;
        }
        // Default queue as fallback
        return enums_1.DEFAULT_TASK_QUEUE;
    }
    /**
     * Apply retry policy to the stream connection within a ProviderConfig or ProvidersConfig.
     * Handles both short-form (ProviderConfig) and long-form (ProvidersConfig) connection configs.
     *
     * @param connection - connection config object; mutated in place
     * @param retryPolicy - retry policy to attach
     * @private
     */
    applyRetryPolicy(connection, retryPolicy) {
        // Check if this is ProvidersConfig (has 'stream' property)
        if ('stream' in connection && connection.stream) {
            // Long-form: apply to the stream sub-config
            connection.stream.retryPolicy = retryPolicy;
        }
        else {
            // Short-form: apply directly to the connection
            connection.retryPolicy = retryPolicy;
        }
    }
    // ************* PUB/SUB METHODS *************
    /**
     * Starts a workflow. Delegates to the engine; resolves to `undefined`
     * when this instance has no engine.
     * @example
     * ```typescript
     * await hotMesh.pub('a.b.c', { key: 'value' });
     * ```
     */
    async pub(topic, data = {}, context, extended) {
        return await this.engine?.pub(topic, data, context, extended);
    }
    /**
     * Subscribe (listen) to all output and interim emissions of a single
     * workflow topic. NOTE: Postgres does not support patterned
     * unsubscription, so this method is not supported for Postgres.
     *
     * NOTE(review): the caveat above refers to *patterned* unsubscription
     * although this method is not patterned; confirm whether it applies here.
     *
     * @example
     * ```typescript
     * await hotMesh.sub('a.b.c', (topic, message) => {
     *   console.log(message);
     * });
     * ```
     */
    async sub(topic, callback) {
        return await this.engine?.sub(topic, callback);
    }
    /**
     * Stop listening in on a single workflow topic
     */
    async unsub(topic) {
        return await this.engine?.unsub(topic);
    }
    /**
     * Listen to all output and interim emissions of a workflow topic
     * matching a wildcard pattern.
     * @example
     * ```typescript
     * await hotMesh.psub('a.b.c*', (topic, message) => {
     *   console.log(message);
     * });
     * ```
     */
    async psub(wild, callback) {
        return await this.engine?.psub(wild, callback);
    }
    /**
     * Patterned unsubscribe. NOTE: Postgres does not support patterned
     * unsubscription, so this method is not supported for Postgres.
     */
    async punsub(wild) {
        return await this.engine?.punsub(wild);
    }
    /**
     * Starts a workflow and awaits the response
     * @example
     * ```typescript
     * await hotMesh.pubsub('a.b.c', { key: 'value' });
     * ```
     */
    async pubsub(topic, data = {}, context, timeout) {
        return await this.engine?.pubsub(topic, data, context, timeout);
    }
    /**
     * Add a transition message to the workstream, resuming leg 2 of a paused
     * reentrant activity (e.g., await, worker, hook).
     *
     * Like the other engine delegates, resolves to `undefined` when this
     * instance has no engine.
     */
    async add(streamData) {
        return await this.engine?.add(streamData);
    }
    // ************* QUORUM METHODS *************
    /**
     * Request a roll call from the quorum (engine and workers)
     */
    async rollCall(delay) {
        return await this.quorum?.rollCall(delay);
    }
    /**
     * Sends a throttle message to the quorum (engine and/or workers)
     * to limit the rate of processing. Pass `-1` to throttle indefinitely.
     * The value must be a non-negative integer and not exceed `MAX_DELAY` ms.
     *
     * When throttling is set, the quorum will pause for the specified time
     * before processing the next message. Target specific engines and
     * workers by passing a `guid` and/or `topic`. Pass neither to
     * throttle the entire quorum.
     *
     * In this example, all processing has been paused indefinitely for
     * the entire quorum. This is equivalent to an emergency stop.
     *
     * HotMesh is a stateless sequence engine, so the throttle can be adjusted up
     * and down with no loss of data.
     *
     * The throttle value is validated before anything else happens. When this
     * instance has no engine (and therefore no quorum), nothing is stored or
     * published and the call resolves to `undefined`.
     *
     * @example
     * ```typescript
     * await hotMesh.throttle({ throttle: -1 });
     * ```
     *
     * @param options - `throttle` (ms, or `-1`), plus optional `guid` / `topic` targets
     * @throws {Error} if the throttle value is not an integer in `[0, MAX_DELAY]` (or `-1`)
     */
    async throttle(options) {
        // -1 is shorthand for "indefinitely", i.e. the maximum delay
        const throttle = options.throttle === -1 ? enums_1.MAX_DELAY : options.throttle;
        if (!Number.isInteger(throttle) || throttle < 0 || throttle > enums_1.MAX_DELAY) {
            throw new Error(`Throttle must be a non-negative integer and not exceed ${enums_1.MAX_DELAY} ms; send -1 to throttle indefinitely`);
        }
        const throttleMessage = {
            type: 'throttle',
            throttle,
        };
        if (options.guid) {
            throttleMessage.guid = options.guid;
        }
        if (options.topic !== undefined) {
            throttleMessage.topic = options.topic;
        }
        // Guarded like every other engine delegate: the quorum only exists
        // when an engine exists, so the `?.` on the quorum below would be
        // unreachable if this line were allowed to throw on a null engine.
        await this.engine?.store.setThrottleRate(throttleMessage);
        return await this.quorum?.pub(throttleMessage);
    }
    /**
     * Publish a message to the quorum (engine and/or workers)
     */
    async pubQuorum(quorumMessage) {
        return await this.quorum?.pub(quorumMessage);
    }
    /**
     * Subscribe to quorum events (engine and workers)
     */
    async subQuorum(callback) {
        return await this.quorum?.sub(callback);
    }
    /**
     * Unsubscribe from quorum events (engine and workers)
     */
    async unsubQuorum(callback) {
        return await this.quorum?.unsub(callback);
    }
    // ************* LIFECYCLE METHODS *************
    /**
     * Preview changes and provide an analysis of risk
     * prior to deployment
     * @private
     */
    async plan(path) {
        return await this.engine?.plan(path);
    }
    /**
     * When the app YAML descriptor file is ready, the `deploy` function can be called.
     * This function is responsible for merging all referenced YAML source
     * files and writing the JSON output to the file system and to the provider backend. It
     * is also possible to embed the YAML in-line as a string.
     *
     * *The version will not be active until activation is explicitly called.*
     */
    async deploy(pathOrYAML) {
        return await this.engine?.deploy(pathOrYAML);
    }
    /**
     * Once the app YAML file is deployed to the provider backend, the `activate` function can be
     * called to enable it for the entire quorum at the same moment.
     *
     * The approach is to establish the coordinated health of the system through a series
     * of call/response exchanges. Once it is established that the quorum is healthy,
     * the quorum is instructed to run their engine in `no-cache` mode, ensuring
     * that the provider backend is consulted for the active app version each time a
     * call is processed. This ensures that all engines are running the same version
     * of the app, switching over at the same moment and then enabling `cache` mode
     * to improve performance.
     *
     * *Add a delay for the quorum to reach consensus if traffic is busy, but
     * also consider throttling traffic flow to an acceptable level.*
     */
    async activate(version, delay) {
        return await this.quorum?.activate(version, delay);
    }
    /**
     * Returns the job state as a JSON object, useful
     * for understanding dependency chains
     */
    async export(jobId) {
        return await this.engine?.export(jobId);
    }
    /**
     * Returns all data (HGETALL) for a job.
     */
    async getRaw(jobId) {
        return await this.engine?.getRaw(jobId);
    }
    /**
     * Reporter-related method to get the status of a job
     * @private
     */
    async getStats(topic, query) {
        return await this.engine?.getStats(topic, query);
    }
    /**
     * Returns the status of a job. This is a numeric
     * semaphore value that indicates the job's state.
     * Any non-positive value indicates a completed job.
     * Jobs with a value of `-1` are pending and will
     * automatically be scrubbed after a set period.
     * Jobs with a value around -1 billion have been interrupted
     * and will be scrubbed after a set period. Jobs with
     * a value of 0 completed normally. Jobs with a
     * positive value are still running.
     */
    async getStatus(jobId) {
        return this.engine?.getStatus(jobId);
    }
    /**
     * Returns the job state (data and metadata) for a job.
     */
    async getState(topic, jobId) {
        return this.engine?.getState(topic, jobId);
    }
    /**
     * Returns searchable/queryable data for a job. In this
     * example a literal field is also searched (the colon
     * is used to track job status and is a reserved field;
     * it can be read but not written).
     *
     * @example
     * ```typescript
     * const fields = ['fred', 'barney', '":"'];
     * const queryState = await hotMesh.getQueryState('123', fields);
     * //returns { fred: 'flintstone', barney: 'rubble', ':': '1' }
     * ```
     */
    async getQueryState(jobId, fields) {
        return await this.engine?.getQueryState(jobId, fields);
    }
    /**
     * @private
     */
    async getIds(topic, query, queryFacets = []) {
        return await this.engine?.getIds(topic, query, queryFacets);
    }
    /**
     * @private
     */
    async resolveQuery(topic, query) {
        return await this.engine?.resolveQuery(topic, query);
    }
    /**
     * Interrupt an active job
     */
    async interrupt(topic, jobId, options = {}) {
        return await this.engine?.interrupt(topic, jobId, options);
    }
    /**
     * Immediately deletes (DEL) a completed job from the system.
     *
     * *Scrubbed jobs must be complete with a non-positive `status` value*
     */
    async scrub(jobId) {
        await this.engine?.scrub(jobId);
    }
    /**
     * Re/entry point for an active job. This is used to resume a paused job
     * and close the reentry point or leave it open for subsequent reentry.
     * Because `hooks` are public entry points, they include a `topic`
     * which is established in the app YAML file.
     *
     * When this method is called, a hook rule will be located to establish
     * the exact activity and activity dimension for reentry.
     */
    async hook(topic, data, status, code) {
        return await this.engine?.hook(topic, data, status, code);
    }
    /**
     * @private
     */
    async hookAll(hookTopic, data, query, queryFacets = []) {
        return await this.engine?.hookAll(hookTopic, data, query, queryFacets);
    }
    /**
     * Stop all points of presence, workers and engines.
     *
     * Guarded by the static `disconnecting` flag, so only the first call
     * performs the shutdown; later calls return immediately.
     */
    static async stop() {
        if (!this.disconnecting) {
            this.disconnecting = true;
            await router_1.Router.stopConsuming();
            await factory_1.ConnectorService.disconnectAll();
        }
    }
    /**
     * Stop this point of presence, workers and engines
     */
    stop() {
        this.engine?.taskService.cancelCleanup();
        this.quorum?.stop();
        this.workers?.forEach((worker) => {
            worker.stop();
        });
    }
    /**
     * @private
     * @deprecated
     */
    async compress(terms) {
        return await this.engine?.compress(terms);
    }
}
exports.HotMesh = HotMesh;
HotMesh.disconnecting = false;