UNPKG

@getanthill/datastore

Version:

Event-Sourced Datastore

992 lines (853 loc) 25.5 kB
import type {
  ProcessDestroySignal,
  HandlerBuilderFunc,
  RunnerServices,
  StopFunc,
  HandlerFunc,
  RunnerTrigger,
  AnyObject,
  Source,
} from '../typings';
import type { IncomingMessage, Server, ServerResponse } from 'http';
import type { MessageOptions, Route } from '../services/broker';

import { Command } from 'commander';
import path from 'path';
import { ok } from 'node:assert';
import express from 'express';
import omit from 'lodash/omit';

import * as telemetry from '@getanthill/telemetry';

import type Datastore from './Datastore';

import * as utils from './utils';
import * as metrics from '../constants/metrics';

/**
 * Heartbeat HTTP server shared by the whole module. It is assigned by
 * `heartbeat()` / `setServer()` and reset to `null` by `stop()`.
 */
let server: Server<typeof IncomingMessage, typeof ServerResponse> | null;

/**
 * Counters and timings shared by every invocation of a handler built with
 * `localEventHandler`. They drive the local queuing / throttling logic.
 */
export interface ProcessingStats {
  processingTimeWindowInMilliseconds: number;
  queuing: number;
  waiting: number;
  waited: number;
  processing: number;
  processed: number;
  progress: number;
  totalWaitingDurationInMilliseconds: number;
  averageWaitingDurationInMilliseconds: number;
  totalProcessingDurationInMilliseconds: number;
  averageProcessingDurationInMilliseconds: number;
  maxParallelEvents: number;
}

/**
 * Returns the module-level heartbeat server (if any).
 */
export function getServer() {
  return server;
}

/**
 * Replaces the module-level heartbeat server.
 * @param _server - Server instance to store (or `null`)
 * @returns The stored server
 */
export function setServer(_server: typeof server) {
  server = _server;

  return server;
}

/**
 * Logs a message through the services' telemetry logger, falling back to the
 * default `@getanthill/telemetry` logger. Nothing is logged unless
 * `options.verbose` is strictly `true`.
 * @param services - Runner services possibly holding a telemetry logger
 * @param options - Runner options; only `verbose` is read
 * @param level - Name of the logger method to call
 * @param message - Log message
 * @param context - Optional structured context
 */
export function log(
  services: RunnerServices | undefined,
  options: { verbose?: boolean },
  level: string,
  message: string,
  context?: { [key: string]: any },
) {
  if (options?.verbose !== true) {
    return;
  }

  const logger = services?.telemetry?.logger ?? telemetry.logger;

  /* @ts-ignore */
  logger[level](message, context);
}

/**
 * Binds the process termination signals and the uncaught error events to the
 * destroy sequence. Does nothing when `options.skipProcessBinding` is `true`.
 * @param services - Runner services forwarded to the destroy sequence
 * @param stopHandler - Handler stop function forwarded to the destroy sequence
 * @param options - Runner options
 */
export function init(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
) {
  if (options.skipProcessBinding === true) {
    return;
  }

  process.once(
    'SIGTERM',
    signalHandler(services, stopHandler, options, 'SIGTERM'),
  );
  process.once(
    'SIGINT',
    signalHandler(services, stopHandler, options, 'SIGINT'),
  );
  process.once(
    'uncaughtException',
    errorHandler(services, stopHandler, options, 'uncaughtException'),
  );
  process.once(
    'unhandledRejection',
    errorHandler(services, stopHandler, options, 'unhandledRejection'),
  );
}

/**
 * Builds a listener invoking the destroy sequence for a termination signal.
 * @param destroyHandler - Destroy implementation (defaults to `destroy`)
 */
export function signalHandler(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
  signal: ProcessDestroySignal,
  destroyHandler = destroy,
): () => Promise<NodeJS.Timeout> {
  return () => {
    return destroyHandler(services, stopHandler, options, signal);
  };
}

/**
 * Builds a listener invoking the destroy sequence with the error received by
 * the process event.
 * @param destroyHandler - Destroy implementation (defaults to `destroy`)
 */
export function errorHandler(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
  signal: ProcessDestroySignal,
  destroyHandler = destroy,
): (err: Error) => Promise<NodeJS.Timeout> {
  return (err) => {
    return destroyHandler(services, stopHandler, options, signal, err);
  };
}

/**
 * Schedules `process.exit(exitCode)` after `exitTimeout` milliseconds.
 * The returned promise resolves with the timer once the callback has run.
 */
function scheduleExit(
  exitCode: number,
  exitTimeout?: number,
): Promise<NodeJS.Timeout> {
  return new Promise<NodeJS.Timeout>((resolve) => {
    const timeout = setTimeout(() => {
      timeout.unref();

      process.exit(exitCode);

      resolve(timeout);
    }, exitTimeout);
  });
}

/**
 * Cleanup and stop the process properly, then exit the process.
 *
 * The exit code is `1` when the destruction was caused by an error or when the
 * stop sequence itself failed, `0` otherwise.
 * @param services - Runner services to stop (may be undefined on early failure)
 * @param stopHandler - Optional handler stop function
 * @param options - Runner options (`exitTimeout`, `verbose`)
 * @param signal - Signal to stop the process with
 * @param err - Error that caused the destruction of the process
 * @returns Promise resolving with the exit timer once it has fired
 */
export function destroy(
  services: RunnerServices | undefined,
  stopHandler: StopFunc | undefined,
  options: any,
  signal: ProcessDestroySignal,
  err?: Error,
): Promise<NodeJS.Timeout> {
  metrics.incrementProcessStatus({ state: err ? 'crashing' : 'stopping' });

  // `||` (not `??`) on purpose: the CLI default for `--exit-timeout` is NaN,
  // which must fall back to the services configuration.
  /* istanbul ignore next */
  const exitTimeout = options.exitTimeout || services?.config?.exitTimeout;

  if (err) {
    // Errors are always logged, whatever the verbosity.
    log(services, { verbose: true }, 'error', '[runner] Application error', {
      err,
      signal,
    });
  }

  log(services, options, 'info', '[runner] Stopping application', {
    err,
    signal,
    exit_timeout: exitTimeout,
  });

  return stop(services, options, stopHandler)
    .then(() => {
      metrics.incrementProcessStatus({ state: err ? 'crashed' : 'stopped' });

      log(services, options, 'info', '[runner] Application stopped', err);

      return scheduleExit(err ? 1 : 0, exitTimeout);
    })
    .catch((stopErr) => {
      console.error('[runner] Application crashed', {
        err: stopErr,
        firstErr: err,
      });

      // The stop sequence failed: this is a crash even if the destruction
      // was initially triggered by a plain signal (no `err`).
      return scheduleExit(1, exitTimeout);
    });
}

/**
 * Stops the handler, the heartbeat server and every Datastore stream.
 * @param services - Runner services whose Datastores streams must be closed
 * @param options - Runner options (only used for logging)
 * @param stopHandler - Optional handler stop function, awaited first
 */
export async function stop(
  services?: RunnerServices,
  options?: any,
  stopHandler?: StopFunc,
): Promise<void> {
  log(services, options, 'debug', '[runner] Stopping');

  if (typeof stopHandler === 'function') {
    log(services, options, 'debug', '[runner] Calling handler stop...');
    await stopHandler();
  }

  if (server) {
    log(services, options, 'debug', '[runner] Closing the hearbeat...');
    // NOTE(review): Node's `http.Server#close()` is callback based, so this
    // `await` presumably does not wait for open connections to drain — confirm
    // whether a full drain is expected before exiting.
    await server.close();
    server = null;
  }

  if (!services) {
    log(services, options, 'info', '[runner] Stopped');

    return;
  }

  // Close all Datastores streaming
  for (const ds in services.datastores) {
    services.datastores[ds].streams.closeAll();
  }

  log(services, options, 'info', '[runner] Stopped');
}

/**
 * Exposes an unauthenticated `GET /heartbeat` route answering
 * `{ is_alive: true }`.
 *
 * The returned promise resolves once the server is actually listening and
 * rejects if the server emits an `error` before that (e.g. port already in
 * use), so that callers' `.catch()` handlers are effective.
 * @param port - Port to listen on (defaults to `process.env.PORT`)
 * @returns The Express application, the HTTP server and the port
 */
export async function heartbeat(port = process.env.PORT) {
  const app = express();

  app.disable('x-powered-by');

  /**
   * Heartbeat route (Unauthenticated)
   */
  app.get('/heartbeat', (_req, res): void => {
    res.json({ is_alive: true });
  });

  // Listen
  const listening = app.listen(port);

  // Stored right away so that `stop()` can close it even while binding.
  server = listening;

  await new Promise<void>((resolve, reject) => {
    const onError = (err: Error) => {
      if (server === listening) {
        server = null;
      }

      reject(err);
    };

    listening.once('error', onError);
    listening.once('listening', () => {
      listening.off('error', onError);
      resolve();
    });
  });

  return { app, server: listening, port };
}

/**
 * Validates a trigger and returns the Datastore it targets, after having set
 * its timeout and checked it is reachable with a heartbeat call.
 * @param services - Runner services holding the Datastores by name
 * @param trigger - Trigger to resolve the Datastore from
 * @param options - Options; `timeout` is applied to the Datastore
 * @throws AssertionError if the trigger is invalid or the Datastore is unknown
 */
export async function getDatastoreFromTrigger(
  services: RunnerServices,
  trigger: RunnerTrigger,
  options: { timeout: number },
): Promise<Datastore> {
  ok(!!trigger.model, 'Model is not defined');
  ok(
    trigger.source === 'entities' || trigger.source === 'events',
    'Source must be either `entities` or `events`',
  );

  const datastore = services.datastores[trigger.datastore!];

  // Fail with an explicit message instead of a `TypeError` on `.core` below.
  ok(!!datastore, `Datastore \`${trigger.datastore}\` is not defined`);

  // Setting the Timeout explicitly
  datastore.core.setTimeout(options.timeout);

  await datastore.heartbeat();

  return datastore;
}

/**
 * Aligns the received payload with the `raw` mode: parsed JSON when `raw` is
 * `false`, serialized JSON string when `raw` is `true`. Any other combination
 * returns the input untouched.
 * @param input - Received payload
 * @param raw - Whether the handler expects the raw string
 */
export function mapReceivedData(
  input: string | object,
  raw: boolean,
): string | object {
  if (raw === false && typeof input === 'string') {
    return JSON.parse(input);
  }

  if (raw === true && typeof input !== 'string') {
    return JSON.stringify(input);
  }

  return input;
}

/**
 * Tells whether one more event can be processed without exceeding
 * `stats.maxParallelEvents`.
 */
export function readyForProcessing(stats: ProcessingStats): boolean {
  return stats.processing + 1 <= stats.maxParallelEvents;
}

/**
 * Polls until a processing slot is available for the queued event, the queue
 * counter drops below `queueId`, or the maximum waiting time is reached.
 * Updates `stats.waiting` / `stats.waited` when the call actually waited.
 * @param stats - Shared processing statistics
 * @param queueId - Value of `stats.queuing` when the event was queued
 * @param waitTimeInMilliseconds - Polling interval
 * @param maxWaitInMilliseconds - Maximum waiting time
 */
export async function waitForProcessing(
  stats: ProcessingStats,
  queueId: number,
  waitTimeInMilliseconds = 10,
  maxWaitInMilliseconds = 3600000, // 1 hour
): Promise<void> {
  const tic = Date.now();
  let waitingTimeInMilliseconds = 0;
  let waited = false;

  while (
    waitingTimeInMilliseconds < maxWaitInMilliseconds &&
    stats.queuing <= queueId &&
    readyForProcessing(stats) !== true
  ) {
    if (waited === false) {
      // Counted only once per waiting event.
      stats.waiting += 1;
      waited = true;
    }

    waitingTimeInMilliseconds = Date.now() - tic;

    await new Promise((resolve) => setTimeout(resolve, waitTimeInMilliseconds));
  }

  if (waited === true) {
    stats.waited += 1;
    stats.waiting -= 1;
  }
}

/**
 * Moves the entity correlated to `input` to the processing state found at
 * `processingStateIndex`.
 * @returns `false` when no processing state is configured for this index or
 * when the input lacks the correlation / processing field, `true` once the
 * update has been performed.
 */
export async function isDataProcessed(
  services: RunnerServices,
  datastore: string,
  model: string,
  input: any,
  processingStateIndex: number,
  processing?: RunnerTrigger['processing'],
): Promise<boolean> {
  const processingState = processing?.states?.[processingStateIndex];

  if (!processingState) {
    return false;
  }

  const correlationField = processing.correlation_field;
  const processingField = processing.field;

  if (
    input?.[correlationField] === undefined ||
    input?.[processingField] === undefined
  ) {
    return false;
  }

  await services.datastores[datastore].update(model, input[correlationField], {
    [processingField]: processingState,
  });

  return true;
}

/**
 * Decides whether `input` must be handled. Without processing configuration
 * everything is handled. Otherwise the input must be in the first processing
 * state and is moved to the second one with a versioned update.
 * @returns `true` when the data must be processed, `false` otherwise
 */
export async function shouldProcessData(
  services: RunnerServices,
  datastore: string,
  model: string,
  input: any,
  processing?: RunnerTrigger['processing'],
): Promise<boolean> {
  if (!processing) {
    return true;
  }

  const correlationField = processing.correlation_field;
  const processingField = processing.field;
  const processingStates = processing.states;

  if (
    input?.[correlationField] === undefined ||
    input?.[processingField] !== processingStates[0]
  ) {
    return false;
  }

  try {
    await services.datastores[datastore].update(
      model,
      input[correlationField],
      {
        [processingField]: processingStates[1],
      },
      {
        version: input.version + 1,
      },
    );

    return true;
  } catch (err) {
    // Any update failure means the data is skipped.
    // NOTE(review): presumably a version conflict with a concurrent runner —
    // confirm that other failures are also meant to be silent.
    return false;
  }
}

/**
 * Builds processing statistics from defaults, overridden by the defined
 * values of `from`. Properties explicitly set to `undefined` are ignored so
 * that a missing option never erases a default.
 * @param from - Partial statistics overriding the defaults
 */
export function getProcessingStats(
  from?: Partial<ProcessingStats>,
): ProcessingStats {
  const overrides = Object.fromEntries(
    Object.entries(from ?? {}).filter(([, value]) => value !== undefined),
  ) as Partial<ProcessingStats>;

  return {
    processingTimeWindowInMilliseconds: Infinity,
    progress: 1_000,
    queuing: 0,
    waiting: 0,
    waited: 0,
    processing: 0,
    processed: 0,
    totalWaitingDurationInMilliseconds: 0,
    averageWaitingDurationInMilliseconds: 0,
    totalProcessingDurationInMilliseconds: 0,
    averageProcessingDurationInMilliseconds: 0,
    maxParallelEvents: 10, // 10 first request for sampling
    ...overrides,
  };
}

/**
 * Wraps a handler with local queuing, processing state management,
 * acknowledgement, metrics and statistics.
 *
 * Processing states are used by index: `states[0]` is expected on incoming
 * data, `states[1]` is set before handling (see `shouldProcessData`),
 * `states[2]` after a successful handling and `states[3]` after a failure.
 * @param services - Runner services
 * @param handler - Handler to invoke with the mapped data
 * @param handlerId - Handler identifier (`<path>#<factory>`)
 * @param datastore - Name of the Datastore the data comes from
 * @param model - Model name
 * @param source - Data source
 * @param raw - Whether the handler expects the raw string
 * @param stats - Shared processing statistics, mutated on every call
 * @param processing - Optional processing states configuration
 * @returns The function to call for every received message
 */
export function localEventHandler(
  services: RunnerServices,
  handler: HandlerFunc,
  handlerId: string,
  datastore: string,
  model: string,
  source: Source,
  raw: boolean,
  stats: ProcessingStats = getProcessingStats(),
  processing?: RunnerTrigger['processing'],
) {
  const [handlerPath] = handlerId.split('#');

  const metadata = {
    handlerId,
    path: handlerPath,
    datastore,
    model,
    source,
    raw,
  };

  return async (
    input: string | object,
    route?: Route,
    headers?: AnyObject,
    opts?: MessageOptions,
  ) => {
    const ticQueuing = Date.now();
    let ticProcessing = Date.now();
    let data;
    // Tracks whether this call has been counted in `stats.processing`, so
    // that the counter is only decremented when it was incremented.
    let isCounted = false;

    try {
      metrics.incrementProcessing({
        state: 'request',
        model,
      });

      // Handle queuing locally
      if (
        typeof stats.processingTimeWindowInMilliseconds === 'number' &&
        stats.processingTimeWindowInMilliseconds !== Infinity
      ) {
        await new Promise((resolve) => setTimeout(resolve, 1));

        stats.queuing += 1;
        const queueId = stats.queuing;

        await waitForProcessing(stats, queueId);

        ticProcessing = Date.now();
        stats.queuing -= 1;
      }

      stats.processing += 1;
      isCounted = true;

      data = mapReceivedData(input, raw);

      // Redelivered messages are always processed.
      const _shouldProcessData =
        (opts?.delivery ?? 0) > 0 ||
        (await shouldProcessData(services, datastore, model, data, processing));

      let res;
      if (_shouldProcessData === true) {
        res = await handler(data, metadata);

        await isDataProcessed(services, datastore, model, data, 2, processing);
      }

      if (typeof opts?.ack === 'function') {
        await opts.ack();
      }

      metrics.incrementProcessing({
        state: 'success',
        model,
      });

      return res;
    } catch (err: any) {
      metrics.incrementProcessing({
        state: 'error',
        model,
      });

      // A message failing again after a redelivery is discarded.
      if (typeof opts?.delivery === 'number' && opts?.delivery > 0) {
        services?.telemetry?.logger.warn('[runner] Event discarded', {
          input,
          route,
          headers: omit(headers, 'authorization'),
          deliver: opts?.delivery,
        });

        await isDataProcessed(services, datastore, model, data, 3, processing);

        if (typeof opts?.ack === 'function') {
          await opts.ack();
        }

        return;
      }

      services?.telemetry?.logger.error('Event handler error', {
        // err,
        message: err?.message,
        response: err?.response?.data,
        details: err?.response?.data?.details?.[0],
        msg: input,
        ...metadata,
      });

      const isRetriable = typeof opts?.nack === 'function';

      if (isRetriable === true) {
        await opts.nack();
      } else {
        await isDataProcessed(services, datastore, model, data, 3, processing);
      }
    } finally {
      const tac = Date.now();

      if (isCounted === true) {
        stats.processing -= 1;
      }

      stats.processed += 1;

      stats.totalWaitingDurationInMilliseconds += ticProcessing - ticQueuing;
      stats.averageWaitingDurationInMilliseconds =
        stats.totalWaitingDurationInMilliseconds / stats.processed;

      stats.totalProcessingDurationInMilliseconds += tac - ticProcessing;
      stats.averageProcessingDurationInMilliseconds =
        stats.totalProcessingDurationInMilliseconds / stats.processed;

      const supportedParallelEvents =
        stats.processingTimeWindowInMilliseconds /
        stats.averageProcessingDurationInMilliseconds;

      // `Math.max(1, NaN)` is `NaN`, which would make `readyForProcessing`
      // always false: fall back to a single event at a time.
      stats.maxParallelEvents = Number.isNaN(supportedParallelEvents)
        ? 1
        : Math.max(1, supportedParallelEvents);

      if (stats.processed % stats.progress === 0) {
        services?.telemetry?.logger?.info('[runner] Processing statistics', {
          queuing: stats.queuing,
          waiting: stats.waiting,
          processing: stats.processing,
          waited: stats.waited,
          processed: stats.processed,
          average_waiting_duration_in_ms:
            stats.averageWaitingDurationInMilliseconds,
          average_processing_duration_in_ms:
            stats.averageProcessingDurationInMilliseconds,
          max_supported_parallel_events: stats.maxParallelEvents,
        });
      }
    }
  };
}

/**
 * Loads a handler module, invokes the requested factory and starts the
 * resulting configuration.
 *
 * `handlerId` is resolved as a URL relative to `ds://handlers`: the path is
 * the module location relative to `cmd.cwd` and the hash is the factory name
 * (`main` when omitted). Command options take precedence over the values
 * declared by the handler configuration.
 * @param handlerId - Handler identifier
 * @param cmd - Command options
 * @param handlersForTest - Handlers used instead of requiring the module
 * @returns The started services, the triggers, and the `stop` / `handler`
 * functions of the configuration
 * @throws AssertionError if the factory or its configuration is invalid
 */
export async function buildHandler(
  handlerId: string,
  cmd: any,
  handlersForTest?: { [key: string]: HandlerBuilderFunc },
) {
  const handlerUrl = new URL(handlerId, 'ds://handlers');

  const handlerPath: string = path.resolve(
    cmd.cwd,
    handlerUrl.pathname.slice(1),
  );

  const handlers = handlersForTest ?? require(handlerPath);
  const factoryId: string = handlerUrl.hash.slice(1) || 'main';
  const factory: HandlerBuilderFunc = handlers[factoryId];

  ok(
    typeof factory === 'function',
    'A valid handler factory must be requested',
  );

  const config = await factory(handlerUrl);

  // `typeof null` is also 'object': reject it explicitly.
  ok(
    typeof config === 'object' && config !== null,
    'Handler configuration must be an object',
  );
  ok(
    typeof config.start === 'function',
    'Handler configuration must have a `start` method',
  );

  const services = await config.start();

  if (services.datastores instanceof Map) {
    services.datastores = Object.fromEntries(services.datastores.entries());
  }

  // NOTE(review): the `--raw` CLI option defaults to `false`, so `cmd.raw` is
  // never nullish from the CLI and `trigger.raw` is then ignored — confirm
  // whether this precedence is intended.
  const triggers: Array<RunnerTrigger> = (
    config.triggers ?? [
      {
        datastore: config.datastore,
        model: config.model,
        source: config.source,
        raw: config.raw,
        query: config.query,
        headers: config.headers,
        queryAsJSONSchema: config.queryAsJSONSchema,
        processing: config.processing,
      },
    ]
  ).map((trigger) => ({
    datastore: cmd.datastore ?? trigger.datastore,
    model: cmd.model ?? trigger.model,
    source: cmd.source ?? trigger.source,
    raw: cmd.raw ?? trigger.raw ?? false,
    query: cmd.query ?? trigger.query ?? {},
    headers: cmd.headers ?? trigger.headers ?? {},
    queryAsJSONSchema:
      cmd.queryAsJSONSchema ?? trigger.queryAsJSONSchema ?? false,
    processing: trigger.processing,
  }));

  return {
    services,
    triggers,
    stop: config.stop,
    handler: config.handler,
  };
}

/**
 * Builds the action of the `start` command: every handler is built and
 * attached to the stream of each of its triggers. Any initialization error
 * triggers the destroy sequence.
 */
export function start() {
  return async (
    handlerIds: string[],
    options: any,
    command: any,
    handlersForTest?: { [key: string]: HandlerBuilderFunc },
  ) => {
    let services: RunnerServices | undefined = undefined;

    try {
      metrics.incrementProcessStatus({ state: 'starting' });

      log(undefined, options, 'info', '[runner] Starting streaming', {
        handler_ids: handlerIds,
        options,
      });

      for (const handlerId of handlerIds) {
        const {
          services: _services,
          triggers,
          stop: stopHandler,
          handler,
        } = await buildHandler(handlerId, options, handlersForTest);

        services = _services;

        // Process events are bound once per handler: binding them for every
        // trigger would run the destroy sequence several times.
        let isProcessBound = false;

        for (const trigger of triggers) {
          const datastore = await getDatastoreFromTrigger(
            services,
            trigger,
            options,
          );

          const query: any = trigger.query;

          // Only data in the first processing state is streamed.
          if (trigger.processing?.field) {
            query[trigger.processing?.field] = trigger.processing?.states[0];
          }

          const streamId = datastore.streams.getStreamId(
            trigger.model!,
            trigger.source!,
            query,
          );

          datastore.streams.on(
            streamId,
            localEventHandler(
              services,
              handler,
              handlerId,
              trigger.datastore!,
              trigger.model!,
              trigger.source!,
              trigger.raw!,
              getProcessingStats({
                processingTimeWindowInMilliseconds:
                  options.processingTimeWindow,
                progress: options.progress,
                maxParallelEvents: options.initParallelEvents,
              }),
              trigger.processing,
            ),
          );

          if (isProcessBound === false) {
            init(services, stopHandler, options);
            isProcessBound = true;
          }

          log(services, options, 'info', '[runner] Starting streaming', {
            ...trigger,
            opts: {
              reconnectionMaxAttempts: options.maxReconnectionAttempts,
              reconnectionInterval: options.reconnectionInterval,
              connectionMaxLifeSpanInSeconds:
                options.connectionMaxLifeSpanInSeconds,
            },
          });

          await datastore.streams.listen(
            trigger.model!,
            trigger.source!,
            trigger.query,
            {
              reconnectionMaxAttempts: options.maxReconnectionAttempts,
              reconnectionInterval: options.reconnectionInterval,
              connectionMaxLifeSpanInSeconds:
                options.connectionMaxLifeSpanInSeconds,
              queueName: handlerId,
              queryAsJSONSchema: trigger.queryAsJSONSchema,
            },
          );

          metrics.incrementProcessStatus({ state: 'started' });
        }
      }

      if (options.heartbeat === true) {
        /* istanbul ignore next */
        heartbeat().catch((err) => {
          /* istanbul ignore next */
          log(services, options, 'warn', 'Failed to expose heartbeat', {
            err,
          });
        });
      }
    } catch (err: any) {
      log(services, { verbose: true }, 'error', 'Initialization error', err);

      await destroy(services, undefined, options, 'uncaughtException', err);
    }
  };
}

/**
 * Wraps a handler so that it only runs for the first received entity: any
 * further call invokes the stop handler instead.
 * @param stats - Mutable call counter shared across calls
 */
function debugHandler(
  stopHandler: StopFunc,
  handler: HandlerFunc,
  stats: { count: number },
): HandlerFunc {
  return async (entity, metadata) => {
    if (stats.count > 0) {
      return stopHandler();
    }

    stats.count += 1;

    return handler(entity, metadata);
  };
}

/**
 * Replays the data matching the triggers through the handler, then calls the
 * stop handler. In debug mode the page size and the sleep period are forced
 * to `1` and the handler is only executed once.
 * @param handlerId - Handler identifier
 * @param options - Command options (`debug`, `pageSize`, `sleep`, `mutating`,
 * `progress`)
 * @param services - Runner services
 * @param triggers - Triggers to replay
 * @param stopHandler - Handler stop function
 * @param handler - Handler to invoke for every replayed item
 */
export async function replayHandler(
  handlerId: string,
  options: any,
  services: RunnerServices,
  triggers: RunnerTrigger[],
  stopHandler: StopFunc,
  handler: HandlerFunc,
) {
  init(services, stopHandler, options);

  const pageSize: number = options.debug === true ? 1 : options.pageSize;

  log(services, options, 'info', '[runner] Starting the replay', {
    handler_id: handlerId,
    page_size: pageSize,
    triggers,
    opts: {
      sleep: options.sleep,
    },
  });

  log(services, options, 'debug', '[runner] Replay informations', {
    handler_id: handlerId,
    triggers,
  });

  const stats = { count: 0 };
  const processingStats = getProcessingStats({
    progress: options.progress,
  });

  await utils.walkMulti(
    new Map(
      Object.keys(services.datastores).map((k) => [k, services.datastores[k]]),
    ),
    triggers.map((trigger) => {
      const query: any = trigger.query;

      // Only data in the first processing state is replayed.
      if (trigger.processing?.field) {
        query[trigger.processing?.field] = trigger.processing?.states[0];
      }

      // NOTE(review): `raw` is not forwarded here while `query.raw` is read
      // below — confirm whether `trigger.raw` should apply to replays.
      return {
        datastore: trigger.datastore,
        model: trigger.model,
        query,
        source: trigger.source,
        headers: trigger.headers,
      };
    }) as utils.MultiQuery[],
    pageSize,
    (input, query, queryIteration) => {
      return localEventHandler(
        services,
        options.debug !== true
          ? handler
          : debugHandler(stopHandler, handler, stats),
        handlerId,
        query.datastore,
        query.model,
        query.source,
        query.raw,
        processingStats,
        triggers[queryIteration?.query_index]?.processing,
      )(input);
    },
    {
      sleep: options.debug === true ? 1 : options.sleep,
      is_mutating: options.mutating,
    },
  );

  await stopHandler();
}

/**
 * Builds the action of the `replay` command: every handler is built and
 * replayed sequentially, then the destroy sequence is run (with the error if
 * the replay failed).
 */
export function replay() {
  return async (
    handlerIds: string[],
    options: any,
    command: any,
    handlersForTest?: { [key: string]: HandlerBuilderFunc },
  ) => {
    let services: RunnerServices | undefined = undefined;

    try {
      metrics.incrementProcessStatus({ state: 'starting' });

      log(undefined, options, 'info', '[runner] Starting replay', {
        handler_ids: handlerIds,
        options,
      });

      /* istanbul ignore next */
      if (options.heartbeat === true) {
        heartbeat().catch((err) => {
          /* istanbul ignore next */
          log(undefined, options, 'warn', 'Failed to expose heartbeat', {
            err,
          });
        });
      }

      for (const handlerId of handlerIds) {
        const {
          services: _services,
          triggers,
          stop: stopHandler,
          handler,
        } = await buildHandler(handlerId, options, handlersForTest);

        services = _services;

        await replayHandler(
          handlerId,
          options,
          services,
          triggers,
          stopHandler,
          handler,
        );
      }

      await destroy(services, undefined, options, 'SIGTERM');
    } catch (err: any) {
      log(services, { verbose: true }, 'error', 'Replay error', {
        err,
        response: err?.response?.data,
      });

      await destroy(services, undefined, options, 'uncaughtException', err);
    }
  };
}

/**
 * Registers the options shared by the `start` and `replay` commands.
 * @param command - Command to register the options on
 * @returns The same command
 */
/* istanbul ignore next */
function commonOptions(command: Command) {
  command
    .option('--datastore <datastore>', 'Datastore to listen on')
    .option('--model <model>', 'Model to listen on')
    .option('--source <source>', 'Source of messages')
    .option(
      '--raw',
      'If present, no JSON parsing is applied to messages',
      false,
    )
    .option(
      '--timeout <timeout>',
      'Requests timeout in milliseconds',
      (v) => Number.parseInt(v, 10),
      30000,
    )
    .option(
      '--exit-timeout <timeout>',
      'Graceful exit timeout',
      (v) => Number.parseInt(v, 10),
      NaN,
    )
    .option('--verbose', 'Display runner logs', false)
    .option('--safe', 'Executes all handlers even with errors', false)
    .option('--heartbeat', 'If present, an heartbeat route is exposed', false)
    .option(
      '--cwd <cwd>',
      'Current working directory',
      process.env.DATASTORE_RUNNER_CWD || '',
    );

  return command;
}

/**
 * Builds the runner program with its `start` and `replay` commands.
 * @param name - Name of the program
 * @returns The Commander program
 */
/* istanbul ignore next */
export default function register(name = 'runner') {
  const program = new Command(name);

  const startCommand = program.command('start <handlerIds...>');
  commonOptions(startCommand);

  startCommand
    .option(
      '--query <query>',
      'Forced query',
      (v) => !!v && JSON.parse(v),
      null,
    )
    .option(
      '--ptw, --processing-time-window <processing_time_window>',
      'Event processing time window',
      (v) => Number.parseInt(v, 10),
      Infinity,
    )
    .option(
      '-p, --progress <progress>',
      'Progress feedback interval',
      (v) => Number.parseInt(v, 10),
      1_000,
    )
    .option(
      '--ipr, --init-parallel-events <init_parallel_events>',
      'Initial maximal parallel events for sampling',
      (v) => Number.parseInt(v, 10),
      10,
    )
    .option(
      '--max-reconnection-attempts <max_reconnection_attempts>',
      'Max reconnection attempts in case of error or unexpected closing',
      (v) => Number.parseInt(v, 10),
      1000,
    )
    .option(
      '--reconnection-interval <reconnection_interval>',
      'Interval in milliseconds between two reconnection attempts',
      (v) => Number.parseInt(v, 10),
      100,
    )
    .option(
      '--connection-max-life-span-in-seconds <connection_max_life_span_in_seconds>',
      'Max life span in seconds of a Datastore HTTP streaming connection',
      (v) => Number.parseInt(v, 10),
      3600,
    )
    .description('Start the queries handlers')
    .action(start());

  const replayCommand = program.command('replay <handlerIds...>');
  commonOptions(replayCommand);

  replayCommand
    .option('--date-field <dateField>', 'Date field', 'created_at')
    .option(
      '--page-size <pageSize>',
      'Page size',
      (v) => Number.parseInt(v, 10),
      100,
    )
    .option(
      '-s, --sleep <sleep>',
      'Sleep period between each batch',
      (v) => Number.parseInt(v, 10),
      0,
    )
    .option(
      '--query <query>',
      'Forced query',
      (v) => !!v && JSON.parse(v),
      null,
    )
    .option(
      '--mutating',
      'Activates replay mode with possible mutations',
      false,
    )
    .option(
      '-p, --progress <progress>',
      'Progress feedback interval',
      (v) => Number.parseInt(v, 10),
      1_000,
    )
    .option('--debug', 'Execute the handler only once', false)
    .description('Replay events in order')
    .action(replay());

  return program;
}