/**
 * @getanthill/datastore — Event-Sourced Datastore.
 * Runner module: CLI `start` / `replay` commands and the handler plumbing
 * (signal handling, heartbeat server, back-pressure and replay helpers).
 */
import type {
ProcessDestroySignal,
HandlerBuilderFunc,
RunnerServices,
StopFunc,
HandlerFunc,
RunnerTrigger,
AnyObject,
Source,
} from '../typings';
import type { IncomingMessage, Server, ServerResponse } from 'http';
import type { MessageOptions, Route } from '../services/broker';
import { Command } from 'commander';
import path from 'path';
import { ok } from 'node:assert';
import express from 'express';
import omit from 'lodash/omit';
import * as telemetry from '@getanthill/telemetry';
import type Datastore from './Datastore';
import * as utils from './utils';
import * as metrics from '../constants/metrics';
let server: Server<typeof IncomingMessage, typeof ServerResponse> | null;
/**
 * Mutable counters shared across invocations of a handler to implement local
 * back-pressure and progress reporting (see `localEventHandler` and
 * `waitForProcessing`).
 */
export interface ProcessingStats {
  /** Time window used to derive `maxParallelEvents`; `Infinity` disables local queuing. */
  processingTimeWindowInMilliseconds: number;
  /** Number of messages currently queued ahead of processing. */
  queuing: number;
  /** Number of messages currently waiting for a free processing slot. */
  waiting: number;
  /** Total number of messages that had to wait at least once. */
  waited: number;
  /** Number of messages currently being processed. */
  processing: number;
  /** Total number of messages fully processed (success or failure). */
  processed: number;
  /** A statistics log line is emitted every `progress` processed messages. */
  progress: number;
  /** Cumulative time messages spent between queuing and processing start. */
  totalWaitingDurationInMilliseconds: number;
  /** `totalWaitingDurationInMilliseconds / processed`. */
  averageWaitingDurationInMilliseconds: number;
  /** Cumulative time spent inside the handler (including finalization). */
  totalProcessingDurationInMilliseconds: number;
  /** `totalProcessingDurationInMilliseconds / processed`. */
  averageProcessingDurationInMilliseconds: number;
  /** Derived cap on concurrently processed messages (initial sampling value: 10). */
  maxParallelEvents: number;
}
/**
 * Returns the module-level heartbeat HTTP server reference.
 */
export function getServer() {
  return server;
}

/**
 * Replaces the module-level heartbeat HTTP server reference and returns it.
 */
export function setServer(_server: typeof server) {
  return (server = _server);
}
/**
 * Conditionally emits a runner log line.
 *
 * Nothing is logged unless `options.verbose` is strictly `true`. Prefers the
 * services-provided logger, falling back to the shared telemetry logger.
 */
export function log(
  services: RunnerServices | undefined,
  options: { verbose?: boolean },
  level: string,
  message: string,
  context?: { [key: string]: any },
) {
  const isVerbose = options?.verbose === true;
  if (!isVerbose) {
    return;
  }
  const activeLogger = services?.telemetry?.logger ?? telemetry.logger;
  /* @ts-ignore */
  activeLogger[level](message, context);
}
/**
 * Binds process-level signal and error listeners so the runner shuts down
 * gracefully. Entirely skipped when `options.skipProcessBinding` is `true`
 * (e.g. when embedding the runner in another process).
 */
export function init(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
) {
  if (options.skipProcessBinding === true) {
    return;
  }
  for (const signal of ['SIGTERM', 'SIGINT'] as const) {
    process.once(
      signal,
      signalHandler(services, stopHandler, options, signal),
    );
  }
  for (const event of ['uncaughtException', 'unhandledRejection'] as const) {
    process.once(
      event,
      errorHandler(services, stopHandler, options, event),
    );
  }
}
/**
 * Creates a zero-argument listener for process signals that delegates to
 * `destroyHandler` (the graceful shutdown routine by default).
 */
export function signalHandler(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
  signal: ProcessDestroySignal,
  destroyHandler = destroy,
): () => Promise<NodeJS.Timeout> {
  return () => destroyHandler(services, stopHandler, options, signal);
}
/**
 * Creates an error listener (for `uncaughtException` / `unhandledRejection`)
 * that delegates to `destroyHandler`, forwarding the causing error.
 */
export function errorHandler(
  services: RunnerServices,
  stopHandler: StopFunc,
  options: any,
  signal: ProcessDestroySignal,
  destroyHandler = destroy,
): (err: Error) => Promise<NodeJS.Timeout> {
  return (err) => destroyHandler(services, stopHandler, options, signal, err);
}
/**
 * Cleanup and stop the process properly, then exit the process.
 *
 * Reports process-state metrics, runs the `stop` sequence, then schedules a
 * `process.exit` after `exitTimeout` milliseconds. The timeout is unref'ed so
 * it cannot keep the event loop alive on its own.
 * @param services - Runner services (used for logging/config lookup)
 * @param stopHandler - Handler-provided stop callback, forwarded to `stop`
 * @param options - CLI options (may carry `exitTimeout`)
 * @param signal - Signal to stop the process with
 * @param err - Error that caused the destruction of the process
 * @returns The timeout handle scheduling the process exit.
 */
export function destroy(
  services: RunnerServices | undefined,
  stopHandler: StopFunc | undefined,
  options: any,
  signal: ProcessDestroySignal,
  err?: Error,
): Promise<NodeJS.Timeout> {
  metrics.incrementProcessStatus({ state: err ? 'crashing' : 'stopping' });
  /* istanbul ignore next */
  const exitTimeout = options.exitTimeout || services?.config?.exitTimeout;
  // Single exit scheduler shared by the success and failure paths (the two
  // branches previously duplicated this code verbatim). Exit code is 1 when
  // destroying because of an error, 0 otherwise.
  const scheduleExit = (): Promise<NodeJS.Timeout> =>
    new Promise<NodeJS.Timeout>((resolve) => {
      const timeout = setTimeout(() => {
        timeout.unref();
        process.exit(err ? 1 : 0);
        resolve(timeout);
      }, exitTimeout);
    });
  if (err) {
    log(services, { verbose: true }, 'error', '[runner] Application error', {
      err,
      signal,
    });
  }
  log(services, options, 'info', '[runner] Stopping application', {
    err,
    signal,
    exit_timeout: exitTimeout,
  });
  return stop(services, options, stopHandler)
    .then(() => {
      metrics.incrementProcessStatus({ state: err ? 'crashed' : 'stopped' });
      log(services, options, 'info', '[runner] Application stopped', err);
      return scheduleExit();
    })
    .catch((stopErr) => {
      console.error('[runner] Application crashed', {
        err: stopErr,
        firstErr: err,
      });
      return scheduleExit();
    });
}
/**
 * Stops the runner: invokes the handler-provided stop callback, closes the
 * heartbeat HTTP server (if any) and closes all datastore streams.
 * @param services - Runner services holding the datastores to close
 * @param options - CLI options (only used for logging verbosity)
 * @param stopHandler - Optional handler-provided stop callback
 */
export async function stop(
  services?: RunnerServices,
  options?: any,
  stopHandler?: StopFunc,
): Promise<void> {
  log(services, options, 'debug', '[runner] Stopping');
  if (typeof stopHandler === 'function') {
    log(services, options, 'debug', '[runner] Calling handler stop...');
    await stopHandler();
  }
  if (server) {
    log(services, options, 'debug', '[runner] Closing the heartbeat...');
    // `Server#close` is callback-based and does not return a promise, so the
    // previous `await server.close()` was a no-op await. Closing stays
    // fire-and-forget: we stop accepting connections and drop the reference.
    server.close();
    server = null;
  }
  if (!services) {
    log(services, options, 'info', '[runner] Stopped');
    return;
  }
  // Close all Datastores streaming
  for (const ds in services.datastores) {
    services.datastores[ds].streams.closeAll();
  }
  log(services, options, 'info', '[runner] Stopped');
}
/**
 * Exposes an unauthenticated `/heartbeat` HTTP route used as liveness probe.
 *
 * Resolves once the server is actually listening; rejects on bind errors
 * (e.g. EADDRINUSE). The previous `await app.listen(port)` resolved
 * immediately because `listen` returns a `Server`, not a promise, and bind
 * failures escaped as unhandled `'error'` events.
 * @param port - Port to listen on (defaults to `process.env.PORT`).
 * @returns The express app, the HTTP server and the port.
 */
export async function heartbeat(port = process.env.PORT) {
  const app = express();
  app.disable('x-powered-by');
  /**
   * Heartbeat route (Unauthenticated)
   */
  app.get('/heartbeat', (_req, res): void => {
    res.json({ is_alive: true });
  });
  server = await new Promise<Server<typeof IncomingMessage, typeof ServerResponse>>(
    (resolve, reject) => {
      const listening = app.listen(port, () => resolve(listening));
      listening.once('error', reject);
    },
  );
  return { app, server, port };
}
/**
 * Resolves and health-checks the Datastore targeted by a trigger.
 *
 * Validates the trigger, configures the request timeout on the datastore core
 * and awaits a successful heartbeat before returning it.
 * @param services - Runner services holding the datastore registry
 * @param trigger - Trigger designating the datastore, model and source
 * @param options - Request options (`timeout` in milliseconds)
 * @throws AssertionError when the trigger is incomplete or references an
 *   unregistered datastore.
 */
export async function getDatastoreFromTrigger(
  services: RunnerServices,
  trigger: RunnerTrigger,
  options: { timeout: number },
): Promise<Datastore> {
  ok(!!trigger.model, 'Model is not defined');
  ok(
    trigger.source === 'entities' || trigger.source === 'events',
    'Source must be either `entities` or `events`',
  );
  const datastore = services.datastores[trigger.datastore!];
  // Fail fast with an explicit message instead of an opaque TypeError below
  // when the trigger references a datastore that was never registered.
  ok(!!datastore, `Datastore is not defined: ${trigger.datastore}`);
  // Setting the Timeout explicitly
  datastore.core.setTimeout(options.timeout);
  await datastore.heartbeat();
  return datastore;
}
/**
 * Normalizes an incoming payload according to the `raw` flag.
 *
 * - `raw === false` and the input is a string: parse it as JSON;
 * - `raw === true` and the input is not a string: serialize it to JSON;
 * - otherwise the input is returned untouched.
 */
export function mapReceivedData(
  input: string | object,
  raw: boolean,
): string | object {
  const isString = typeof input === 'string';
  if (raw === false && isString) {
    return JSON.parse(input as string);
  }
  if (raw === true && !isString) {
    return JSON.stringify(input);
  }
  return input;
}
/**
 * True when one more message can be processed without exceeding the current
 * parallelism cap.
 */
export function readyForProcessing(stats: ProcessingStats): boolean {
  return stats.processing + 1 <= stats.maxParallelEvents;
}

/**
 * Busy-waits (with sleeps) until a processing slot frees up, updating the
 * waiting counters in `stats`.
 *
 * NOTE(review): the loop also exits once `stats.queuing` grows past this
 * message's `queueId` — presumably to bound waits when newer messages keep
 * queuing; confirm intent with the original author.
 * @param stats - Shared processing statistics (mutated)
 * @param queueId - Position of this message in the local queue
 * @param waitTimeInMilliseconds - Sleep between two slot checks
 * @param maxWaitInMilliseconds - Hard cap on the total wait
 */
export async function waitForProcessing(
  stats: ProcessingStats,
  queueId: number,
  waitTimeInMilliseconds = 10,
  maxWaitInMilliseconds = 3600000, // 1 hour
): Promise<void> {
  const tic = Date.now();
  let waitingTimeInMilliseconds = 0;
  let waited = false;
  while (
    waitingTimeInMilliseconds < maxWaitInMilliseconds &&
    stats.queuing <= queueId &&
    readyForProcessing(stats) !== true
  ) {
    if (waited === false) {
      stats.waiting += 1;
      waited = true;
    }
    waitingTimeInMilliseconds = Date.now() - tic;
    await new Promise((resolve) => setTimeout(resolve, waitTimeInMilliseconds));
  }
  // Explicit statements instead of the previous side-effectful `&&` chain:
  // `(stats.waited += 1) && (stats.waiting -= 1)` silently skipped the
  // `waiting` decrement whenever the incremented `waited` evaluated falsy (0).
  if (waited === true) {
    stats.waited += 1;
    stats.waiting -= 1;
  }
}
/**
 * Moves an input's entity into the processing state at `processingStateIndex`.
 *
 * Returns `false` (without touching the datastore) when no state is defined at
 * that index or when the input lacks the correlation/processing fields;
 * returns `true` after a successful state update.
 */
export async function isDataProcessed(
  services: RunnerServices,
  datastore: string,
  model: string,
  input: any,
  processingStateIndex: number,
  processing?: RunnerTrigger['processing'],
): Promise<boolean> {
  const targetState = processing?.states?.[processingStateIndex];
  if (!targetState) {
    return false;
  }
  const correlationKey = processing.correlation_field;
  const stateKey = processing.field;
  const hasRequiredFields =
    input?.[correlationKey] !== undefined && input?.[stateKey] !== undefined;
  if (!hasRequiredFields) {
    return false;
  }
  await services.datastores[datastore].update(model, input[correlationKey], {
    [stateKey]: targetState,
  });
  return true;
}
/**
 * Decides whether an input should be processed under the optional processing
 * state machine.
 *
 * Without a `processing` configuration everything is eligible. Otherwise the
 * input must carry the correlation field and be in the initial state; the
 * entity is then optimistically moved to the second state with an explicit
 * expected version — losing that race (update rejection) yields `false`.
 */
export async function shouldProcessData(
  services: RunnerServices,
  datastore: string,
  model: string,
  input: any,
  processing?: RunnerTrigger['processing'],
): Promise<boolean> {
  if (!processing) {
    return true;
  }
  const correlationKey = processing.correlation_field;
  const stateKey = processing.field;
  const states = processing.states;
  const isEligible =
    input?.[correlationKey] !== undefined && input?.[stateKey] === states[0];
  if (!isEligible) {
    return false;
  }
  try {
    await services.datastores[datastore].update(
      model,
      input[correlationKey],
      { [stateKey]: states[1] },
      { version: input.version + 1 },
    );
    return true;
  } catch (err) {
    return false;
  }
}
/**
 * Builds a fresh `ProcessingStats` record, optionally overridden by `from`.
 */
export function getProcessingStats(
  from?: Partial<ProcessingStats>,
): ProcessingStats {
  const defaults: ProcessingStats = {
    processingTimeWindowInMilliseconds: Infinity,
    progress: 1_000,
    queuing: 0,
    waiting: 0,
    waited: 0,
    processing: 0,
    processed: 0,
    totalWaitingDurationInMilliseconds: 0,
    averageWaitingDurationInMilliseconds: 0,
    totalProcessingDurationInMilliseconds: 0,
    averageProcessingDurationInMilliseconds: 0,
    maxParallelEvents: 10, // 10 first request for sampling
  };
  return { ...defaults, ...from };
}
/**
 * Builds the per-message processing callback shared by live streaming and
 * replays.
 *
 * The returned async function: optionally applies local back-pressure (when a
 * finite `processingTimeWindowInMilliseconds` is configured on `stats`),
 * decodes the payload per `raw`, guards execution with the optional
 * `processing` state machine, invokes `handler`, acks/nacks broker messages
 * when the transport provides those callbacks, and maintains `stats` plus
 * process metrics.
 * @param services - Runner services (datastores, telemetry)
 * @param handler - User handler invoked with the decoded payload and metadata
 * @param handlerId - `path#factory` identifier of the handler
 * @param datastore - Datastore key the trigger targets
 * @param model - Model name
 * @param source - `entities` or `events`
 * @param raw - When true payloads are passed/serialized as strings
 * @param stats - Shared mutable statistics (fresh record by default)
 * @param processing - Optional processing state machine configuration
 */
export function localEventHandler(
  services: RunnerServices,
  handler: HandlerFunc,
  handlerId: string,
  datastore: string,
  model: string,
  source: Source,
  raw: boolean,
  stats: ProcessingStats = getProcessingStats(),
  processing?: RunnerTrigger['processing'],
) {
  // `handlerId` is `path#factory`; only the path part goes in the metadata.
  const [handlerPath] = handlerId.split('#');
  const metadata = {
    handlerId,
    path: handlerPath,
    datastore,
    model,
    source,
    raw,
  };
  return async (
    input: string | object,
    route?: Route,
    headers?: AnyObject,
    opts?: MessageOptions,
  ) => {
    const ticQueuing = Date.now();
    let ticProcessing = Date.now();
    let data;
    try {
      metrics.incrementProcessing({
        state: 'request',
        model,
      });
      // Handle queuing locally
      if (
        typeof stats.processingTimeWindowInMilliseconds === 'number' &&
        stats.processingTimeWindowInMilliseconds !== Infinity
      ) {
        await new Promise((resolve) => setTimeout(resolve, 1));
        stats.queuing += 1;
        const queueId = stats.queuing;
        await waitForProcessing(stats, queueId);
        // Waiting is over: processing time starts now.
        ticProcessing = Date.now();
        stats.queuing -= 1;
      }
      stats.processing += 1;
      data = mapReceivedData(input, raw);
      // Redeliveries (`delivery > 0`) bypass the state-machine guard.
      const _shouldProcessData =
        (opts?.delivery ?? 0) > 0 ||
        (await shouldProcessData(services, datastore, model, data, processing));
      let res;
      if (_shouldProcessData === true) {
        res = await handler(data, metadata);
        // Record completion: move the entity to processing state index 2.
        await isDataProcessed(services, datastore, model, data, 2, processing);
      }
      typeof opts?.ack === 'function' && (await opts.ack());
      metrics.incrementProcessing({
        state: 'success',
        model,
      });
      return res;
    } catch (err: any) {
      metrics.incrementProcessing({
        state: 'error',
        model,
      });
      // A redelivered message that fails again is discarded: logged (without
      // the authorization header), its entity moved to state index 3, acked.
      if (typeof opts?.delivery === 'number' && opts?.delivery > 0) {
        services?.telemetry?.logger.warn('[runner] Event discarded', {
          input,
          route,
          headers: omit(headers, 'authorization'),
          deliver: opts?.delivery,
        });
        await isDataProcessed(services, datastore, model, data, 3, processing);
        typeof opts?.ack === 'function' && (await opts.ack());
        return;
      }
      services?.telemetry?.logger.error('Event handler error', {
        // err,
        message: err.message,
        response: err?.response?.data,
        details: err?.response?.data?.details?.[0],
        msg: input,
        ...metadata,
      });
      // Nack when the transport supports it (broker-side redelivery);
      // otherwise record the failure state (index 3) directly.
      const isRetriable = typeof opts?.nack === 'function';
      if (isRetriable === true) {
        await opts.nack();
      } else {
        await isDataProcessed(services, datastore, model, data, 3, processing);
      }
    } finally {
      // Update throughput statistics regardless of outcome.
      const tac = Date.now();
      stats.processing -= 1;
      stats.processed += 1;
      stats.totalWaitingDurationInMilliseconds += ticProcessing - ticQueuing;
      stats.averageWaitingDurationInMilliseconds =
        stats.totalWaitingDurationInMilliseconds / stats.processed;
      stats.totalProcessingDurationInMilliseconds += tac - ticProcessing;
      stats.averageProcessingDurationInMilliseconds =
        stats.totalProcessingDurationInMilliseconds / stats.processed;
      // Re-derive the parallelism cap from the observed average duration.
      stats.maxParallelEvents = Math.max(
        1,
        stats.processingTimeWindowInMilliseconds /
          stats.averageProcessingDurationInMilliseconds,
      );
      // Periodic progress log, every `stats.progress` processed messages.
      stats.processed % stats.progress === 0 &&
        services?.telemetry?.logger?.info('[runner] Processing statistics', {
          queuing: stats.queuing,
          waiting: stats.waiting,
          processing: stats.processing,
          waited: stats.waited,
          processed: stats.processed,
          average_waiting_duration_in_ms:
            stats.averageWaitingDurationInMilliseconds,
          average_processing_duration_in_ms:
            stats.averageProcessingDurationInMilliseconds,
          max_supported_parallel_events: stats.maxParallelEvents,
        });
    }
  };
}
/**
 * Loads a handler module from a `path#factory` identifier, boots it and
 * normalizes its trigger configuration.
 *
 * The identifier is parsed as a URL against the `ds://handlers` base: the
 * pathname locates the module relative to `cmd.cwd`, the hash selects the
 * factory export (defaults to `main`). CLI options on `cmd` take precedence
 * over module-declared trigger values.
 */
export async function buildHandler(
  handlerId: string,
  cmd: any,
  handlersForTest?: { [key: string]: HandlerBuilderFunc },
) {
  const handlerUrl = new URL(handlerId, 'ds://handlers');
  const modulePath: string = path.resolve(
    cmd.cwd,
    handlerUrl.pathname.slice(1),
  );
  const handlerModule = handlersForTest ?? require(modulePath);
  const factoryName: string = handlerUrl.hash.slice(1) || 'main';
  const factory: HandlerBuilderFunc = handlerModule[factoryName];
  ok(
    typeof factory === 'function',
    'A valid handler factory must be requested',
  );
  const config = await factory(handlerUrl);
  ok(typeof config === 'object', 'Handler configuration must be an object');
  ok(
    typeof config.start === 'function',
    'Handler configuration must have a `start` method',
  );
  const services = await config.start();
  // Normalize Map-based datastore registries into plain objects.
  if (services.datastores instanceof Map) {
    services.datastores = Object.fromEntries(services.datastores.entries());
  }
  // A module without `triggers` describes a single trigger at its top level.
  const declaredTriggers = config.triggers ?? [
    {
      datastore: config.datastore,
      model: config.model,
      source: config.source,
      raw: config.raw,
      query: config.query,
      headers: config.headers,
      queryAsJSONSchema: config.queryAsJSONSchema,
      processing: config.processing,
    },
  ];
  const triggers: Array<RunnerTrigger> = declaredTriggers.map(
    (trigger: any) => ({
      datastore: cmd.datastore ?? trigger.datastore,
      model: cmd.model ?? trigger.model,
      source: cmd.source ?? trigger.source,
      raw: cmd.raw ?? trigger.raw ?? false,
      query: cmd.query ?? trigger.query ?? {},
      headers: cmd.headers ?? trigger.headers ?? {},
      queryAsJSONSchema:
        cmd.queryAsJSONSchema ?? trigger.queryAsJSONSchema ?? false,
      processing: trigger.processing,
    }),
  );
  return {
    services,
    triggers,
    stop: config.stop,
    handler: config.handler,
  };
}
/**
 * Builds the CLI action that starts live streaming for the given handler
 * identifiers.
 *
 * For each handler: builds it, wires a `localEventHandler` onto each
 * trigger's stream, binds process shutdown handlers, then opens the stream.
 * Any initialization error triggers a full `destroy`.
 */
export function start() {
  return async (
    handlerIds: string[],
    options: any,
    command: any,
    handlersForTest?: { [key: string]: HandlerBuilderFunc },
  ) => {
    let services: RunnerServices | undefined = undefined;
    try {
      metrics.incrementProcessStatus({ state: 'starting' });
      log(undefined, options, 'info', '[runner] Starting streaming', {
        handler_ids: handlerIds,
        options,
      });
      for (const handlerId of handlerIds) {
        const {
          services: _services,
          triggers,
          stop: stopHandler,
          handler,
        } = await buildHandler(handlerId, options, handlersForTest);
        services = _services;
        for (const trigger of triggers) {
          const datastore = await getDatastoreFromTrigger(
            services,
            trigger,
            options,
          );
          // When a processing state machine is configured, restrict the
          // stream to documents still in the initial state.
          const query: any = trigger.query;
          if (trigger.processing?.field) {
            query[trigger.processing?.field] = trigger.processing?.states[0];
          }
          const streamId = datastore.streams.getStreamId(
            trigger.model!,
            trigger.source!,
            query,
          );
          datastore.streams.on(
            streamId,
            localEventHandler(
              services,
              handler,
              handlerId,
              trigger.datastore!,
              trigger.model!,
              trigger.source!,
              trigger.raw!,
              getProcessingStats({
                processingTimeWindowInMilliseconds:
                  options.processingTimeWindow,
                progress: options.progress,
                maxParallelEvents: options.initParallelEvents,
              }),
              trigger.processing,
            ),
          );
          // Bind SIGTERM/SIGINT and error handlers for graceful shutdown.
          init(services, stopHandler, options);
          log(services, options, 'info', '[runner] Starting streaming', {
            ...trigger,
            opts: {
              reconnectionMaxAttempts: options.maxReconnectionAttempts,
              reconnectionInterval: options.reconnectionInterval,
              connectionMaxLifeSpanInSeconds:
                options.connectionMaxLifeSpanInSeconds,
            },
          });
          await datastore.streams.listen(
            trigger.model!,
            trigger.source!,
            trigger.query,
            {
              reconnectionMaxAttempts: options.maxReconnectionAttempts,
              reconnectionInterval: options.reconnectionInterval,
              connectionMaxLifeSpanInSeconds:
                options.connectionMaxLifeSpanInSeconds,
              queueName: handlerId,
              queryAsJSONSchema: trigger.queryAsJSONSchema,
            },
          );
          metrics.incrementProcessStatus({ state: 'started' });
        }
      }
      // Liveness probe is fire-and-forget: a failure is only logged.
      if (options.heartbeat === true) {
        /* istanbul ignore next */
        heartbeat().catch((err) => {
          /* istanbul ignore next */
          log(services, options, 'warn', 'Failed to expose heartbeat', {
            err,
          });
        });
      }
    } catch (err: any) {
      log(services, { verbose: true }, 'error', 'Initialization error', err);
      await destroy(services, undefined, options, 'uncaughtException', err);
    }
  };
}
/**
 * Wraps `handler` so that only the first entity is processed; every
 * subsequent invocation calls `stopHandler` instead. Used by debug replays.
 */
function debugHandler(
  stopHandler: StopFunc,
  handler: HandlerFunc,
  stats: { count: number },
): HandlerFunc {
  return async (entity, metadata) => {
    const alreadyRan = stats.count > 0;
    if (alreadyRan) {
      return stopHandler();
    }
    stats.count += 1;
    return handler(entity, metadata);
  };
}
/**
 * Replays stored documents for one handler by walking all matching documents
 * page by page (`utils.walkMulti`) and feeding each one through
 * `localEventHandler`, then invokes the handler's stop callback.
 * @param handlerId - `path#factory` identifier of the handler
 * @param options - CLI options (pageSize, sleep, debug, progress, mutating)
 * @param services - Runner services holding the datastores
 * @param triggers - Normalized triggers produced by `buildHandler`
 * @param stopHandler - Handler-provided stop callback
 * @param handler - User handler to invoke per document
 */
export async function replayHandler(
  handlerId: string,
  options: any,
  services: RunnerServices,
  triggers: RunnerTrigger[],
  stopHandler: StopFunc,
  handler: HandlerFunc,
) {
  init(services, stopHandler, options);
  // Debug mode processes a single document: force page size 1.
  const pageSize: number = options.debug === true ? 1 : options.pageSize;
  log(services, options, 'info', '[runner] Starting the replay', {
    handler_id: handlerId,
    page_size: pageSize,
    triggers,
    opts: {
      sleep: options.sleep,
    },
  });
  log(services, options, 'debug', '[runner] Replay informations', {
    handler_id: handlerId,
    triggers,
  });
  const stats = { count: 0 };
  const processingStats = getProcessingStats({
    progress: options.progress,
  });
  await utils.walkMulti(
    new Map(
      Object.keys(services.datastores).map((k) => [k, services.datastores[k]]),
    ),
    triggers.map((trigger) => {
      // When a processing state machine is configured, only replay documents
      // still in the initial state.
      const query: any = trigger.query;
      if (trigger.processing?.field) {
        query[trigger.processing?.field] = trigger.processing?.states[0];
      }
      return {
        datastore: trigger.datastore,
        model: trigger.model,
        query,
        source: trigger.source,
        headers: trigger.headers,
      };
    }) as utils.MultiQuery[],
    pageSize,
    (input, query, queryIteration) => {
      // Each document goes through the same pipeline as live streaming; in
      // debug mode the handler is wrapped to run only once then stop.
      return localEventHandler(
        services,
        options.debug !== true
          ? handler
          : debugHandler(stopHandler, handler, stats),
        handlerId,
        query.datastore,
        query.model,
        query.source,
        query.raw,
        processingStats,
        triggers[queryIteration?.query_index]?.processing,
      )(input);
    },
    {
      sleep: options.debug === true ? 1 : options.sleep,
      is_mutating: options.mutating,
    },
  );
  await stopHandler();
}
/**
 * Builds the CLI action that replays stored documents through the given
 * handler identifiers, then shuts the process down (via `destroy`).
 */
export function replay() {
  return async (
    handlerIds: string[],
    options: any,
    command: any,
    handlersForTest?: { [key: string]: HandlerBuilderFunc },
  ) => {
    let services: RunnerServices | undefined = undefined;
    try {
      metrics.incrementProcessStatus({ state: 'starting' });
      log(undefined, options, 'info', '[runner] Starting replay', {
        handler_ids: handlerIds,
        options,
      });
      // Liveness probe is fire-and-forget: a failure is only logged.
      /* istanbul ignore next */
      if (options.heartbeat === true) {
        heartbeat().catch((err) => {
          /* istanbul ignore next */
          log(undefined, options, 'warn', 'Failed to expose heartbeat', {
            err,
          });
        });
      }
      for (const handlerId of handlerIds) {
        const built = await buildHandler(handlerId, options, handlersForTest);
        services = built.services;
        await replayHandler(
          handlerId,
          options,
          services,
          built.triggers,
          built.stop,
          built.handler,
        );
      }
      await destroy(services, undefined, options, 'SIGTERM');
    } catch (err: any) {
      log(services, { verbose: true }, 'error', 'Replay error', {
        err,
        response: err?.response?.data,
      });
      await destroy(services, undefined, options, 'uncaughtException', err);
    }
  };
}
/* istanbul ignore next */
/**
 * Registers the CLI options shared by the `start` and `replay` commands.
 * @param command - Commander command to decorate.
 * @returns The same command, for chaining.
 */
function commonOptions(command: Command) {
  command
    .option('--datastore <datastore>', 'Datastore to listen on')
    .option('--model <model>', 'Model to listen on')
    .option('--source <source>', 'Source of messages')
    .option(
      '--raw',
      'If present, no JSON parsing is applied to messages',
      false,
    )
    .option(
      '--timeout <timeout>',
      'Requests timeout in milliseconds',
      (v) => Number.parseInt(v, 10),
      30000,
    )
    .option(
      '--exit-timeout <timeout>',
      'Graceful exit timeout',
      (v) => Number.parseInt(v, 10),
      // NaN default lets `destroy` fall back to `services.config.exitTimeout`.
      NaN,
    )
    .option('--verbose', 'Display runner logs', false)
    .option('--safe', 'Executes all handlers even with errors', false)
    // Help-text grammar fix: "a heartbeat" (was "an heartbeat").
    .option('--heartbeat', 'If present, a heartbeat route is exposed', false)
    .option(
      '--cwd <cwd>',
      'Current working directory',
      process.env.DATASTORE_RUNNER_CWD || '',
    );
  return command;
}
/* istanbul ignore next */
/**
 * Builds the `runner` CLI program exposing the `start` and `replay`
 * sub-commands.
 * @param name - Program name shown by the CLI (defaults to 'runner').
 * @returns The configured commander `Command` instance.
 */
export default function register(name = 'runner') {
  const program = new Command(name);
  // `start <handlerIds...>` — live streaming of entities/events.
  const startCommand = program.command('start <handlerIds...>');
  commonOptions(startCommand);
  startCommand
    .option(
      '--query <query>',
      'Forced query',
      (v) => !!v && JSON.parse(v),
      null,
    )
    .option(
      '--ptw, --processing-time-window <processing_time_window>',
      'Event processing time window',
      (v) => Number.parseInt(v, 10),
      // Infinity disables local queuing (see ProcessingStats).
      Infinity,
    )
    .option(
      '-p, --progress <progress>',
      'Progress feedback interval',
      (v) => Number.parseInt(v, 10),
      1_000,
    )
    .option(
      '--ipr, --init-parallel-events <init_parallel_events>',
      'Initial maximal parallel events for sampling',
      (v) => Number.parseInt(v, 10),
      10,
    )
    .option(
      '--max-reconnection-attempts <max_reconnection_attempts>',
      'Max reconnection attempts in case of error or unexpected closing',
      (v) => Number.parseInt(v, 10),
      1000,
    )
    .option(
      '--reconnection-interval <reconnection_interval>',
      'Interval in milliseconds between two reconnection attempts',
      (v) => Number.parseInt(v, 10),
      100,
    )
    .option(
      '--connection-max-life-span-in-seconds <connection_max_life_span_in_seconds>',
      'Max life span in seconds of a Datastore HTTP streaming connection',
      (v) => Number.parseInt(v, 10),
      3600,
    )
    .description('Start the queries handlers')
    .action(start());
  // `replay <handlerIds...>` — ordered replay of stored documents.
  const replayCommand = program.command('replay <handlerIds...>');
  commonOptions(replayCommand);
  replayCommand
    .option('--date-field <dateField>', 'Date field', 'created_at')
    .option(
      '--page-size <pageSize>',
      'Page size',
      (v) => Number.parseInt(v, 10),
      100,
    )
    .option(
      '-s, --sleep <sleep>',
      'Sleep period between each batch',
      (v) => Number.parseInt(v, 10),
      0,
    )
    .option(
      '--query <query>',
      'Forced query',
      (v) => !!v && JSON.parse(v),
      null,
    )
    .option(
      '--mutating',
      'Activates replay mode with possible mutations',
      false,
    )
    .option(
      '-p, --progress <progress>',
      'Progress feedback interval',
      (v) => Number.parseInt(v, 10),
      1_000,
    )
    .option('--debug', 'Execute the handler only once', false)
    .description('Replay events in order')
    .action(replay());
  return program;
}