UNPKG

durable-execution

Version:

A durable task engine for running tasks durably and resiliently

1,449 lines (1,365 loc) 58.8 kB
import type { StandardSchemaV1 } from '@standard-schema/spec' import { getErrorMessage } from '@gpahal/std/errors' import { sleep } from '@gpahal/std/promises' import { createCancellablePromise, createCancelSignal, type CancelSignal } from './cancel' import { convertDurableExecutionErrorToStorageObject, DurableExecutionCancelledError, DurableExecutionError, DurableExecutionTimedOutError, } from './errors' import { createConsoleLogger, createLoggerDebugDisabled, type Logger } from './logger' import { createSuperjsonSerializer, WrappedSerializer, type Serializer } from './serializer' import { convertTaskExecutionStorageObjectToTaskExecution, createDurableTaskExecutionStorageObject, getDurableTaskExecutionStorageObjectParentError, updateTaskExecutionsWithLimit, type DurableStorage, type DurableStorageTx, type DurableTaskExecutionStorageObject, type DurableTaskExecutionStorageObjectUpdate, } from './storage' import { ACTIVE_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS, FINISHED_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS, type DurableChildTaskExecution, type DurableChildTaskExecutionOutput, type DurableParentTaskOptions, type DurableTask, type DurableTaskEnqueueOptions, type DurableTaskFinishedExecution, type DurableTaskHandle, type DurableTaskOptions, type DurableTaskRunContext, } from './task' import { DurableTaskInternal, generateTaskExecutionId } from './task-internal' import { generateId, sleepWithJitter, summarizeStandardSchemaIssues } from './utils' export { type CancelSignal, createCancelSignal, createTimeoutCancelSignal, createCancellablePromise, } from './cancel' export { DurableExecutionError, DurableExecutionTimedOutError, DurableExecutionCancelledError, type DurableExecutionErrorStorageObject, } from './errors' export { type Logger, createConsoleLogger } from './logger' export type { DurableTask, DurableTaskCommonOptions, DurableTaskRetryOptions, DurableTaskOptions, DurableParentTaskOptions, DurableFinalizeTaskOptions, DurableTaskOnChildrenCompleteInput, 
DurableTaskRunContext, DurableTaskExecution, DurableTaskFinishedExecution, DurableTaskReadyExecution, DurableTaskRunningExecution, DurableTaskFailedExecution, DurableTaskTimedOutExecution, DurableTaskCancelledExecution, DurableTaskWaitingForChildrenTasksExecution, DurableTaskChildrenTasksFailedExecution, DurableTaskWaitingForFinalizeTaskExecution, DurableTaskFinalizeTaskFailedExecution, DurableTaskCompletedExecution, DurableChildTask, DurableChildTaskExecution, DurableChildTaskExecutionOutput, DurableChildTaskExecutionError, DurableChildTaskExecutionErrorStorageObject, DurableTaskExecutionStatusStorageObject, DurableTaskEnqueueOptions, DurableTaskHandle, } from './task' export { type Serializer, createSuperjsonSerializer, WrappedSerializer } from './serializer' export { createInMemoryStorage, createTransactionMutex, type TransactionMutex, type DurableStorage, type DurableStorageTx, type DurableTaskExecutionStorageObject, type DurableTaskExecutionStorageWhere, type DurableTaskExecutionStorageObjectUpdate, } from './storage' /** * A durable executor. It is used to execute durable tasks. * * Multiple durable executors can share the same storage. In such a case, all the tasks should be * present for all the durable executors. The work is distributed among the durable executors. * See the [usage](https://gpahal.github.io/durable-execution/index.html#usage) and * [task examples](https://gpahal.github.io/durable-execution/index.html#task-examples) * sections for more details on creating and enqueuing tasks. * * @example * ```ts * const executor = new DurableExecutor(storage) * * // Create tasks * const extractFileTitle = executor * .inputSchema(v.object({ filePath: v.string() })) * .task({ * id: 'extractFileTitle', * timeoutMs: 30_000, // 30 seconds * run: async (ctx, input) => { * // ... 
extract the file title * return { * title: 'File Title', * } * }, * }) * * const summarizeFile = executor * .validateInput(async (input: { filePath: string }) => { * if (!isValidFilePath(input.filePath)) { * throw new Error('Invalid file path') * } * return { * filePath: input.filePath, * } * }) * .task({ * id: 'summarizeFile', * timeoutMs: 30_000, // 30 seconds * run: async (ctx, input) => { * // ... summarize the file * return { * summary: 'File summary', * } * }, * }) * * const uploadFile = executor * .inputSchema(v.object({ filePath: v.string(), uploadUrl: v.string() })) * .parentTask({ * id: 'uploadFile', * timeoutMs: 60_000, // 1 minute * runParent: async (ctx, input) => { * // ... upload file to the given uploadUrl * // Extract the file title and summarize the file in parallel * return { * output: { * filePath: input.filePath, * uploadUrl: input.uploadUrl, * fileSize: 100, * }, * childrenTasks: [ * { * task: extractFileTitle, * input: { filePath: input.filePath }, * }, * { * task: summarizeFile, * input: { filePath: input.filePath }, * }, * ], * } * }, * finalizeTask: { * id: 'onUploadFileAndChildrenComplete', * timeoutMs: 60_000, // 1 minute * run: async (ctx, { input, output, childrenTasksOutputs }) => { * // ... 
combine the output of the run function and children tasks * return { * filePath: input.filePath, * uploadUrl: input.uploadUrl, * fileSize: 100, * title: 'File Title', * summary: 'File summary', * } * } * }, * }) * * async function app() { * // Enqueue task and manage its execution lifecycle * const uploadFileHandle = await executor.enqueueTask(uploadFile, { filePath: 'file.txt', uploadUrl: 'https://example.com/upload' }) * const uploadFileExecution = await uploadFileHandle.getTaskExecution() * const uploadFileFinishedExecution = await uploadFileHandle.waitAndGetTaskFinishedExecution() * await uploadFileHandle.cancel() * * console.log(uploadFileExecution) * } * * // Start the durable executor and run the app * await Promise.all([ * executor.start(), // Start the durable executor in the background * app(), // Run the app * ]) * * // Shutdown the durable executor when the app is done * await executor.shutdown() * ``` * * @category Executor */ export class DurableExecutor { private readonly storage: DurableStorage private readonly serializer: Serializer private readonly logger: Logger private readonly expireMs: number private readonly backgroundProcessIntraBatchSleepMs: number private readonly taskInternalsMap: Map<string, DurableTaskInternal> private readonly runningTaskExecutionsMap: Map< string, { promise: Promise<void> cancel: () => void } > private readonly shutdownSignal: CancelSignal private readonly internalCancel: () => void private startPromise: Promise<void> | undefined /** * Create a durable executor. * * @param storage - The storage to use for the durable executor. * @param options - The options for the durable executor. * @param options.serializer - The serializer to use for the durable executor. If not provided, a * default serializer using superjson will be used. * @param options.logger - The logger to use for the durable executor. If not provided, a console * logger will be used. * @param options.enableDebug - Whether to enable debug logging. If `true`, debug logging will * be enabled. 
*/
  constructor(
    storage: DurableStorage,
    {
      serializer,
      logger,
      enableDebug = false,
      // Falls back to 60_000 ms when omitted, zero or negative.
      // NOTE(review): presumably the expiry window for running task executions — confirm against
      // the code that reads `this.expireMs`.
      expireMs,
      // Sleep passed to `sleepWithJitter` by the background loops after a batch that found no
      // work. Falls back to 500 ms when omitted, zero or negative.
      backgroundProcessIntraBatchSleepMs,
    }: {
      serializer?: Serializer
      logger?: Logger
      enableDebug?: boolean
      expireMs?: number
      backgroundProcessIntraBatchSleepMs?: number
    } = {},
  ) {
    this.storage = storage
    this.serializer = new WrappedSerializer(serializer ?? createSuperjsonSerializer())
    this.logger = logger ?? createConsoleLogger('DurableExecutor')
    if (!enableDebug) {
      // Wrap the logger so that debug output is dropped unless explicitly enabled.
      this.logger = createLoggerDebugDisabled(this.logger)
    }

    this.expireMs = expireMs && expireMs > 0 ? expireMs : 60_000 // 1 minute
    this.backgroundProcessIntraBatchSleepMs =
      backgroundProcessIntraBatchSleepMs && backgroundProcessIntraBatchSleepMs > 0
        ? backgroundProcessIntraBatchSleepMs
        : 500 // 500ms

    this.taskInternalsMap = new Map()
    this.runningTaskExecutionsMap = new Map()

    // The signal is observed by `throwIfShutdown` and the background loops; `internalCancel`
    // triggers it from `shutdown()`.
    const [cancelSignal, cancel] = createCancelSignal({ logger: this.logger })
    this.shutdownSignal = cancelSignal
    this.internalCancel = cancel
  }

  /**
   * Execute a function with a storage transaction. Supports retries.
   *
   * The first attempt is always made. On failure the call is retried up to `maxRetryAttempts`
   * more times, sleeping via `sleepWithJitter(50)` between attempts. A `DurableExecutionError`
   * whose `isRetryable` is false is rethrown immediately without retrying.
   *
   * @param fn - The function to execute inside the transaction.
   * @param maxRetryAttempts - The maximum number of times to retry the transaction. Values `<= 0`
   * disable retries.
   * @returns The result of the function.
   * @throws The last error once the retry budget is exhausted, or the first non-retryable error.
   */
  private async withTransaction<T>(
    fn: (tx: DurableStorageTx) => Promise<T>,
    maxRetryAttempts = 1,
  ): Promise<T> {
    if (maxRetryAttempts <= 0) {
      return await this.storage.withTransaction(fn)
    }

    for (let i = 0; ; i++) {
      try {
        return await this.storage.withTransaction(fn)
      } catch (error) {
        if (error instanceof DurableExecutionError && !error.isRetryable) {
          throw error
        }
        // `>=` rather than `===` so that a fractional bound (e.g. 1.5) still terminates the loop
        // instead of retrying forever.
        if (i >= maxRetryAttempts) {
          throw error
        }

        this.logger.error('Error in withTransaction', error)
        await sleepWithJitter(50)
      }
    }
  }

  /**
   * Guard used by the public entry points.
   *
   * @throws DurableExecutionError with message `Executor shutdown` when the shutdown signal has
   * been cancelled.
   */
  private throwIfShutdown(): void {
    if (this.shutdownSignal.isCancelled()) {
      throw new DurableExecutionError('Executor shutdown', false)
    }
  }

  /**
   * Start the durable executor. Starts a background process. Use {@link DurableExecutor.shutdown}
   * to stop the durable executor.
*/
  async start(): Promise<void> {
    this.throwIfShutdown()
    // Reuse the existing promise when `start()` is called more than once. Starting a second set
    // of background loops would duplicate work and replace the promise that `shutdown()` awaits,
    // leaving the first set of loops unobserved.
    if (!this.startPromise) {
      this.startPromise = this.startBackgroundProcesses()
    }
    await this.startPromise
  }

  /**
   * Run all background loops concurrently. The returned promise settles once every loop has
   * returned, or rejects as soon as one of them rejects.
   */
  private async startBackgroundProcesses(): Promise<void> {
    await Promise.all([
      this.closeFinishedTaskExecutions(),
      this.retryExpiredRunningTaskExecutions(),
      this.cancelNeedPromiseCancellationTaskExecutions(),
      this.processReadyTaskExecutions(),
    ])
  }

  /**
   * Shutdown the durable executor. Triggers the shutdown signal, waits for the background
   * processes to stop and then waits for the running task executions to settle.
   *
   * On shutdown, these happen in this order:
   * - Stop enqueuing new tasks
   * - Stop background processes after the current iteration
   * - Wait for active task executions to finish. Task execution context contains a shutdown signal
   *   that can be used to gracefully shutdown the task when executor is shutting down.
   */
  async shutdown(): Promise<void> {
    this.logger.info('Shutting down durable executor')
    if (!this.shutdownSignal.isCancelled()) {
      try {
        this.internalCancel()
      } catch (error) {
        this.logger.error('Error in cancelling executor', error)
      }
    }
    this.logger.info('Executor cancelled. Waiting for background processes to stop')

    if (this.startPromise) {
      await this.startPromise
    }
    this.logger.info('Background processes stopped. Waiting for active task executions to finish')

    // Iterate over a snapshot because entries are removed from the map while waiting.
    const runningExecutions = [...this.runningTaskExecutionsMap.entries()]
    for (const [executionId, runningExecution] of runningExecutions) {
      try {
        await runningExecution.promise
      } catch (error) {
        this.logger.error(`Error in waiting for running task execution ${executionId}`, error)
      } finally {
        // The execution has settled either way, so drop it even when its promise rejected.
        this.runningTaskExecutionsMap.delete(executionId)
      }
    }
    this.logger.info('Active task executions finished. Durable executor shut down')
  }

  /**
   * Add a task to the durable executor. See the
   * [task examples](https://gpahal.github.io/durable-execution/index.html#task-examples) section
   * for more details on creating tasks.
   *
   * @param taskOptions - The task options. See {@link DurableTaskOptions} for more details on the
   * task options.
* @returns The durable task.
   */
  task<TInput = unknown, TOutput = unknown>(
    taskOptions: DurableTaskOptions<TInput, TOutput>,
  ): DurableTask<TInput, TOutput> {
    this.throwIfShutdown()
    const taskInternal = DurableTaskInternal.fromDurableTaskOptions(
      this.taskInternalsMap,
      taskOptions,
    )
    this.logger.debug(`Added task ${taskOptions.id}`)
    // The public task object only carries the id; the implementation stays in `taskInternalsMap`.
    return {
      id: taskInternal.id,
    } as DurableTask<TInput, TOutput>
  }

  /**
   * Add a parent task to the durable executor. See the
   * [task examples](https://gpahal.github.io/durable-execution/index.html#task-examples) section
   * for more details on creating parent tasks.
   *
   * @param parentTaskOptions - The parent task options. See {@link DurableParentTaskOptions} for
   * more details on the parent task options.
   * @returns The durable parent task.
   */
  parentTask<
    TInput = unknown,
    TRunOutput = unknown,
    TOutput = {
      output: TRunOutput
      childrenTasksOutputs: Array<DurableChildTaskExecutionOutput>
    },
    TFinalizeTaskRunOutput = unknown,
  >(
    parentTaskOptions: DurableParentTaskOptions<
      TInput,
      TRunOutput,
      TOutput,
      TFinalizeTaskRunOutput
    >,
  ): DurableTask<TInput, TOutput> {
    this.throwIfShutdown()
    const taskInternal = DurableTaskInternal.fromDurableParentTaskOptions(
      this.taskInternalsMap,
      parentTaskOptions,
    )
    this.logger.debug(`Added parent task ${parentTaskOptions.id}`)
    return {
      id: taskInternal.id,
    } as DurableTask<TInput, TOutput>
  }

  /**
   * Create task builders whose input is passed through a custom validation function before the
   * task runs. The value returned by `validateInputFn` becomes the input of the task's run
   * function.
   *
   * Any error thrown by `validateInputFn` is wrapped in a {@link DurableExecutionError} with the
   * message `Invalid input to task <id>: <original message>`.
   *
   * @param validateInputFn - Validates (and optionally transforms) the raw task input. May be
   * sync or async.
   * @returns An object with `task` and `parentTask` builders that register tasks using the
   * validation function.
   */
  validateInput<TRunInput, TInput>(
    validateInputFn: (input: TInput) => TRunInput | Promise<TRunInput>,
  ): {
    task: <TOutput = unknown>(
      taskOptions: DurableTaskOptions<TRunInput, TOutput>,
    ) => DurableTask<TInput, TOutput>
    parentTask: <
      TRunOutput = unknown,
      TOutput = {
        output: TRunOutput
        childrenTasksOutputs: Array<DurableChildTaskExecutionOutput>
      },
      TFinalizeTaskRunOutput = unknown,
    >(
      parentTaskOptions: DurableParentTaskOptions<
        TRunInput,
        TRunOutput,
        TOutput,
        TFinalizeTaskRunOutput
      >,
    ) => DurableTask<TInput, TOutput>
  } {
    this.throwIfShutdown()
    const wrappedValidateInputFn = async (id: string, input: TInput) => {
      try {
        const runInput = await validateInputFn(input)
        return runInput
      } catch (error) {
        throw new DurableExecutionError(
          `Invalid input to task ${id}: ${getErrorMessage(error)}`,
          false,
        )
      }
    }
    return this.withValidateInputInternal(wrappedValidateInputFn)
  }

  /**
   * Create task builders whose input is validated against a Standard Schema. The schema's output
   * type becomes the input of the task's run function.
   *
   * Validation issues, and any error thrown by the schema itself, are reported as a
   * {@link DurableExecutionError} with the message `Invalid input to task <id>: <details>`.
   *
   * @param inputSchema - The Standard Schema used to validate the raw task input.
   * @returns An object with `task` and `parentTask` builders that register tasks using the
   * schema.
   */
  inputSchema<TInputSchema extends StandardSchemaV1>(
    inputSchema: TInputSchema,
  ): {
    task: <TOutput = unknown>(
      taskOptions: DurableTaskOptions<StandardSchemaV1.InferOutput<TInputSchema>, TOutput>,
    ) => DurableTask<StandardSchemaV1.InferInput<TInputSchema>, TOutput>
    parentTask: <
      TRunOutput = unknown,
      TOutput = {
        output: TRunOutput
        childrenTasksOutputs: Array<DurableChildTaskExecutionOutput>
      },
      // Declared for parity with `parentTask` and `validateInput(...).parentTask`, which both
      // expose the finalize task's run output type.
      TFinalizeTaskRunOutput = unknown,
    >(
      parentTaskOptions: DurableParentTaskOptions<
        StandardSchemaV1.InferOutput<TInputSchema>,
        TRunOutput,
        TOutput,
        TFinalizeTaskRunOutput
      >,
    ) => DurableTask<StandardSchemaV1.InferInput<TInputSchema>, TOutput>
  } {
    this.throwIfShutdown()
    const validateInputFn = async (
      id: string,
      input: StandardSchemaV1.InferInput<TInputSchema>,
    ): Promise<StandardSchemaV1.InferOutput<TInputSchema>> => {
      try {
        const validateResult = await inputSchema['~standard'].validate(input)
        if (validateResult.issues != null) {
          throw new DurableExecutionError(
            `Invalid input to task ${id}: ${summarizeStandardSchemaIssues(validateResult.issues)}`,
            false,
          )
        }
        return validateResult.value
      } catch (error) {
        // The issues error thrown above is already in its final shape; only wrap foreign errors.
        if (error instanceof DurableExecutionError) {
          throw error
        }
        throw new DurableExecutionError(
          `Invalid input to task ${id}: ${getErrorMessage(error)}`,
          false,
        )
      }
    }
    return this.withValidateInputInternal(validateInputFn)
  }

  /**
   * Shared implementation behind {@link DurableExecutor.validateInput} and
   * {@link DurableExecutor.inputSchema}. Returns `task` and `parentTask` builders that register
   * the task together with `validateInputFn`.
   *
   * @param validateInputFn - Receives the task id and the raw input and returns the run input.
   * @returns The `task` and `parentTask` builders.
   */
  private withValidateInputInternal<TRunInput, TInput>(
    validateInputFn: (id: string, input: TInput) => TRunInput | Promise<TRunInput>,
  ): {
    task: <TOutput>(
      taskOptions: DurableTaskOptions<TRunInput, TOutput>,
    ) => DurableTask<TInput, TOutput>
    parentTask: <
      TRunOutput = unknown,
      TOutput = {
        output: TRunOutput
        childrenTasksOutputs: Array<DurableChildTaskExecutionOutput>
      },
      TFinalizeTaskRunOutput = unknown,
    >(
      parentTaskOptions: DurableParentTaskOptions<
        TRunInput,
        TRunOutput,
        TOutput,
        TFinalizeTaskRunOutput
      >,
    ) => DurableTask<TInput, TOutput>
  } {
    this.throwIfShutdown()
    return {
      task: <TOutput>(
        taskOptions: DurableTaskOptions<TRunInput, TOutput>,
      ): DurableTask<TInput, TOutput> => {
        // Checked again here because the builder may be invoked long after it was created.
        this.throwIfShutdown()
        const taskInternal = DurableTaskInternal.fromDurableTaskOptions(
          this.taskInternalsMap,
          taskOptions,
          validateInputFn,
        )
        this.logger.debug(`Added task ${taskOptions.id}`)
        return {
          id: taskInternal.id,
        } as DurableTask<TInput, TOutput>
      },
      parentTask: <
        TRunOutput = unknown,
        TOutput = {
          output: TRunOutput
          childrenTasksOutputs: Array<DurableChildTaskExecutionOutput>
        },
        TFinalizeTaskRunOutput = unknown,
      >(
        parentTaskOptions: DurableParentTaskOptions<
          TRunInput,
          TRunOutput,
          TOutput,
          TFinalizeTaskRunOutput
        >,
      ): DurableTask<TInput, TOutput> => {
        this.throwIfShutdown()
        const taskInternal = DurableTaskInternal.fromDurableParentTaskOptions(
          this.taskInternalsMap,
          parentTaskOptions,
          validateInputFn,
        )
        this.logger.debug(`Added parent task ${parentTaskOptions.id}`)
        return {
          id: taskInternal.id,
        } as DurableTask<TInput, TOutput>
      },
    }
  }

  /**
   * Create a new task that runs a sequence of tasks sequentially.
   *
   * The tasks list must be a list of tasks that are compatible with each other. The input of any
   * task must be the same as the output of the previous task. The output of the last task will be
   * the output of the sequential task.
   *
   * The tasks list cannot be empty.
   *
   * @param tasks - The tasks to run sequentially.
   * @returns The sequential task.
*/
  sequentialTasks<T extends ReadonlyArray<DurableTask<unknown, unknown>>>(
    ...tasks: SequentialDurableTasks<T>
  ): DurableTask<ExtractDurableTaskInput<T[0]>, ExtractDurableTaskOutput<LastElement<T>>> {
    if (tasks.length === 0) {
      throw new DurableExecutionError('No tasks provided', false)
    }
    if (tasks.length === 1) {
      return tasks[0]
    }

    // Fold the list recursively: run the first task, then a sequential task made of the rest.
    const firstTask = tasks[0]
    const secondTask = this.sequentialTasks(
      ...(tasks.slice(1) as SequentialDurableTasks<ReadonlyArray<DurableTask<unknown, unknown>>>),
    )
    // NOTE(review): the id is random per call, so another executor sharing the same storage will
    // not register the same id — confirm this is acceptable for multi-executor deployments.
    const taskId = `st_${generateId(16)}`
    return this.parentTask({
      id: taskId,
      timeoutMs: 1000,
      runParent: (_, input) => {
        return {
          output: undefined,
          childrenTasks: [{ task: firstTask, input }],
        }
      },
      finalizeTask: {
        id: `${taskId}_finalize_1`,
        timeoutMs: 1000,
        // The first finalize step is itself a parent task: it feeds the first task's output into
        // the remaining sequence.
        runParent: (_, { childrenTasksOutputs }) => {
          const firstTaskOutput = childrenTasksOutputs[0]!.output
          return {
            output: undefined,
            childrenTasks: [{ task: secondTask, input: firstTaskOutput }],
          }
        },
        finalizeTask: {
          id: `${taskId}_finalize_2`,
          timeoutMs: 1000,
          run: (_, { childrenTasksOutputs }) => {
            const secondTaskOutput = childrenTasksOutputs[0]!.output as ExtractDurableTaskOutput<
              LastElement<T>
            >
            return secondTaskOutput
          },
        },
      },
    })
  }

  /**
   * Enqueue a task for execution.
   *
   * @param task - The task to enqueue.
   * @param input - The input to the task. It is validated with the task's input validation before
   * the execution is stored.
   * @param options - Per-enqueue options used to resolve the retry options, the sleep before run
   * and the timeout of this execution.
   * @returns A handle to the task execution.
   * @throws DurableExecutionError if the executor is shut down or the task is not registered.
   */
  async enqueueTask<TTask extends DurableTask<unknown, unknown>>(
    task: TTask,
    input: ExtractDurableTaskInput<TTask>,
    options: DurableTaskEnqueueOptions = {},
  ): Promise<DurableTaskHandle<ExtractDurableTaskOutput<TTask>>> {
    this.throwIfShutdown()

    const taskInternal = this.taskInternalsMap.get(task.id)
    if (!taskInternal) {
      throw new DurableExecutionError(
        `Task ${task.id} not found. Use DurableExecutor.task() to add it before enqueuing it.`,
        false,
      )
    }

    const runInput = await taskInternal.validateInput(input)
    const executionId = generateTaskExecutionId()
    const now = new Date()
    const retryOptions = taskInternal.getRetryOptions(options)
    const sleepMsBeforeRun = taskInternal.getSleepMsBeforeRun(options)
    const timeoutMs = taskInternal.getTimeoutMs(options)
    await this.withTransaction(async (tx) => {
      await tx.insertTaskExecutions([
        createDurableTaskExecutionStorageObject({
          now,
          taskId: taskInternal.id,
          executionId,
          retryOptions,
          sleepMsBeforeRun,
          timeoutMs,
          runInput: this.serializer.serialize(runInput),
        }),
      ])
    })

    this.logger.debug(`Enqueued task ${task.id} with execution id ${executionId}`)
    return this.getTaskHandleInternal(taskInternal.id, executionId)
  }

  /**
   * Get a handle to a task execution.
   *
   * @param task - The task to get the handle for.
   * @param executionId - The id of the execution to get the handle for.
   * @returns The handle to the task execution.
   * @throws DurableExecutionError if the task is not registered, the execution does not exist, or
   * the execution belongs to a different task.
   */
  async getTaskHandle<TInput = unknown, TOutput = unknown>(
    task: DurableTask<TInput, TOutput>,
    executionId: string,
  ): Promise<DurableTaskHandle<TOutput>> {
    const taskInternal = this.taskInternalsMap.get(task.id)
    if (!taskInternal) {
      throw new DurableExecutionError(
        `Task ${task.id} not found. Use DurableExecutor.task() to add it before enqueuing it.`,
        false,
      )
    }

    const execution = await this.withTransaction(async (tx) => {
      const executions = await tx.getTaskExecutions({
        type: 'by_execution_ids',
        executionIds: [executionId],
      })
      return executions.length === 0 ? undefined : executions[0]
    })
    if (!execution) {
      throw new DurableExecutionError(`Execution ${executionId} not found`, false)
    }
    // Without this check a handle typed for `task` could be returned for an execution of a
    // different task, reporting the wrong task id and output type.
    if (execution.taskId !== task.id) {
      throw new DurableExecutionError(
        `Execution ${executionId} belongs to task ${execution.taskId}, not ${task.id}`,
        false,
      )
    }

    return this.getTaskHandleInternal(task.id, executionId)
  }

  /**
   * Build the handle object for an execution. No storage access happens until one of the handle's
   * methods is called.
   *
   * @param taskId - The id of the task the execution belongs to.
   * @param executionId - The id of the execution.
   * @returns The handle to the task execution.
   */
  private getTaskHandleInternal<TOutput>(
    taskId: string,
    executionId: string,
  ): DurableTaskHandle<TOutput> {
    // Load the stored execution, failing when it does not exist. Shared by the handle methods.
    const loadExecution = async () => {
      const execution = await this.withTransaction(async (tx) => {
        const executions = await tx.getTaskExecutions({
          type: 'by_execution_ids',
          executionIds: [executionId],
        })
        return executions.length === 0 ? undefined : executions[0]
      })
      if (!execution) {
        throw new DurableExecutionError(`Execution ${executionId} not found`, false)
      }
      return execution
    }

    return {
      getTaskId: () => taskId,
      getTaskExecutionId: () => executionId,
      getTaskExecution: async () => {
        const execution = await loadExecution()
        return convertTaskExecutionStorageObjectToTaskExecution(execution, this.serializer)
      },
      waitAndGetTaskFinishedExecution: async ({
        signal,
        pollingIntervalMs,
      }: {
        signal?: CancelSignal | AbortSignal
        pollingIntervalMs?: number
      } = {}) => {
        const cancelSignal =
          signal instanceof AbortSignal
            ? createCancelSignal({ abortSignal: signal, logger: this.logger })[0]
            : signal

        // Poll every second unless a positive interval is given.
        const resolvedPollingIntervalMs =
          pollingIntervalMs && pollingIntervalMs > 0 ? pollingIntervalMs : 1000
        let isFirstIteration = true
        while (true) {
          if (cancelSignal?.isCancelled()) {
            throw new DurableExecutionCancelledError()
          }

          // Check immediately on the first pass; sleep between subsequent polls.
          if (isFirstIteration) {
            isFirstIteration = false
          } else {
            await createCancellablePromise(sleep(resolvedPollingIntervalMs), cancelSignal)
            if (cancelSignal?.isCancelled()) {
              throw new DurableExecutionCancelledError()
            }
          }

          const execution = await loadExecution()
          if (FINISHED_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS.includes(execution.status)) {
            return convertTaskExecutionStorageObjectToTaskExecution(
              execution,
              this.serializer,
            ) as DurableTaskFinishedExecution<TOutput>
          } else {
            this.logger.debug(
              `Waiting for task ${executionId} to be finished. Status: ${execution.status}`,
            )
          }
        }
      },
      cancel: async () => {
        const now = new Date()
        await this.withTransaction(async (tx) => {
          // The status filter restricts the update to executions in an active status.
          await tx.updateTaskExecutions(
            {
              type: 'by_execution_ids',
              executionIds: [executionId],
              statuses: ACTIVE_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS,
            },
            {
              error: convertDurableExecutionErrorToStorageObject(
                new DurableExecutionCancelledError(),
              ),
              status: 'cancelled',
              needsPromiseCancellation: true,
              finishedAt: now,
              updatedAt: now,
            },
          )
        })
        this.logger.debug(`Cancelled function ${executionId}`)
      },
    }
  }

  /**
   * Run a batch function in a loop until the executor is shut down.
   *
   * After a batch that reports no work, the loop sleeps for `backgroundProcessIntraBatchSleepMs`.
   * After a failing batch it sleeps for the current backoff (capped at 5 seconds for retryable
   * errors). Every 10 consecutive errors the backoff doubles, up to 30 seconds; a successful batch
   * resets both the error count and the backoff.
   *
   * @param processName - The name used in log messages.
   * @param singleBatchProcessFn - Processes one batch and resolves to `true` when it did some
   * work.
   */
  private async runBackgroundProcess(
    processName: string,
    singleBatchProcessFn: () => Promise<boolean>,
  ): Promise<void> {
    let consecutiveErrors = 0
    const maxConsecutiveErrors = 10
    let backoffMs = 1000

    while (true) {
      if (this.shutdownSignal.isCancelled()) {
        this.logger.info(`Executor cancelled. Stopping ${processName}`)
        return
      }

      try {
        const hasNonEmptyResult = await createCancellablePromise(
          singleBatchProcessFn(),
          this.shutdownSignal,
        )
        if (!hasNonEmptyResult) {
          await sleepWithJitter(this.backgroundProcessIntraBatchSleepMs)
        }

        consecutiveErrors = 0
        backoffMs = 1000
      } catch (error) {
        if (error instanceof DurableExecutionCancelledError) {
          this.logger.info(`Executor cancelled. Stopping ${processName}`)
          return
        }

        consecutiveErrors++
        this.logger.error(`Error in ${processName}: consecutive_errors=${consecutiveErrors}`, error)

        // Errors that are not DurableExecutionError are treated as retryable.
        const isRetryableError = error instanceof DurableExecutionError ? error.isRetryable : true
        const waitTime = isRetryableError ? Math.min(backoffMs, 5000) : backoffMs
        const hasTooManyErrors = consecutiveErrors >= maxConsecutiveErrors
        if (hasTooManyErrors) {
          // Report the time actually slept (`waitTime`), which can be lower than `backoffMs`.
          this.logger.error(
            `Too many consecutive errors (${consecutiveErrors}) in ${processName}. Backing off for ${waitTime}ms before retrying.`,
          )
        }

        await sleepWithJitter(waitTime)
        if (hasTooManyErrors) {
          backoffMs = Math.min(backoffMs * 2, 30_000)
          consecutiveErrors = 0
        }
      }
    }
  }

  /**
   * Background loop that closes finished task executions one batch at a time.
   */
  private async closeFinishedTaskExecutions(): Promise<void> {
    return this.runBackgroundProcess('closing finished task executions', () =>
      this.closeFinishedTaskExecutionsSingleBatch(),
    )
  }

  /**
   * Close finished task executions.
   *
   * Marks finished executions that are not yet closed as closed (the limit passed to
   * `updateTaskExecutionsWithLimit` is 1) and then processes the parent and the children of each
   * of them, all within one transaction.
   *
   * @returns `true` if at least one execution was closed, `false` otherwise.
   * @throws DurableExecutionError if a just-updated execution cannot be read back.
   */
  private async closeFinishedTaskExecutionsSingleBatch(): Promise<boolean> {
    return await this.withTransaction(async (tx) => {
      const now = new Date()
      const updatedExecutionIds = await updateTaskExecutionsWithLimit(
        tx,
        {
          type: 'by_statuses',
          statuses: FINISHED_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS,
          isClosed: false,
        },
        { isClosed: true, updatedAt: now },
        1,
      )

      this.logger.debug(`Closing ${updatedExecutionIds.length} finished task executions`)
      if (updatedExecutionIds.length === 0) {
        return false
      }

      const executions = await tx.getTaskExecutions({
        type: 'by_execution_ids',
        executionIds: updatedExecutionIds,
      })
      if (executions.length !== updatedExecutionIds.length) {
        throw new DurableExecutionError('Some task executions not found', true)
      }

      for (const execution of executions) {
        await this.closeFinishedTaskExecutionParent(tx, execution, now)
        await this.closeFinishedTaskExecutionChildren(tx, execution, now)
      }
      return true
    })
  }

  /**
   * Close finished task execution parent. If the execution status is completed, it will update the
   * status of the parent to completed if it is waiting for children and all the children are
   * completed. If the parent execution has already finished, it will just update the children
   * state.
*/
  private async closeFinishedTaskExecutionParent(
    tx: DurableStorageTx,
    execution: DurableTaskExecutionStorageObject,
    now: Date,
  ): Promise<void> {
    if (!execution.parentTask) {
      return
    }

    const parentTaskInternal = this.taskInternalsMap.get(execution.parentTask.taskId)
    if (!parentTaskInternal) {
      this.logger.error(`Parent task ${execution.parentTask.taskId} not found`)
      return
    }

    const parentExecutions = await tx.getTaskExecutions({
      type: 'by_execution_ids',
      executionIds: [execution.parentTask.executionId],
    })
    if (parentExecutions.length === 0) {
      this.logger.error(`Parent task execution ${execution.parentTask.executionId} not found`)
      return
    }

    const parentExecution = parentExecutions[0]!
    const status = execution.status
    const parentChildren = parentExecution.childrenTasks ?? []

    // Handle finished finalize task child.
    if (execution.parentTask.isFinalizeTask) {
      const parentExecutionFinalizeTask = parentExecution.finalizeTask
      if (parentExecution.status === 'waiting_for_finalize_task' && status === 'completed') {
        // If the parent execution is waiting for the finalize task to complete, and it got
        // completed, update the output and status to completed. We're done with the parent task
        // execution.
        await tx.updateTaskExecutions(
          { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
          {
            output: execution.output!,
            finalizeTask: parentExecutionFinalizeTask,
            unsetError: true,
            status: 'completed',
            finishedAt: now,
            updatedAt: now,
          },
        )
      } else if (status !== 'completed') {
        const finalizeTaskError = getDurableTaskExecutionStorageObjectParentError(execution)
        if (parentExecution.status === 'waiting_for_finalize_task') {
          // The finalize task did not complete while the parent was still waiting for it: the
          // parent finishes now as failed.
          await tx.updateTaskExecutions(
            { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
            {
              finalizeTaskError,
              status: 'finalize_task_failed',
              finishedAt: now,
              updatedAt: now,
            },
          )
        } else {
          // The parent is no longer waiting for the finalize task. Only record the error: the
          // parent's status and `finishedAt` are left untouched so that its original finish time
          // is not overwritten.
          await tx.updateTaskExecutions(
            { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
            {
              finalizeTaskError,
              updatedAt: now,
            },
          )
        }
      }
      return
    }

    // Handle finished child.
    const childIdx = parentChildren.findIndex(
      (child) => child.executionId === execution.executionId,
    )
    if (childIdx === -1) {
      this.logger.error(
        `Child execution ${execution.executionId} not found for parent task execution ${execution.parentTask.executionId}`,
      )
      return
    }

    if (status === 'completed') {
      // The stored count does not include this child yet, hence the `- 1`.
      const areAllChildrenCompleted =
        parentExecution.childrenTasksCompletedCount >= parentChildren.length - 1
      if (parentExecution.status === 'waiting_for_children_tasks' && areAllChildrenCompleted) {
        const childExecutionIdToIndexMap = new Map<string, number>(
          parentChildren.map((parentChild, index) => [parentChild.executionId, index]),
        )

        // If the parent execution is waiting for all the children to complete, and all the children
        // are completed, we can run the finalize task if present, otherwise we can just mark the
        // parent execution as completed.
        const childrenExecutions = await tx.getTaskExecutions({
          type: 'by_execution_ids',
          executionIds: parentChildren.map((child) => child.executionId),
        })
        const childrenTasksOutputs = childrenExecutions.map((childExecution) => {
          return {
            index: childExecutionIdToIndexMap.get(childExecution.executionId)!,
            taskId: childExecution.taskId,
            executionId: childExecution.executionId,
            output: this.serializer.deserialize(childExecution.output!),
          }
        })
        // Report the outputs in the order the children were declared on the parent.
        childrenTasksOutputs.sort((a, b) => a.index - b.index)

        if (parentTaskInternal.finalizeTask) {
          const finalizeTaskTaskInternal = this.taskInternalsMap.get(
            parentTaskInternal.finalizeTask.id,
          )
          if (!finalizeTaskTaskInternal) {
            throw new DurableExecutionError(
              `Parent finalize task ${parentTaskInternal.finalizeTask.id} not found`,
              false,
            )
          }

          const finalizeTaskInput = {
            input: this.serializer.deserialize(parentExecution.runInput),
            output: this.serializer.deserialize(parentExecution.runOutput!),
            childrenTasksOutputs,
          }
          const finalizeTaskRunInput =
            await finalizeTaskTaskInternal.validateInput(finalizeTaskInput)
          const executionId = generateTaskExecutionId()
          const retryOptions = finalizeTaskTaskInternal.getRetryOptions()
          const sleepMsBeforeRun = finalizeTaskTaskInternal.getSleepMsBeforeRun()
          const timeoutMs = finalizeTaskTaskInternal.getTimeoutMs()
          // The finalize task execution is inserted as a child of the same parent, flagged with
          // `isFinalizeTask` so that its completion is routed to the finalize branch above.
          await tx.insertTaskExecutions([
            createDurableTaskExecutionStorageObject({
              now,
              rootTask: execution.rootTask,
              parentTask: {
                ...execution.parentTask,
                isFinalizeTask: true,
              },
              taskId: finalizeTaskTaskInternal.id,
              executionId,
              retryOptions,
              sleepMsBeforeRun,
              timeoutMs,
              runInput: this.serializer.serialize(finalizeTaskRunInput),
            }),
          ])
          await tx.updateTaskExecutions(
            { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
            {
              childrenTasksCompletedCount: parentExecution.childrenTasksCompletedCount + 1,
              finalizeTask: {
                taskId: finalizeTaskTaskInternal.id,
                executionId,
              },
              unsetError: true,
              status: 'waiting_for_finalize_task',
              updatedAt: now,
            },
          )
        } else {
          await tx.updateTaskExecutions(
            { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
            {
              output: parentTaskInternal.disableChildrenTasksOutputsInOutput
                ? parentExecution.runOutput!
                : this.serializer.serialize({
                    output: this.serializer.deserialize(parentExecution.runOutput!),
                    childrenTasksOutputs,
                  }),
              childrenTasksCompletedCount: parentExecution.childrenTasksCompletedCount + 1,
              unsetError: true,
              status: 'completed',
              finishedAt: now,
              updatedAt: now,
            },
          )
        }
      } else {
        // If the parent execution is finished or some children haven't finished yet, we can just
        // update the children count and children.
        await tx.updateTaskExecutions(
          {
            type: 'by_execution_ids',
            executionIds: [parentExecution.executionId],
          },
          {
            childrenTasksCompletedCount: parentExecution.childrenTasksCompletedCount + 1,
            updatedAt: now,
          },
        )
      }
    } else {
      // If the child failed, update the children errors. Build a new array instead of pushing
      // into the one owned by the fetched parent execution object.
      const childrenTasksErrors = [
        ...(parentExecution.childrenTasksErrors ?? []),
        {
          index: childIdx,
          taskId: execution.taskId,
          executionId: execution.executionId,
          error: getDurableTaskExecutionStorageObjectParentError(execution),
        },
      ]
      if (parentExecution.status === 'waiting_for_children_tasks') {
        // First failing child while the parent is still waiting: the parent finishes now.
        await tx.updateTaskExecutions(
          { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
          {
            childrenTasksErrors,
            status: 'children_tasks_failed',
            finishedAt: now,
            updatedAt: now,
          },
        )
      } else {
        // The parent is no longer waiting for children. Only record the error; its status and
        // `finishedAt` are left untouched so that its original finish time is not overwritten.
        await tx.updateTaskExecutions(
          { type: 'by_execution_ids', executionIds: [parentExecution.executionId] },
          {
            childrenTasksErrors,
            updatedAt: now,
          },
        )
      }
    }
  }

  /**
   * Close finished task execution children and cancel running children task executions.
   *
   * Nothing is done when the execution has no children or when it completed. Otherwise every
   * child that is still in an active status is marked as cancelled and flagged with
   * `needsPromiseCancellation`.
   *
   * @param tx - The storage transaction to run the update in.
   * @param execution - The finished task execution whose children should be cancelled.
   * @param now - Timestamp used for `finishedAt` and `updatedAt` of the cancelled children.
   */
  private async closeFinishedTaskExecutionChildren(
    tx: DurableStorageTx,
    execution: DurableTaskExecutionStorageObject,
    now: Date,
  ): Promise<void> {
    if (
      !execution.childrenTasks ||
      execution.childrenTasks.length === 0 ||
      execution.status === 'completed'
    ) {
      return
    }

    const childrenExecutionIds = execution.childrenTasks.map((child) => child.executionId)
    await tx.updateTaskExecutions(
      {
        type: 'by_execution_ids',
        executionIds: childrenExecutionIds,
        statuses: ACTIVE_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS,
      },
      {
        error: convertDurableExecutionErrorToStorageObject(
          new DurableExecutionCancelledError(
            `Parent task ${execution.taskId} with execution id ${execution.executionId} failed: ${getDurableTaskExecutionStorageObjectParentError(execution).message}`,
          ),
        ),
        status: 'cancelled',
        needsPromiseCancellation: true,
        finishedAt: now,
        updatedAt: now,
      },
    )
  }

  /**
   * Background process that repeatedly retries expired running task executions, one batch at a
   * time.
   */
  private async retryExpiredRunningTaskExecutions(): Promise<void> {
    return this.runBackgroundProcess('retrying expired running task executions', () =>
      this.retryExpiredRunningTaskExecutionsSingleBatch(),
    )
  }

  /**
   * Retry expired running task executions. This will only happen when the process running the
   * execution previously crashed.
*/
  private async retryExpiredRunningTaskExecutionsSingleBatch(): Promise<boolean> {
    return await this.withTransaction(async (tx) => {
      const now = new Date()
      const expiredError = convertDurableExecutionErrorToStorageObject(
        new DurableExecutionError('Task expired', false),
      )
      // Move up to three expired `running` executions back to `ready` so they are picked up again.
      const expiredIds = await updateTaskExecutionsWithLimit(
        tx,
        {
          type: 'by_statuses',
          statuses: ['running'],
          isClosed: false,
          expiresAtLessThan: now,
        },
        {
          error: expiredError,
          status: 'ready',
          startAt: now,
          unsetExpiresAt: true,
          updatedAt: now,
        },
        3,
      )

      const expiredCount = expiredIds.length
      this.logger.debug(`Expiring ${expiredCount} running task executions`)
      return expiredCount > 0
    })
  }

  /**
   * Background process that repeatedly cancels the locally running task executions that were
   * flagged as needing promise cancellation, one batch at a time.
   */
  private async cancelNeedPromiseCancellationTaskExecutions(): Promise<void> {
    const cancelSingleBatch = (): Promise<boolean> =>
      this.cancelNeedPromiseCancellationTaskExecutionsSingleBatch()
    return this.runBackgroundProcess(
      'cancelling promise cancellation task executions',
      cancelSingleBatch,
    )
  }

  /**
   * Cancel one batch (at most five) of the task executions running in this executor that are
   * flagged with `needsPromiseCancellation`.
   *
   * The flag is cleared in storage, then the local `cancel` callback of each affected execution
   * is invoked and the execution is removed from the running executions map.
   *
   * @returns `true` if at least one execution was processed, `false` otherwise.
   */
  private async cancelNeedPromiseCancellationTaskExecutionsSingleBatch(): Promise<boolean> {
    // Nothing is running in this executor, so there is nothing to cancel.
    if (this.runningTaskExecutionsMap.size === 0) {
      return false
    }

    return await this.withTransaction(async (tx) => {
      const now = new Date()
      const runningIds = Array.from(this.runningTaskExecutionsMap.keys())
      const flaggedIds = await updateTaskExecutionsWithLimit(
        tx,
        {
          type: 'by_execution_ids',
          executionIds: runningIds,
          needsPromiseCancellation: true,
        },
        {
          needsPromiseCancellation: false,
          updatedAt: now,
        },
        5,
      )

      this.logger.debug(
        `Cancelling ${flaggedIds.length} task executions that need promise cancellation`,
      )
      if (flaggedIds.length === 0) {
        return false
      }

      for (const flaggedId of flaggedIds) {
        const runningEntry = this.runningTaskExecutionsMap.get(flaggedId)
        if (!runningEntry) {
          continue
        }

        // A throwing cancel callback must not prevent the remaining executions from being
        // cancelled.
        try {
          runningEntry.cancel()
        } catch (error) {
          this.logger.error(`Error in cancelling task ${flaggedId}`, error)
        }
        this.runningTaskExecutionsMap.delete(flaggedId)
      }
      return true
    })
  }

  private async processReadyTaskExecutions(): Promise<void> {
    return this.runBackgroundProcess('processing ready task executions', () =>
this.processReadyTaskExecutionsSingleBatch(),
    )
  }

  /**
   * Process task executions that are ready to run based on status being ready and startAt
   * being in the past.
   *
   * One batch (at most one execution) is moved from `ready` to `running` inside a transaction.
   * Executions whose task is not registered in this executor are marked as failed. The remaining
   * ones get an expiration time and are started only after the transaction callback has returned,
   * so that a started task never runs against a status update that is not yet committed.
   *
   * @returns `true` if at least one ready execution was picked up, `false` otherwise.
   * @throws DurableExecutionError if a picked up execution cannot be read back from storage.
   */
  private async processReadyTaskExecutionsSingleBatch(): Promise<boolean> {
    const toRunExecutions = await this.withTransaction(async (tx) => {
      const now = new Date()
      const executionIds = await updateTaskExecutionsWithLimit(
        tx,
        {
          type: 'by_start_at_less_than',
          statuses: ['ready'],
          startAtLessThan: now,
        },
        { status: 'running', startedAt: now, updatedAt: now },
        1,
      )

      this.logger.debug(`Processing ${executionIds.length} ready task executions`)
      if (executionIds.length === 0) {
        // `undefined` signals "empty batch", as opposed to an empty list which means that the
        // batch only contained executions of unregistered tasks.
        return undefined
      }

      const executions = await tx.getTaskExecutions({
        type: 'by_execution_ids',
        executionIds,
      })
      if (executions.length !== executionIds.length) {
        throw new DurableExecutionError('Some task executions not found', true)
      }

      const pendingExecutions: Array<[DurableTaskInternal, DurableTaskExecutionStorageObject]> = []
      for (const execution of executions) {
        const taskInternal = this.taskInternalsMap.get(execution.taskId)
        if (!taskInternal) {
          // This will only happen if the task is not registered in this executor.
          // Mark the execution as failed.
          await tx.updateTaskExecutions(
            {
              type: 'by_execution_ids',
              executionIds: [execution.executionId],
              statuses: ACTIVE_TASK_EXECUTION_STATUSES_STORAGE_OBJECTS,
            },
            {
              error: convertDurableExecutionErrorToStorageObject(
                new DurableExecutionError('Task not found', false),
              ),
              status: 'failed',
              finishedAt: now,
              updatedAt: now,
            },
          )
          continue
        }

        // The execution expires after its own timeout plus the executor level grace period.
        const expireMs = execution.timeoutMs + this.expireMs
        const expiresAt = new Date(now.getTime() + expireMs)
        await tx.updateTaskExecutions(
          { type: 'by_execution_ids', executionIds: [execution.executionId] },
          { expiresAt, updatedAt: now },
        )
        // Keep the in-memory copy in sync with what was just written to storage.
        execution.expiresAt = expiresAt
        pendingExecutions.push([taskInternal, execution])
      }
      return pendingExecutions
    })

    if (!toRunExecutions) {
      return false
    }

    // Start the tasks only now that the transaction callback has finished.
    for (const [taskInternal, execution] of toRunExecutions) {
      this.runTaskExecutionWithCancelSignal(taskInternal, execution)
    }
    return true
  }

  /**
   * Run a task execution with a cancel signal. It is expected to be in running state.
   *
   * It will add the execution to the running executions map and make sure it is removed from the
   * running executions map if the task execution completes. If the process crashes, the execution
   * will be retried later on expiration.
   *
   * The call returns immediately: the task keeps running in the background and errors are logged
   * rather than thrown. If the execution is already present in the running executions map, the
   * call does nothing.
   *
   * @param taskInternal - The task internal to run.
   * @param execution - The task execution to run.
   */
  private runTaskExecutionWithCancelSignal(
    taskInternal: DurableTaskInternal,
    execution: DurableTaskExecutionStorageObject,
  ): void {
    // Never run the same execution twice concurrently in this executor.
    if (this.runningTaskExecutionsMap.has(execution.executionId)) {
      return
    }

    const [cancelSignal, cancel] = createCancelSignal({ logger: this.logger })
    const promise = this.runTaskExecutionWithContext(taskInternal, execution, cancelSignal)
      .catch((error) => {
        // This should not happen.
        this.logger.error(`Error in running task execution ${execution.executionId}`, error)
      })
      .finally(() => {
        // If runTaskExecutionWithContext fails (should not happen), cancel the execution and
        // remove it from the running executions map. It is retried later on expiration.
        try {
          cancel()
        } catch (error) {
          this.logger.error(`Error in cancelling task execution ${execution.executionId}`, error)
        }
        this.runningTaskExecutionsMap.delete(execution.executionId)
      })
    this.runningTaskExecutionsMap.set(execution.executionId, {
      promise,
      cancel,
    })
  }

  /**
   * Run a task execution with a context. It is expected to be in running state and present in the
   * running executions map.
   *
   * It will update the execution status to failed, timed_out, cancelled,
   * waiting_for_children_tasks, waiting_for_finalize_task, completed, or ready depending on the
   * result of the task. If the task completes successfully, it will update the execution status to
   * waiting_for_children_tasks, waiting_for_finalize_task, or completed. If the task fails, it will
   * update the execution status to failed, timed_out, cancelled, or ready depending on the error.
   * If the error is retryable and the retry attempts are less than the maximum retry attempts, it
   * will update the execution status to ready. All the errors are saved in storage even if the task
   * is retried. They only get cleared if the execution is completed later.
   *
   * If `runTaskExecutionWithContext` runs to completion, the running executions map is updated to
   * remove the execution. All this is atomic so the execution is guaranteed to be removed from the
   * running executions map if `runTaskExecutionWithContext` completes. If it fails but process
   * does not crash, the running executions map is updated in `runTaskExecutionWithCancelSignal`.
   * The task execution remains in the running state and retried later on expiration.
   *
   * @param taskInternal - The task internal to run.
   * @param execution - The task execution to run.
   * @param cancelSignal - The cancel signal.
*/ private async runTaskExecutionWithContext( taskInternal: DurableTaskInternal, execution: DurableTaskExecutionStorageObject, cancelSignal: CancelSignal, ): Promise<void> { const ctx: DurableTaskRunContext = { taskId: taskInternal.id, executionId: execution.executionId, cancelSignal, shutdownSignal: this.shutdownSignal, attempt: execution.retryAttempts, prevError: execution.error, } // Make sure the execution is in running state to not do any wasteful work. This can happen in // a rare case where an execution is cancelled in between the execution getting picked up and // the function being added to the running executions map. await this.withTransaction(async (tx) => { const existingExecutions = await tx.getTaskExecutions({ type: 'by_execution_ids', executionIds: [execution.executionId], }) if (existingExecutions.length === 0 || existingExecutions[0]?.status !== 'running') { this.logger.error( `Task execution ${execution.executionId} is not running: ${existingExecutions[0]!.status}`, ) return } }) try { const result = await taskInternal.runParentWithTimeoutAndCancellation( ctx, this.serializer.deserialize(execution.runInput), execution.timeoutMs, cancelSignal, ) const runOutput = result.output const runOutputSerialized = this.serializer.serialize(runOutput) const childrenTasks = result.childrenTasks const now = new Date() if (childrenTasks.length === 0) { if (taskInternal.finalizeTask) { const finalizeTaskTaskInternal = this.taskInternalsMap.get(taskInternal.finalizeTask.id) if (!finalizeTaskTaskInternal) { throw new DurableExecutionError( `Finalize task ${taskInternal.finalizeTask.id} not found`, false, ) } const finalizeTaskInput = { input: this.serializer.deserialize(execution.runInput),