@systemfsoftware/trigger.dev

A Command-Line Interface for Trigger.dev (v3) projects
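Most of the bundle below is retry and process-management plumbing. In particular it inlines an ExponentialBackoff helper (from ../core-apps/src/backoff.ts) that the worker uses to retry coordinator calls such as WAIT_FOR_TASK and CREATE_TASK_RUN_ATTEMPT. As a rough orientation only, here is a minimal sketch of that retry pattern, assuming ExponentialBackoff is in scope and using a hypothetical callCoordinator() stand-in for the real socket emitWithAck request:

// Sketch, not part of the published bundle. The "FullJitter" strategy and
// maxRetries value mirror the worker's own defaultBackoff instance.
const backoff = new ExponentialBackoff("FullJitter", { maxRetries: 5 });

async function withRetries() {
  const outcome = await backoff.execute(async ({ retry, delay, elapsedMs }) => {
    // Any thrown error schedules another attempt; throwing
    // ExponentialBackoff.StopRetrying aborts the retry loop immediately.
    console.log("attempt", retry, "after", delay.milliseconds, "ms; elapsed", elapsedMs);
    return await callCoordinator(); // hypothetical stand-in
  });

  if (outcome.success) {
    return outcome.result;
  }

  // outcome.cause is "StopRetrying", "Timeout", or "MaxRetries".
  throw new Error(`Coordinator call failed: ${outcome.cause}`);
}

The success/cause result shape and the StopRetrying escape hatch match what ProdWorker checks after each coordinator call further down in the file.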

The file below (1,583 lines, 1,575 loc, 71.6 kB) is a single-file bundle built from src/workers/prod/entry-point.ts together with its inlined ../core-apps dependencies:
// src/workers/prod/entry-point.ts
import {
  CoordinatorToProdWorkerMessages,
  PostStartCauses,
  PreStopCauses,
  ProdWorkerToCoordinatorMessages,
  TaskRunErrorCodes as TaskRunErrorCodes2
} from "@systemfsoftware/trigger.dev_core/v3";
import { ZodSocketConnection } from "@systemfsoftware/trigger.dev_core/v3/zodSocket";

// ../core-apps/src/http.ts
var HttpReply = class {
  constructor(response) {
    this.response = response;
  }
  empty(status) {
    return this.response.writeHead(status ?? 200).end();
  }
  text(text, status, contentType) {
    return this.response.writeHead(status ?? 200, {
      "Content-Type": contentType || "text/plain"
    }).end(text.endsWith("\n") ? text : `${text}
`);
  }
  json(value, pretty) {
    return this.text(
      JSON.stringify(value, void 0, pretty ? 2 : void 0),
      200,
      "application/json"
    );
  }
};
function getRandomInteger(min, max) {
  const intMin = Math.ceil(min);
  const intMax = Math.floor(max);
  return Math.floor(Math.random() * (intMax - intMin + 1)) + intMin;
}
function getRandomPortNumber() {
  return getRandomInteger(8e3, 9999);
}

// ../core-apps/src/logger.ts
var SimpleLogger = class {
  constructor(prefix) {
    this.prefix = prefix;
  }
  #debugEnabled = ["1", "true"].includes(process.env.DEBUG ?? "");
  log(arg0, ...argN) {
    console.log(...this.#getPrefixedArgs(arg0, ...argN));
    return arg0;
  }
  debug(arg0, ...argN) {
    if (!this.#debugEnabled) {
      return arg0;
    }
    console.debug(...this.#getPrefixedArgs("DEBUG", arg0, ...argN));
    return arg0;
  }
  error(arg0, ...argN) {
    console.error(...this.#getPrefixedArgs(arg0, ...argN));
    return arg0;
  }
  #getPrefixedArgs(...args) {
    if (!this.prefix) {
      return args;
    }
    return [this.prefix, ...args];
  }
};

// ../core-apps/src/process.ts
var EXIT_CODE_ALREADY_HANDLED = 111;
var EXIT_CODE_CHILD_NONZERO = 112;

// ../core-apps/src/backoff.ts
import { setTimeout as timeout } from "node:timers/promises";
var StopRetrying = class extends Error {
  constructor(message) {
    super(message);
    this.name = "StopRetrying";
  }
};
var AttemptTimeout = class extends Error {
  constructor(message) {
    super(message);
    this.name = "AttemptTimeout";
  }
};
var RetryLimitExceeded = class extends Error {
  constructor(message) {
    super(message);
    this.name = "RetryLimitExceeded";
  }
};
var ExponentialBackoff = class _ExponentialBackoff {
  #retries = 0;
  #type;
  #base;
  #factor;
  #min;
  #max;
  #maxRetries;
  #maxElapsed;
  constructor(type, opts = {}) {
    this.#type = type ?? "NoJitter";
    this.#base = opts.base ?? 2;
    this.#factor = opts.factor ?? 1;
    this.#min = opts.min ?? -Infinity;
    this.#max = opts.max ?? Infinity;
    this.#maxRetries = opts.maxRetries ?? Infinity;
    this.#maxElapsed = opts.maxElapsed ?? Infinity;
  }
  #clone(type, opts = {}) {
    return new _ExponentialBackoff(type ?? this.#type, {
      base: opts.base ?? this.#base,
      factor: opts.factor ?? this.#factor,
      min: opts.min ?? this.#min,
      max: opts.max ?? this.#max,
      maxRetries: opts.maxRetries ?? this.#maxRetries,
      maxElapsed: opts.maxElapsed ?? this.#maxElapsed
    });
  }
  type(type) {
    return this.#clone(type);
  }
  base(base) {
    return this.#clone(void 0, { base });
  }
  factor(factor) {
    return this.#clone(void 0, { factor });
  }
  min(min) {
    return this.#clone(void 0, { min });
  }
  max(max) {
    return this.#clone(void 0, { max });
  }
  maxRetries(maxRetries) {
    return this.#clone(void 0, { maxRetries });
  }
  // TODO: With .execute(), should this also include the time it takes to execute the callback?
maxElapsed(maxElapsed) { return this.#clone(void 0, { maxElapsed }); } retries(retries) { if (typeof retries !== "undefined") { if (retries > this.#maxRetries) { console.error( `Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.` ); this.#retries = this.#maxRetries; } else { this.#retries = retries; } } return this.#clone(); } async *retryAsync(maxRetries = this.#maxRetries ?? Infinity) { let elapsed = 0; let retry = 0; while (retry <= maxRetries) { const delay = this.delay(retry); elapsed += delay; if (elapsed > this.#maxElapsed) { break; } yield { delay: { seconds: delay, milliseconds: delay * 1e3 }, retry }; retry++; } } async *[Symbol.asyncIterator]() { yield* this.retryAsync(); } /** Returns the delay for the current retry in seconds. */ delay(retries = this.#retries, jitter = true) { if (retries > this.#maxRetries) { console.error( `Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.` ); retries = this.#maxRetries; } let delay = this.#factor * this.#base ** retries; switch (this.#type) { case "NoJitter": { break; } case "FullJitter": { if (!jitter) { delay = 0; break; } delay *= Math.random(); break; } case "EqualJitter": { if (!jitter) { delay *= 0.5; break; } delay *= 0.5 * (1 + Math.random()); break; } default: { throw new Error(`Unknown backoff type: ${this.#type}`); } } if (delay < this.#min) { delay = this.#min + Math.random() * (this.#min * 0.2); } if (delay > this.#max) { delay = this.#max - Math.random() * (this.#max * 0.2); } delay = Math.round(delay); return delay; } /** Waits with the appropriate delay for the current retry. */ async wait(retries = this.#retries, jitter = true) { if (retries > this.#maxRetries) { console.error(`Retry limit exceeded: ${retries} > ${this.#maxRetries}`); throw new RetryLimitExceeded(); } const delay = this.delay(retries, jitter); return await timeout(delay * 1e3); } elapsed(retries = this.#retries, jitter = true) { let elapsed = 0; for (let i = 0; i <= retries; i++) { elapsed += this.delay(i, jitter); } const total = elapsed; let days = 0; if (elapsed > 3600 * 24) { days = Math.floor(elapsed / 3600 / 24); elapsed -= days * 3600 * 24; } let hours = 0; if (elapsed > 3600) { hours = Math.floor(elapsed / 3600); elapsed -= hours * 3600; } let minutes = 0; if (elapsed > 60) { minutes = Math.floor(elapsed / 60); elapsed -= minutes * 60; } const seconds = elapsed; return { seconds, minutes, hours, days, total }; } reset() { this.#retries = 0; return this; } next() { this.#retries++; return this.delay(); } stop() { throw new StopRetrying(); } get state() { return { retries: this.#retries, type: this.#type, base: this.#base, factor: this.#factor, min: this.#min, max: this.#max, maxRetries: this.#maxRetries, maxElapsed: this.#maxElapsed }; } async execute(callback, { attemptTimeoutMs = 0 } = {}) { let elapsedMs = 0; let finalError = void 0; for await (const { delay, retry } of this) { const start = Date.now(); if (retry > 0) { console.log(`Retrying in ${delay.milliseconds}ms`); await timeout(delay.milliseconds); } let attemptTimeout = void 0; try { const result = await new Promise(async (resolve, reject) => { if (attemptTimeoutMs > 0) { attemptTimeout = setTimeout(() => { reject(new AttemptTimeout()); }, attemptTimeoutMs); } try { const callbackResult = await callback({ delay, retry, elapsedMs }); resolve(callbackResult); } catch (error) { reject(error); } }); return { success: true, result }; } catch (error) { finalError = error; if (error 
instanceof StopRetrying) { return { success: false, cause: "StopRetrying", error: error.message }; } if (error instanceof AttemptTimeout) { continue; } } finally { elapsedMs += Date.now() - start; clearTimeout(attemptTimeout); } } if (finalError instanceof AttemptTimeout) { return { success: false, cause: "Timeout" }; } else { return { success: false, cause: "MaxRetries", error: finalError }; } } static RetryLimitExceeded = RetryLimitExceeded; static StopRetrying = StopRetrying; }; // src/workers/prod/backgroundWorker.ts import { ProdChildToWorkerMessages, ProdWorkerToChildMessages, SemanticInternalAttributes, TaskRunErrorCodes, correctErrorStackTrace } from "@systemfsoftware/trigger.dev_core/v3"; import { ZodIpcConnection } from "@systemfsoftware/trigger.dev_core/v3/zodIpc"; import { Evt } from "evt"; import { fork } from "node:child_process"; // src/workers/common/errors.ts var UncaughtExceptionError = class extends Error { constructor(originalError, origin) { super(`Uncaught exception: ${originalError.message}`); this.originalError = originalError; this.origin = origin; this.name = "UncaughtExceptionError"; } }; var TaskMetadataParseError = class extends Error { constructor(zodIssues, tasks) { super(`Failed to parse task metadata`); this.zodIssues = zodIssues; this.tasks = tasks; this.name = "TaskMetadataParseError"; } }; var UnexpectedExitError = class extends Error { constructor(code, signal, stderr) { super(`Unexpected exit with code ${code}`); this.code = code; this.signal = signal; this.stderr = stderr; this.name = "UnexpectedExitError"; } }; var CleanupProcessError = class extends Error { constructor() { super("Cancelled"); this.name = "CleanupProcessError"; } }; var CancelledProcessError = class extends Error { constructor() { super("Cancelled"); this.name = "CancelledProcessError"; } }; var SigKillTimeoutProcessError = class extends Error { constructor() { super("Process kill timeout"); this.name = "SigKillTimeoutProcessError"; } }; var GracefulExitTimeoutError = class extends Error { constructor() { super("Graceful exit timeout"); this.name = "GracefulExitTimeoutError"; } }; function getFriendlyErrorMessage(code, signal, stderr, dockerMode = true) { const message = (text) => { if (signal) { return `[${signal}] ${text}`; } else { return text; } }; if (code === 137) { if (dockerMode) { return message( "Process ran out of memory! Try choosing a machine preset with more memory for this task." ); } else { return message( "Process most likely ran out of memory, but we can't be certain. Try choosing a machine preset with more memory for this task." ); } } if (stderr?.includes("OOMErrorHandler")) { return message( "Process ran out of memory! Try choosing a machine preset with more memory for this task." 
); } return message(`Process exited with code ${code}.`); } // src/workers/prod/backgroundWorker.ts var ProdBackgroundWorker = class { constructor(path, params) { this.path = path; this.params = params; } _initialized = false; /** * @deprecated use onTaskRunHeartbeat instead */ onTaskHeartbeat = new Evt(); onTaskRunHeartbeat = new Evt(); onWaitForDuration = new Evt(); onWaitForTask = new Evt(); onWaitForBatch = new Evt(); onCreateTaskRunAttempt = Evt.create(); attemptCreatedNotification = Evt.create(); _onClose = new Evt(); tasks = []; stderr = []; _taskRunProcess; _taskRunProcessesBeingKilled = /* @__PURE__ */ new Map(); _closed = false; async close(gracefulExitTimeoutElapsed = false) { console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed }); if (this._closed) { return; } this._closed = true; this.onTaskHeartbeat.detach(); this.onTaskRunHeartbeat.detach(); await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed); } async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") { console.log("Killing task run process", { flush, initialSignal, closed: this._closed }); if (this._closed || !this._taskRunProcess) { return; } if (flush) { await this.flushTelemetry(); } const currentTaskRunProcess = this._taskRunProcess; this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => { console.error("Error while trying graceful exit", error); }); console.log("Killed task run process, setting closed to true", { closed: this._closed, pid: currentTaskRunProcess.pid }); this._closed = true; } async flushTelemetry() { console.log("Flushing telemetry"); const start = performance.now(); await this._taskRunProcess?.cleanup(false); console.log("Flushed telemetry", { duration: performance.now() - start }); } async initialize(options) { if (this._initialized) { throw new Error("Worker already initialized"); } let resolved = false; this.tasks = await new Promise((resolve, reject) => { const child = fork(this.path, { stdio: [ /*stdin*/ "ignore", /*stdout*/ "pipe", /*stderr*/ "pipe", "ipc" ], env: { ...this.params.env, ...options?.env } }); const timeout3 = setTimeout(() => { if (resolved) { return; } resolved = true; child.kill(); reject(new Error("Worker timed out")); }, 1e4); child.stdout?.on("data", (data) => { console.log(data.toString()); }); child.stderr?.on("data", (data) => { console.error(data.toString()); this.stderr.push(data.toString()); }); child.on("exit", (code) => { if (!resolved) { clearTimeout(timeout3); resolved = true; reject(new Error(`Worker exited with code ${code}`)); } }); new ZodIpcConnection({ listenSchema: ProdChildToWorkerMessages, emitSchema: ProdWorkerToChildMessages, process: child, handlers: { TASKS_READY: async (message) => { if (!resolved) { clearTimeout(timeout3); resolved = true; resolve(message.tasks); child.kill(); } }, UNCAUGHT_EXCEPTION: async (message) => { if (!resolved) { clearTimeout(timeout3); resolved = true; reject(new UncaughtExceptionError(message.error, message.origin)); child.kill(); } }, TASKS_FAILED_TO_PARSE: async (message) => { if (!resolved) { clearTimeout(timeout3); resolved = true; reject(new TaskMetadataParseError(message.zodIssues, message.tasks)); child.kill(); } } } }); }); this._initialized = true; } getMetadata(workerId, version) { return { contentHash: this.params.contentHash, id: workerId, version }; } // We need to notify all the task run processes that a task run has completed, // in case they are waiting for it through triggerAndWait async 
taskRunCompletedNotification(completion) { this._taskRunProcess?.taskRunCompletedNotification(completion); } async waitCompletedNotification() { this._taskRunProcess?.waitCompletedNotification(); } async #getFreshTaskRunProcess(payload, messageId) { const metadata = this.getMetadata( payload.execution.worker.id, payload.execution.worker.version ); console.log("Getting fresh task run process, setting closed to false", { closed: this._closed }); this._closed = false; await this.#killCurrentTaskRunProcessBeforeAttempt(); const taskRunProcess = new TaskRunProcess( payload.execution.run.id, payload.execution.run.isTest, this.path, { ...this.params.env, ...payload.environment ?? {} }, metadata, this.params, messageId ); taskRunProcess.onExit.attach(({ pid }) => { console.log("Task run process exited", { pid }); if (this._taskRunProcess?.pid === pid) { this._taskRunProcess = void 0; } if (pid) { this._taskRunProcessesBeingKilled.delete(pid); } }); taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => { if (taskRunProcess2?.pid) { this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2); } }); taskRunProcess.onTaskHeartbeat.attach((id) => { this.onTaskHeartbeat.post(id); }); taskRunProcess.onTaskRunHeartbeat.attach((id) => { this.onTaskRunHeartbeat.post(id); }); taskRunProcess.onWaitForBatch.attach((message) => { this.onWaitForBatch.post(message); }); taskRunProcess.onWaitForDuration.attach((message) => { this.onWaitForDuration.post(message); }); taskRunProcess.onWaitForTask.attach((message) => { this.onWaitForTask.post(message); }); await taskRunProcess.initialize(); this._taskRunProcess = taskRunProcess; return this._taskRunProcess; } async forceKillOldTaskRunProcesses() { for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) { try { await taskRunProcess.kill("SIGKILL"); } catch (error) { console.error("Error while force killing old task run processes", error); } } } async #killCurrentTaskRunProcessBeforeAttempt() { console.log("killCurrentTaskRunProcessBeforeAttempt()", { hasTaskRunProcess: !!this._taskRunProcess }); if (!this._taskRunProcess) { return; } const currentTaskRunProcess = this._taskRunProcess; console.log("Killing current task run process", { isBeingKilled: currentTaskRunProcess?.isBeingKilled, totalBeingKilled: this._taskRunProcessesBeingKilled.size }); if (currentTaskRunProcess.isBeingKilled) { if (this._taskRunProcessesBeingKilled.size > 1) { await this.#tryGracefulExit(currentTaskRunProcess); } else { } } else { if (this._taskRunProcessesBeingKilled.size > 0) { await this.#tryGracefulExit(currentTaskRunProcess); } else { currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => { }); } } } async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") { console.log("Trying graceful exit", { kill, initialSignal }); try { const initialExit = taskRunProcess.onExit.waitFor(5e3); if (kill) { taskRunProcess.kill(initialSignal); } await initialExit; } catch (error) { console.error("TaskRunProcess graceful kill timeout exceeded", error); this.#tryForcefulExit(taskRunProcess); } } async #tryForcefulExit(taskRunProcess) { console.log("Trying forceful exit"); try { const forcedKill = taskRunProcess.onExit.waitFor(5e3); taskRunProcess.kill("SIGKILL"); await forcedKill; } catch (error) { console.error("TaskRunProcess forced kill timeout exceeded", error); throw new SigKillTimeoutProcessError(); } } // We need to fork the process before we can execute any tasks, use a fresh process for each execution async executeTaskRun(payload, 
messageId) { try { const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId); console.log("executing task run", { attempt: payload.execution.attempt.id, taskRunPid: taskRunProcess.pid }); const result = await taskRunProcess.executeTaskRun(payload); if (result.ok) { return result; } const error = result.error; if (error.type === "BUILT_IN_ERROR") { const mappedError = await this.#correctError(error, payload.execution); return { ...result, error: mappedError }; } return result; } catch (e) { if (e instanceof CancelledProcessError) { return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.TASK_RUN_CANCELLED } }; } if (e instanceof CleanupProcessError) { return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.TASK_EXECUTION_ABORTED } }; } if (e instanceof UnexpectedExitError) { return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE, message: getFriendlyErrorMessage(e.code, e.signal, e.stderr), stackTrace: e.stderr } }; } if (e instanceof SigKillTimeoutProcessError) { return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT } }; } if (e instanceof GracefulExitTimeoutError) { return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT, message: "Worker process killed while attempt in progress." } }; } return { id: payload.execution.attempt.id, ok: false, retry: void 0, error: { type: "INTERNAL_ERROR", code: TaskRunErrorCodes.TASK_EXECUTION_FAILED } }; } finally { await this.#killTaskRunProcess(); } } async cancelAttempt(attemptId) { if (!this._taskRunProcess) { console.error("No task run process to cancel attempt", { attemptId }); return; } await this._taskRunProcess.cancel(); } async executeTaskRunLazyAttempt(payload) { this.onCreateTaskRunAttempt.post({ runId: payload.runId }); let execution; try { const start = performance.now(); const attemptCreated = await this.attemptCreatedNotification.waitFor(12e4); if (!attemptCreated.success) { throw new Error(`${attemptCreated.reason ?? 
"Unknown error"}`); } console.log("Attempt created", { number: attemptCreated.execution.attempt.number, duration: performance.now() - start }); execution = attemptCreated.execution; } catch (error) { console.error("Error while creating attempt", error); throw new Error(`Failed to create attempt: ${error}`); } const completion = await this.executeTaskRun( { execution, traceContext: payload.traceContext, environment: payload.environment }, payload.messageId ); return { execution, completion }; } async #correctError(error, execution) { return { ...error, stackTrace: correctErrorStackTrace(error.stackTrace, this.params.projectConfig.projectDir) }; } }; var TaskRunProcess = class { constructor(runId, isTest, path, env, metadata, worker, messageId) { this.runId = runId; this.isTest = isTest; this.path = path; this.env = env; this.metadata = metadata; this.worker = worker; this.messageId = messageId; } _ipc; _child; _childPid; _attemptPromises = /* @__PURE__ */ new Map(); _attemptStatuses = /* @__PURE__ */ new Map(); _currentExecution; _isBeingKilled = false; _isBeingCancelled = false; _gracefulExitTimeoutElapsed = false; _stderr = []; /** * @deprecated use onTaskRunHeartbeat instead */ onTaskHeartbeat = new Evt(); onTaskRunHeartbeat = new Evt(); onExit = new Evt(); onIsBeingKilled = new Evt(); onWaitForDuration = new Evt(); onWaitForTask = new Evt(); onWaitForBatch = new Evt(); preCheckpointNotification = Evt.create(); async initialize() { this._child = fork(this.path, { stdio: [ /*stdin*/ "ignore", /*stdout*/ "pipe", /*stderr*/ "pipe", "ipc" ], env: { ...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {}, ...this.env, OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({ [SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir }), ...this.worker.debugOtel ? 
{ OTEL_LOG_LEVEL: "debug" } : {} } }); this._childPid = this._child?.pid; this._ipc = new ZodIpcConnection({ listenSchema: ProdChildToWorkerMessages, emitSchema: ProdWorkerToChildMessages, process: this._child, handlers: { TASK_RUN_COMPLETED: async (message) => { const { result, execution } = message; const promiseStatus = this._attemptStatuses.get(execution.attempt.id); if (promiseStatus !== "PENDING") { return; } this._attemptStatuses.set(execution.attempt.id, "RESOLVED"); const attemptPromise = this._attemptPromises.get(execution.attempt.id); if (!attemptPromise) { return; } const { resolver } = attemptPromise; resolver(result); }, READY_TO_DISPOSE: async (message) => { process.exit(0); }, TASK_HEARTBEAT: async (message) => { if (this.messageId) { this.onTaskRunHeartbeat.post(this.messageId); } else { console.error( "No message id for task heartbeat, falling back to (deprecated) attempt heartbeat", { id: message.id } ); this.onTaskHeartbeat.post(message.id); } }, TASKS_READY: async (message) => { }, WAIT_FOR_TASK: async (message) => { this.onWaitForTask.post(message); }, WAIT_FOR_BATCH: async (message) => { this.onWaitForBatch.post(message); }, WAIT_FOR_DURATION: async (message) => { this.onWaitForDuration.post(message); } } }); this._child.on("exit", this.#handleExit.bind(this)); this._child.stdout?.on("data", this.#handleLog.bind(this)); this._child.stderr?.on("data", this.#handleStdErr.bind(this)); } async cancel() { this._isBeingCancelled = true; await this.cleanup(true); } async cleanup(kill = false, gracefulExitTimeoutElapsed = false) { console.log("cleanup()", { kill, gracefulExitTimeoutElapsed }); if (kill && this._isBeingKilled) { return; } if (kill) { this._isBeingKilled = true; this.onIsBeingKilled.post(this); } const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution; const killParentProcess = kill && !killChildProcess; console.log("Cleaning up task run process", { killChildProcess, killParentProcess, ipc: this._ipc, childPid: this._childPid, realChildPid: this._child?.pid }); try { await this._ipc?.sendWithAck( "CLEANUP", { flush: true, kill: killParentProcess }, 3e4 ); } catch (error) { console.error("Error while cleaning up task run process", error); if (killParentProcess) { process.exit(0); } } if (killChildProcess) { this._gracefulExitTimeoutElapsed = true; await this.kill("SIGKILL"); } } async executeTaskRun(payload) { let resolver; let rejecter; const promise = new Promise((resolve, reject) => { resolver = resolve; rejecter = reject; }); this._attemptStatuses.set(payload.execution.attempt.id, "PENDING"); this._attemptPromises.set(payload.execution.attempt.id, { resolver, rejecter }); const { execution, traceContext } = payload; this._currentExecution = execution; if (this._child?.connected && !this._isBeingKilled && !this._child.killed) { await this._ipc?.send("EXECUTE_TASK_RUN", { execution, traceContext, metadata: this.metadata }); } const result = await promise; this._currentExecution = void 0; return result; } taskRunCompletedNotification(completion) { if (!completion.ok && typeof completion.retry !== "undefined") { console.error( "Task run completed with error and wants to retry, won't send task run completed notification" ); return; } if (!this._child?.connected || this._isBeingKilled || this._child.killed) { console.error( "Child process not connected or being killed, can't send task run completed notification" ); return; } this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", { version: "v2", completion }); } waitCompletedNotification() { 
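// Forwards WAIT_COMPLETED_NOTIFICATION to the child process; skipped when the child is disconnected or already being killed.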
if (!this._child?.connected || this._isBeingKilled || this._child.killed) { console.error( "Child process not connected or being killed, can't send wait completed notification" ); return; } this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {}); } async #handleExit(code, signal) { console.log("handling child exit", { code, signal }); for (const [id, status] of this._attemptStatuses.entries()) { if (status === "PENDING") { console.log("found pending attempt", { id }); this._attemptStatuses.set(id, "REJECTED"); const attemptPromise = this._attemptPromises.get(id); if (!attemptPromise) { continue; } const { rejecter } = attemptPromise; if (this._isBeingCancelled) { rejecter(new CancelledProcessError()); } else if (this._gracefulExitTimeoutElapsed) { rejecter(new GracefulExitTimeoutError()); } else if (this._isBeingKilled) { rejecter(new CleanupProcessError()); } else { rejecter( new UnexpectedExitError( code ?? -1, signal, this._stderr.length ? this._stderr.join("\n") : void 0 ) ); } } } this.onExit.post({ code, signal, pid: this.pid }); } #handleLog(data) { console.log(data.toString()); } #handleStdErr(data) { const text = data.toString(); console.error(text); if (this._stderr.length > 100) { this._stderr.shift(); } this._stderr.push(text); } async kill(signal, timeoutInMs) { this._isBeingKilled = true; const killTimeout = this.onExit.waitFor(timeoutInMs); this.onIsBeingKilled.post(this); this._child?.kill(signal); if (timeoutInMs) { await killTimeout; } } get isBeingKilled() { return this._isBeingKilled || this._child?.killed; } get pid() { return this._childPid; } }; // src/workers/prod/entry-point.ts import { checkpointSafeTimeout, unboundedTimeout } from "@systemfsoftware/trigger.dev_core/v3/utils/timers"; import { randomUUID } from "node:crypto"; import { readFile } from "node:fs/promises"; import { createServer } from "node:http"; import { setTimeout as timeout2 } from "node:timers/promises"; import { Evt as Evt2 } from "evt"; var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber()); var COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1"; var COORDINATOR_PORT = Number(process.env.COORDINATOR_PORT || 50080); var MACHINE_NAME = process.env.MACHINE_NAME || "local"; var POD_NAME = process.env.POD_NAME || "some-pod"; var SHORT_HASH = process.env.TRIGGER_CONTENT_HASH.slice(0, 9); var logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`); var defaultBackoff = new ExponentialBackoff("FullJitter", { maxRetries: 5 }); var ProdWorker = class { constructor(port, host = "0.0.0.0") { this.host = host; process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM")); this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST); this.#backgroundWorker = this.#createBackgroundWorker(); this.#httpPort = port; this.#httpServer = this.#createHttpServer(); } apiUrl = process.env.TRIGGER_API_URL; apiKey = process.env.TRIGGER_SECRET_KEY; contentHash = process.env.TRIGGER_CONTENT_HASH; projectRef = process.env.TRIGGER_PROJECT_REF; envId = process.env.TRIGGER_ENV_ID; runId = process.env.TRIGGER_RUN_ID || "index-only"; deploymentId = process.env.TRIGGER_DEPLOYMENT_ID; deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION; runningInKubernetes = !!process.env.KUBERNETES_PORT; executing = false; completed = /* @__PURE__ */ new Set(); paused = false; attemptFriendlyId; attemptNumber; nextResumeAfter; waitForPostStart = false; connectionCount = 0; restoreNotification = Evt2.create(); waitForTaskReplay; waitForBatchReplay; readyForLazyAttemptReplay; 
durationResumeFallback; #httpPort; #backgroundWorker; #httpServer; #coordinatorSocket; async #handleSignal(signal) { logger.log("Received signal", { signal }); if (signal === "SIGTERM") { let gracefulExitTimeoutElapsed = false; if (this.executing) { const terminationGracePeriodSeconds = 60 * 60; logger.log("Waiting for attempt to complete before exiting", { terminationGracePeriodSeconds }); await timeout2(terminationGracePeriodSeconds * 1e3 - 5e3); gracefulExitTimeoutElapsed = true; logger.log("Termination timeout reached, exiting gracefully."); } else { logger.log("Not executing, exiting immediately."); } await this.#exitGracefully(gracefulExitTimeoutElapsed); return; } logger.log("Unhandled signal", { signal }); } async #exitGracefully(gracefulExitTimeoutElapsed = false, exitCode = 0) { await this.#backgroundWorker.close(gracefulExitTimeoutElapsed); if (!gracefulExitTimeoutElapsed) { process.exit(exitCode); } } async #reconnectAfterPostStart() { this.waitForPostStart = false; this.#coordinatorSocket.close(); this.connectionCount = 0; let coordinatorHost = COORDINATOR_HOST; try { if (this.runningInKubernetes) { coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace( "\n", "" ); logger.log("reconnecting", { coordinatorHost: { fromEnv: COORDINATOR_HOST, fromVolume: coordinatorHost, current: this.#coordinatorSocket.socket.io.opts.hostname } }); } } catch (error) { logger.error("taskinfo read error during reconnect", { error: error instanceof Error ? error.message : error }); } finally { this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost); } } // MARK: TASK WAIT #waitForTaskHandlerFactory(workerId) { return async (message, replayIdempotencyKey) => { logger.log("onWaitForTask", { workerId, message }); if (this.nextResumeAfter) { logger.error("Already waiting for resume, skipping wait for task", { nextResumeAfter: this.nextResumeAfter }); return; } const waitForTask = await defaultBackoff.execute(async ({ retry }) => { logger.log("Wait for task with backoff", { retry }); if (!this.attemptFriendlyId) { logger.error("Failed to send wait message, attempt friendly ID not set", { message }); throw new ExponentialBackoff.StopRetrying("No attempt ID"); } return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_TASK", { version: "v2", friendlyId: message.friendlyId, attemptFriendlyId: this.attemptFriendlyId }); }); if (!waitForTask.success) { logger.error("Failed to wait for task with backoff", { cause: waitForTask.cause, error: waitForTask.error }); this.#emitUnrecoverableError( "WaitForTaskFailed", `${waitForTask.cause}: ${waitForTask.error}` ); return; } const { willCheckpointAndRestore } = waitForTask.result; await this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore); if (willCheckpointAndRestore) { if (!this.waitForTaskReplay) { this.waitForTaskReplay = { message, attempt: 1, idempotencyKey: randomUUID() }; } else { if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForTaskReplay.idempotencyKey) { logger.error( "wait for task handler called with mismatched idempotency key, won't overwrite replay request" ); return; } this.waitForTaskReplay.attempt++; } } }; } // MARK: BATCH WAIT #waitForBatchHandlerFactory(workerId) { return async (message, replayIdempotencyKey) => { logger.log("onWaitForBatch", { workerId, message }); if (this.nextResumeAfter) { logger.error("Already waiting for resume, skipping wait for batch", { nextResumeAfter: this.nextResumeAfter }); return; } const waitForBatch = await 
defaultBackoff.execute(async ({ retry }) => { logger.log("Wait for batch with backoff", { retry }); if (!this.attemptFriendlyId) { logger.error("Failed to send wait message, attempt friendly ID not set", { message }); throw new ExponentialBackoff.StopRetrying("No attempt ID"); } return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_BATCH", { version: "v2", batchFriendlyId: message.batchFriendlyId, runFriendlyIds: message.runFriendlyIds, attemptFriendlyId: this.attemptFriendlyId }); }); if (!waitForBatch.success) { logger.error("Failed to wait for batch with backoff", { cause: waitForBatch.cause, error: waitForBatch.error }); this.#emitUnrecoverableError( "WaitForBatchFailed", `${waitForBatch.cause}: ${waitForBatch.error}` ); return; } const { willCheckpointAndRestore } = waitForBatch.result; await this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore); if (willCheckpointAndRestore) { if (!this.waitForBatchReplay) { this.waitForBatchReplay = { message, attempt: 1, idempotencyKey: randomUUID() }; } else { if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForBatchReplay.idempotencyKey) { logger.error( "wait for task handler called with mismatched idempotency key, won't overwrite replay request" ); return; } this.waitForBatchReplay.attempt++; } } }; } // MARK: WORKER CREATION #createBackgroundWorker() { const workerId = randomUUID(); logger.log("Creating background worker", { workerId }); const backgroundWorker = new ProdBackgroundWorker("worker.js", { projectConfig: __PROJECT_CONFIG__, env: { ...gatherProcessEnv(), TRIGGER_API_URL: this.apiUrl, TRIGGER_SECRET_KEY: this.apiKey, OTEL_EXPORTER_OTLP_ENDPOINT: process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318" }, contentHash: this.contentHash }); backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => { logger.log("onTaskHeartbeat", { workerId, attemptFriendlyId }); this.#coordinatorSocket.socket.volatile.emit("TASK_HEARTBEAT", { version: "v1", attemptFriendlyId }); }); backgroundWorker.onTaskRunHeartbeat.attach((runId) => { logger.log("onTaskRunHeartbeat", { workerId, runId }); this.#coordinatorSocket.socket.volatile.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId }); }); backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => { logger.log("onCreateTaskRunAttempt()", { workerId, message }); const createAttempt = await defaultBackoff.execute(async ({ retry }) => { logger.log("Create task run attempt with backoff", { retry }); return await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CREATE_TASK_RUN_ATTEMPT", { version: "v1", runId: message.runId }); }); if (!createAttempt.success) { backgroundWorker.attemptCreatedNotification.post({ success: false, reason: `Failed to create attempt with backoff due to ${createAttempt.cause}. ${createAttempt.error}` }); return; } if (!createAttempt.result.success) { backgroundWorker.attemptCreatedNotification.post({ success: false, reason: createAttempt.result.reason }); return; } backgroundWorker.attemptCreatedNotification.post({ success: true, execution: createAttempt.result.executionPayload.execution }); }); backgroundWorker.attemptCreatedNotification.attach((message) => { logger.log("attemptCreatedNotification", { workerId, success: message.success, ...message.success ? 
{ attempt: message.execution.attempt, queue: message.execution.queue, worker: message.execution.worker, machine: message.execution.machine } : { reason: message.reason } }); if (!message.success) { return; } this.attemptFriendlyId = message.execution.attempt.id; this.attemptNumber = message.execution.attempt.number; }); backgroundWorker.onWaitForDuration.attach(async (message) => { logger.log("onWaitForDuration", { workerId, ...message, drift: Date.now() - message.now }); if (this.nextResumeAfter) { logger.error("Already waiting for resume, skipping wait for duration", { nextResumeAfter: this.nextResumeAfter }); return; } noResume: { const { ms, waitThresholdInMs } = message; const internalTimeout = unboundedTimeout(ms, "internal"); const checkpointSafeInternalTimeout = checkpointSafeTimeout(ms); if (ms < waitThresholdInMs) { await internalTimeout; break noResume; } const waitForDuration = await defaultBackoff.execute(async ({ retry }) => { logger.log("Wait for duration with backoff", { retry }); if (!this.attemptFriendlyId) { logger.error("Failed to send wait message, attempt friendly ID not set", { message }); throw new ExponentialBackoff.StopRetrying("No attempt ID"); } return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_DURATION", { ...message, attemptFriendlyId: this.attemptFriendlyId }); }); if (!waitForDuration.success) { logger.error("Failed to wait for duration with backoff", { cause: waitForDuration.cause, error: waitForDuration.error }); this.#emitUnrecoverableError( "WaitForDurationFailed", `${waitForDuration.cause}: ${waitForDuration.error}` ); return; } const { willCheckpointAndRestore } = waitForDuration.result; if (!willCheckpointAndRestore) { await internalTimeout; break noResume; } await this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore); await Promise.race([internalTimeout, checkpointSafeInternalTimeout]); const idempotencyKey = randomUUID(); this.durationResumeFallback = { idempotencyKey }; try { await this.restoreNotification.waitFor(5e3); } catch (error) { logger.error("Did not receive restore notification in time", { error }); } try { const { checkpointCanceled } = await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CANCEL_CHECKPOINT", { version: "v2", reason: "WAIT_FOR_DURATION" }); logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled }); if (checkpointCanceled) { break noResume; } logger.log("Waiting for external duration resume as we may have been restored"); setTimeout(() => { if (!this.durationResumeFallback) { logger.error("Already resumed after duration, skipping fallback"); return; } if (this.durationResumeFallback.idempotencyKey !== idempotencyKey) { logger.error("Duration resume idempotency key mismatch, skipping fallback"); return; } logger.log("Resuming after duration with fallback"); this.#resumeAfterDuration(); }, 15e3); } catch (error) { logger.debug("Checkpoint cancellation timed out", { workerId, message, error }); } return; } this.#resumeAfterDuration(); }); backgroundWorker.onWaitForTask.attach(this.#waitForTaskHandlerFactory(workerId).bind(this)); backgroundWorker.onWaitForBatch.attach(this.#waitForBatchHandlerFactory(workerId).bind(this)); return backgroundWorker; } async #prepareForWait(reason, willCheckpointAndRestore) { logger.log(`prepare for ${reason}`, { willCheckpointAndRestore }); if (this.nextResumeAfter) { logger.error("Already waiting for resume, skipping prepare for wait", { nextResumeAfter: this.nextResumeAfter, params: { reason, willCheckpointAndRestore } 
}); return; } if (!willCheckpointAndRestore) { return; } this.paused = true; this.nextResumeAfter = reason; this.waitForPostStart = true; await this.#prepareForCheckpoint(); } // MARK: RETRY PREP async #prepareForRetry(shouldExit, exitCode) { logger.log("prepare for retry", { shouldExit, exitCode }); if (shouldExit) { await this.#exitGracefully(false, exitCode); return; } this.paused = false; this.waitForPostStart = false; this.executing = false; this.attemptFriendlyId = void 0; this.attemptNumber = void 0; } // MARK: CHECKPOINT PREP async #prepareForCheckpoint(flush = true) { if (flush) { try { await this.#backgroundWorker.flushTelemetry(); } catch (error) { logger.error( "Failed to flush telemetry while preparing for checkpoint, will proceed anyway", { error } ); } } try { await this.#backgroundWorker.forceKillOldTaskRunProcesses(); } catch (error) { logger.error( "Failed to kill previous worker while preparing for checkpoint, will proceed anyway", { error } ); } this.#readyForCheckpoint(); } #resumeAfterDuration() { this.paused = false; this.nextResumeAfter = void 0; this.waitForPostStart = false; this.durationResumeFallback = void 0; this.#backgroundWorker.waitCompletedNotification(); } async #readyForLazyAttempt() { const idempotencyKey = randomUUID(); this.readyForLazyAttemptReplay = { idempotencyKey }; for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(3)) { if (retry > 0) { logger.log("retrying ready for lazy attempt", { retry }); } this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", { version: "v1", runId: this.runId, totalCompletions: this.completed.size }); await timeout2(delay.milliseconds); if (!this.readyForLazyAttemptReplay) { logger.error("replay ready for lazy attempt cancelled, discarding", { idempotencyKey }); return; } if (idempotencyKey !== this.readyForLazyAttemptReplay.idempotencyKey) { logger.error("replay ready for lazy attempt idempotency key mismatch, discarding", { idempotencyKey, newIdempotencyKey: this.readyForLazyAttemptReplay.idempotencyKey }); return; } } this.#failRun(this.runId, "Failed to receive execute request in a reasonable time"); } #readyForCheckpoint() { this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" }); } #failRun(anyRunId, error) { logger.error("Failing run", { anyRunId, error }); const completion = { ok: false, id: anyRunId, retry: void 0, error: error instanceof Error ? { type: "BUILT_IN_ERROR", name: error.name, message: error.message, stackTrace: error.stack ?? "" } : { type: "BUILT_IN_ERROR", name: "UnknownError", message: String(error), sta