@systemfsoftware/trigger.dev
A Command-Line Interface for Trigger.dev (v3) projects
// src/workers/prod/entry-point.ts
import {
CoordinatorToProdWorkerMessages,
PostStartCauses,
PreStopCauses,
ProdWorkerToCoordinatorMessages,
TaskRunErrorCodes as TaskRunErrorCodes2
} from "@systemfsoftware/trigger.dev_core/v3";
import { ZodSocketConnection } from "@systemfsoftware/trigger.dev_core/v3/zodSocket";
// ../core-apps/src/http.ts
var HttpReply = class {
constructor(response) {
this.response = response;
}
empty(status) {
return this.response.writeHead(status ?? 200).end();
}
text(text, status, contentType) {
return this.response.writeHead(status ?? 200, { "Content-Type": contentType || "text/plain" }).end(text.endsWith("\n") ? text : `${text}
`);
}
json(value, pretty) {
return this.text(
JSON.stringify(value, void 0, pretty ? 2 : void 0),
200,
"application/json"
);
}
};
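// Usage sketch (illustrative, not part of the bundle; the route and payload
// are hypothetical). HttpReply wraps a node:http ServerResponse so handlers
// can send small replies in one expression:
//
//   import { createServer } from "node:http";
//   const server = createServer((req, res) => {
//     const reply = new HttpReply(res);
//     if (req.url === "/health") {
//       return reply.json({ ok: true }, true); // pretty-printed JSON
//     }
//     return reply.empty(404);
//   });
//   server.listen(getRandomPortNumber());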
function getRandomInteger(min, max) {
const intMin = Math.ceil(min);
const intMax = Math.floor(max);
return Math.floor(Math.random() * (intMax - intMin + 1)) + intMin;
}
function getRandomPortNumber() {
return getRandomInteger(8e3, 9999);
}
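// Note: getRandomInteger is inclusive at both ends, so ports are drawn
// uniformly from 8000-9999.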
// ../core-apps/src/logger.ts
var SimpleLogger = class {
constructor(prefix) {
this.prefix = prefix;
}
#debugEnabled = ["1", "true"].includes(process.env.DEBUG ?? "");
log(arg0, ...argN) {
console.log(...this.#getPrefixedArgs(arg0, ...argN));
return arg0;
}
debug(arg0, ...argN) {
if (!this.#debugEnabled) {
return arg0;
}
console.debug(...this.#getPrefixedArgs("DEBUG", arg0, ...argN));
return arg0;
}
error(arg0, ...argN) {
console.error(...this.#getPrefixedArgs(arg0, ...argN));
return arg0;
}
#getPrefixedArgs(...args) {
if (!this.prefix) {
return args;
}
return [this.prefix, ...args];
}
};
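// Usage sketch (illustrative): setting DEBUG=1 or DEBUG=true in the
// environment enables .debug() output; every method returns its first
// argument, so logging can be inlined into expressions:
//
//   const log = new SimpleLogger("[prod-worker]");
//   const port = log.debug(getRandomPortNumber()); // logs (when DEBUG) and returns the value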
// ../core-apps/src/process.ts
var EXIT_CODE_ALREADY_HANDLED = 111;
var EXIT_CODE_CHILD_NONZERO = 112;
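// Semantics inferred from the names (not documented in this file): 111 marks
// an error that was already handled/reported elsewhere; 112 marks a child
// process that exited with a non-zero code.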
// ../core-apps/src/backoff.ts
import { setTimeout as timeout } from "node:timers/promises";
var StopRetrying = class extends Error {
constructor(message) {
super(message);
this.name = "StopRetrying";
}
};
var AttemptTimeout = class extends Error {
constructor(message) {
super(message);
this.name = "AttemptTimeout";
}
};
var RetryLimitExceeded = class extends Error {
constructor(message) {
super(message);
this.name = "RetryLimitExceeded";
}
};
var ExponentialBackoff = class _ExponentialBackoff {
#retries = 0;
#type;
#base;
#factor;
#min;
#max;
#maxRetries;
#maxElapsed;
constructor(type, opts = {}) {
this.#type = type ?? "NoJitter";
this.#base = opts.base ?? 2;
this.#factor = opts.factor ?? 1;
this.#min = opts.min ?? -Infinity;
this.#max = opts.max ?? Infinity;
this.#maxRetries = opts.maxRetries ?? Infinity;
this.#maxElapsed = opts.maxElapsed ?? Infinity;
}
#clone(type, opts = {}) {
return new _ExponentialBackoff(type ?? this.#type, {
base: opts.base ?? this.#base,
factor: opts.factor ?? this.#factor,
min: opts.min ?? this.#min,
max: opts.max ?? this.#max,
maxRetries: opts.maxRetries ?? this.#maxRetries,
maxElapsed: opts.maxElapsed ?? this.#maxElapsed
});
}
type(type) {
return this.#clone(type);
}
base(base) {
return this.#clone(void 0, { base });
}
factor(factor) {
return this.#clone(void 0, { factor });
}
min(min) {
return this.#clone(void 0, { min });
}
max(max) {
return this.#clone(void 0, { max });
}
maxRetries(maxRetries) {
return this.#clone(void 0, { maxRetries });
}
// TODO: With .execute(), should this also include the time it takes to execute the callback?
maxElapsed(maxElapsed) {
return this.#clone(void 0, { maxElapsed });
}
retries(retries) {
if (typeof retries !== "undefined") {
if (retries > this.#maxRetries) {
console.error(
`Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.`
);
this.#retries = this.#maxRetries;
} else {
this.#retries = retries;
}
}
return this.#clone();
}
async *retryAsync(maxRetries = this.#maxRetries) {
let elapsed = 0;
let retry = 0;
while (retry <= maxRetries) {
const delay = this.delay(retry);
elapsed += delay;
if (elapsed > this.#maxElapsed) {
break;
}
yield {
delay: {
seconds: delay,
milliseconds: delay * 1e3
},
retry
};
retry++;
}
}
async *[Symbol.asyncIterator]() {
yield* this.retryAsync();
}
/** Returns the delay for the current retry in seconds. */
delay(retries = this.#retries, jitter = true) {
if (retries > this.#maxRetries) {
console.error(
`Can't set retries ${retries} higher than maxRetries (${this.#maxRetries}), setting to maxRetries instead.`
);
retries = this.#maxRetries;
}
let delay = this.#factor * this.#base ** retries;
switch (this.#type) {
case "NoJitter": {
break;
}
case "FullJitter": {
if (!jitter) {
delay = 0;
break;
}
delay *= Math.random();
break;
}
case "EqualJitter": {
if (!jitter) {
delay *= 0.5;
break;
}
delay *= 0.5 * (1 + Math.random());
break;
}
default: {
throw new Error(`Unknown backoff type: ${this.#type}`);
}
}
if (delay < this.#min) {
delay = this.#min + Math.random() * (this.#min * 0.2);
}
if (delay > this.#max) {
delay = this.#max - Math.random() * (this.#max * 0.2);
}
delay = Math.round(delay);
return delay;
}
/** Waits with the appropriate delay for the current retry. */
async wait(retries = this.#retries, jitter = true) {
if (retries > this.#maxRetries) {
console.error(`Retry limit exceeded: ${retries} > ${this.#maxRetries}`);
throw new RetryLimitExceeded();
}
const delay = this.delay(retries, jitter);
return await timeout(delay * 1e3);
}
elapsed(retries = this.#retries, jitter = true) {
let elapsed = 0;
for (let i = 0; i <= retries; i++) {
elapsed += this.delay(i, jitter);
}
const total = elapsed;
let days = 0;
if (elapsed > 3600 * 24) {
days = Math.floor(elapsed / 3600 / 24);
elapsed -= days * 3600 * 24;
}
let hours = 0;
if (elapsed > 3600) {
hours = Math.floor(elapsed / 3600);
elapsed -= hours * 3600;
}
let minutes = 0;
if (elapsed > 60) {
minutes = Math.floor(elapsed / 60);
elapsed -= minutes * 60;
}
const seconds = elapsed;
return {
seconds,
minutes,
hours,
days,
total
};
}
reset() {
this.#retries = 0;
return this;
}
next() {
this.#retries++;
return this.delay();
}
stop() {
throw new StopRetrying();
}
get state() {
return {
retries: this.#retries,
type: this.#type,
base: this.#base,
factor: this.#factor,
min: this.#min,
max: this.#max,
maxRetries: this.#maxRetries,
maxElapsed: this.#maxElapsed
};
}
async execute(callback, { attemptTimeoutMs = 0 } = {}) {
let elapsedMs = 0;
let finalError = void 0;
for await (const { delay, retry } of this) {
const start = Date.now();
if (retry > 0) {
console.log(`Retrying in ${delay.milliseconds}ms`);
await timeout(delay.milliseconds);
}
let attemptTimeout = void 0;
try {
const result = await new Promise(async (resolve, reject) => {
if (attemptTimeoutMs > 0) {
attemptTimeout = setTimeout(() => {
reject(new AttemptTimeout());
}, attemptTimeoutMs);
}
try {
const callbackResult = await callback({ delay, retry, elapsedMs });
resolve(callbackResult);
} catch (error) {
reject(error);
}
});
return {
success: true,
result
};
} catch (error) {
finalError = error;
if (error instanceof StopRetrying) {
return {
success: false,
cause: "StopRetrying",
error: error.message
};
}
if (error instanceof AttemptTimeout) {
continue;
}
} finally {
elapsedMs += Date.now() - start;
clearTimeout(attemptTimeout);
}
}
if (finalError instanceof AttemptTimeout) {
return {
success: false,
cause: "Timeout"
};
} else {
return {
success: false,
cause: "MaxRetries",
error: finalError
};
}
}
static RetryLimitExceeded = RetryLimitExceeded;
static StopRetrying = StopRetrying;
};
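// Usage sketch (illustrative; the URL is hypothetical). Delays grow as
// factor * base ** retries, here with full jitter and at most 5 retries.
// execute() resolves to { success: true, result } or a { success: false }
// object whose cause is "StopRetrying", "Timeout" or "MaxRetries":
//
//   const backoff = new ExponentialBackoff("FullJitter", { maxRetries: 5 });
//   const outcome = await backoff.execute(async ({ retry, elapsedMs }) => {
//     const res = await fetch("https://coordinator.example/health");
//     if (!res.ok) throw new Error(`HTTP ${res.status}`);
//     return res.json();
//   });
//   if (!outcome.success) console.error(outcome.cause, outcome.error);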
// src/workers/prod/backgroundWorker.ts
import {
ProdChildToWorkerMessages,
ProdWorkerToChildMessages,
SemanticInternalAttributes,
TaskRunErrorCodes,
correctErrorStackTrace
} from "@systemfsoftware/trigger.dev_core/v3";
import { ZodIpcConnection } from "@systemfsoftware/trigger.dev_core/v3/zodIpc";
import { Evt } from "evt";
import { fork } from "node:child_process";
// src/workers/common/errors.ts
var UncaughtExceptionError = class extends Error {
constructor(originalError, origin) {
super(`Uncaught exception: ${originalError.message}`);
this.originalError = originalError;
this.origin = origin;
this.name = "UncaughtExceptionError";
}
};
var TaskMetadataParseError = class extends Error {
constructor(zodIssues, tasks) {
super(`Failed to parse task metadata`);
this.zodIssues = zodIssues;
this.tasks = tasks;
this.name = "TaskMetadataParseError";
}
};
var UnexpectedExitError = class extends Error {
constructor(code, signal, stderr) {
super(`Unexpected exit with code ${code}`);
this.code = code;
this.signal = signal;
this.stderr = stderr;
this.name = "UnexpectedExitError";
}
};
var CleanupProcessError = class extends Error {
constructor() {
super("Cancelled");
this.name = "CleanupProcessError";
}
};
var CancelledProcessError = class extends Error {
constructor() {
super("Cancelled");
this.name = "CancelledProcessError";
}
};
var SigKillTimeoutProcessError = class extends Error {
constructor() {
super("Process kill timeout");
this.name = "SigKillTimeoutProcessError";
}
};
var GracefulExitTimeoutError = class extends Error {
constructor() {
super("Graceful exit timeout");
this.name = "GracefulExitTimeoutError";
}
};
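// Each of the error classes above is translated into a typed INTERNAL_ERROR
// completion (TaskRunErrorCodes.*) in ProdBackgroundWorker.executeTaskRun
// further down this file.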
function getFriendlyErrorMessage(code, signal, stderr, dockerMode = true) {
const message = (text) => {
if (signal) {
return `[${signal}] ${text}`;
} else {
return text;
}
};
if (code === 137) {
if (dockerMode) {
return message(
"Process ran out of memory! Try choosing a machine preset with more memory for this task."
);
} else {
return message(
"Process most likely ran out of memory, but we can't be certain. Try choosing a machine preset with more memory for this task."
);
}
}
if (stderr?.includes("OOMErrorHandler")) {
return message(
"Process ran out of memory! Try choosing a machine preset with more memory for this task."
);
}
return message(`Process exited with code ${code}.`);
}
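// Illustrative call: exit code 137 is 128 + 9 (SIGKILL), which on container
// platforms almost always means the process was OOM-killed, hence the memory
// hint above.
//
//   getFriendlyErrorMessage(137, "SIGKILL", undefined);
//   // => "[SIGKILL] Process ran out of memory! Try choosing a machine preset ..."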
// src/workers/prod/backgroundWorker.ts
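// ProdBackgroundWorker supervises forked task-run processes: initialize()
// forks once to index tasks, each attempt then runs in a fresh child
// (#getFreshTaskRunProcess), and heartbeats/wait events are re-posted on the
// Evt emitters below. Rough lifecycle (illustrative; the payload and
// messageId come from the coordinator):
//
//   const worker = new ProdBackgroundWorker("worker.js", {
//     projectConfig, env: gatherProcessEnv(), contentHash,
//   });
//   await worker.initialize();                   // index tasks (10s timeout)
//   const completion = await worker.executeTaskRun(payload, messageId);
//   await worker.close();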
var ProdBackgroundWorker = class {
constructor(path, params) {
this.path = path;
this.params = params;
}
_initialized = false;
/**
* @deprecated use onTaskRunHeartbeat instead
*/
onTaskHeartbeat = new Evt();
onTaskRunHeartbeat = new Evt();
onWaitForDuration = new Evt();
onWaitForTask = new Evt();
onWaitForBatch = new Evt();
onCreateTaskRunAttempt = Evt.create();
attemptCreatedNotification = Evt.create();
_onClose = new Evt();
tasks = [];
stderr = [];
_taskRunProcess;
_taskRunProcessesBeingKilled = /* @__PURE__ */ new Map();
_closed = false;
async close(gracefulExitTimeoutElapsed = false) {
console.log("Closing worker", { gracefulExitTimeoutElapsed, closed: this._closed });
if (this._closed) {
return;
}
this._closed = true;
this.onTaskHeartbeat.detach();
this.onTaskRunHeartbeat.detach();
await this._taskRunProcess?.cleanup(true, gracefulExitTimeoutElapsed);
}
async #killTaskRunProcess(flush = true, initialSignal = "SIGTERM") {
console.log("Killing task run process", { flush, initialSignal, closed: this._closed });
if (this._closed || !this._taskRunProcess) {
return;
}
if (flush) {
await this.flushTelemetry();
}
const currentTaskRunProcess = this._taskRunProcess;
this.#tryGracefulExit(currentTaskRunProcess, true, initialSignal).catch((error) => {
console.error("Error while trying graceful exit", error);
});
console.log("Killed task run process, setting closed to true", {
closed: this._closed,
pid: currentTaskRunProcess.pid
});
this._closed = true;
}
async flushTelemetry() {
console.log("Flushing telemetry");
const start = performance.now();
await this._taskRunProcess?.cleanup(false);
console.log("Flushed telemetry", { duration: performance.now() - start });
}
async initialize(options) {
if (this._initialized) {
throw new Error("Worker already initialized");
}
let resolved = false;
this.tasks = await new Promise((resolve, reject) => {
const child = fork(this.path, {
stdio: [
/*stdin*/
"ignore",
/*stdout*/
"pipe",
/*stderr*/
"pipe",
"ipc"
],
env: {
...this.params.env,
...options?.env
}
});
const timeout3 = setTimeout(() => {
if (resolved) {
return;
}
resolved = true;
child.kill();
reject(new Error("Worker timed out"));
}, 1e4);
child.stdout?.on("data", (data) => {
console.log(data.toString());
});
child.stderr?.on("data", (data) => {
console.error(data.toString());
this.stderr.push(data.toString());
});
child.on("exit", (code) => {
if (!resolved) {
clearTimeout(timeout3);
resolved = true;
reject(new Error(`Worker exited with code ${code}`));
}
});
new ZodIpcConnection({
listenSchema: ProdChildToWorkerMessages,
emitSchema: ProdWorkerToChildMessages,
process: child,
handlers: {
TASKS_READY: async (message) => {
if (!resolved) {
clearTimeout(timeout3);
resolved = true;
resolve(message.tasks);
child.kill();
}
},
UNCAUGHT_EXCEPTION: async (message) => {
if (!resolved) {
clearTimeout(timeout3);
resolved = true;
reject(new UncaughtExceptionError(message.error, message.origin));
child.kill();
}
},
TASKS_FAILED_TO_PARSE: async (message) => {
if (!resolved) {
clearTimeout(timeout3);
resolved = true;
reject(new TaskMetadataParseError(message.zodIssues, message.tasks));
child.kill();
}
}
}
});
});
this._initialized = true;
}
getMetadata(workerId, version) {
return {
contentHash: this.params.contentHash,
id: workerId,
version
};
}
// We need to notify all the task run processes that a task run has completed,
// in case they are waiting for it through triggerAndWait
async taskRunCompletedNotification(completion) {
this._taskRunProcess?.taskRunCompletedNotification(completion);
}
async waitCompletedNotification() {
this._taskRunProcess?.waitCompletedNotification();
}
async #getFreshTaskRunProcess(payload, messageId) {
const metadata = this.getMetadata(
payload.execution.worker.id,
payload.execution.worker.version
);
console.log("Getting fresh task run process, setting closed to false", {
closed: this._closed
});
this._closed = false;
await this.#killCurrentTaskRunProcessBeforeAttempt();
const taskRunProcess = new TaskRunProcess(
payload.execution.run.id,
payload.execution.run.isTest,
this.path,
{
...this.params.env,
...payload.environment ?? {}
},
metadata,
this.params,
messageId
);
taskRunProcess.onExit.attach(({ pid }) => {
console.log("Task run process exited", { pid });
if (this._taskRunProcess?.pid === pid) {
this._taskRunProcess = void 0;
}
if (pid) {
this._taskRunProcessesBeingKilled.delete(pid);
}
});
taskRunProcess.onIsBeingKilled.attach((taskRunProcess2) => {
if (taskRunProcess2?.pid) {
this._taskRunProcessesBeingKilled.set(taskRunProcess2.pid, taskRunProcess2);
}
});
taskRunProcess.onTaskHeartbeat.attach((id) => {
this.onTaskHeartbeat.post(id);
});
taskRunProcess.onTaskRunHeartbeat.attach((id) => {
this.onTaskRunHeartbeat.post(id);
});
taskRunProcess.onWaitForBatch.attach((message) => {
this.onWaitForBatch.post(message);
});
taskRunProcess.onWaitForDuration.attach((message) => {
this.onWaitForDuration.post(message);
});
taskRunProcess.onWaitForTask.attach((message) => {
this.onWaitForTask.post(message);
});
await taskRunProcess.initialize();
this._taskRunProcess = taskRunProcess;
return this._taskRunProcess;
}
async forceKillOldTaskRunProcesses() {
for (const taskRunProcess of this._taskRunProcessesBeingKilled.values()) {
try {
await taskRunProcess.kill("SIGKILL");
} catch (error) {
console.error("Error while force killing old task run processes", error);
}
}
}
async #killCurrentTaskRunProcessBeforeAttempt() {
console.log("killCurrentTaskRunProcessBeforeAttempt()", {
hasTaskRunProcess: !!this._taskRunProcess
});
if (!this._taskRunProcess) {
return;
}
const currentTaskRunProcess = this._taskRunProcess;
console.log("Killing current task run process", {
isBeingKilled: currentTaskRunProcess?.isBeingKilled,
totalBeingKilled: this._taskRunProcessesBeingKilled.size
});
if (currentTaskRunProcess.isBeingKilled) {
if (this._taskRunProcessesBeingKilled.size > 1) {
await this.#tryGracefulExit(currentTaskRunProcess);
}
// Otherwise it's already being killed and is the only kill in flight; nothing more to do.
} else {
if (this._taskRunProcessesBeingKilled.size > 0) {
await this.#tryGracefulExit(currentTaskRunProcess);
} else {
currentTaskRunProcess.kill("SIGTERM", 5e3).catch(() => {
});
}
}
}
async #tryGracefulExit(taskRunProcess, kill = false, initialSignal = "SIGTERM") {
console.log("Trying graceful exit", { kill, initialSignal });
try {
const initialExit = taskRunProcess.onExit.waitFor(5e3);
if (kill) {
taskRunProcess.kill(initialSignal);
}
await initialExit;
} catch (error) {
console.error("TaskRunProcess graceful kill timeout exceeded", error);
await this.#tryForcefulExit(taskRunProcess);
}
}
async #tryForcefulExit(taskRunProcess) {
console.log("Trying forceful exit");
try {
const forcedKill = taskRunProcess.onExit.waitFor(5e3);
taskRunProcess.kill("SIGKILL");
await forcedKill;
} catch (error) {
console.error("TaskRunProcess forced kill timeout exceeded", error);
throw new SigKillTimeoutProcessError();
}
}
// We need to fork the process before we can execute any tasks; use a fresh process for each execution
async executeTaskRun(payload, messageId) {
try {
const taskRunProcess = await this.#getFreshTaskRunProcess(payload, messageId);
console.log("executing task run", {
attempt: payload.execution.attempt.id,
taskRunPid: taskRunProcess.pid
});
const result = await taskRunProcess.executeTaskRun(payload);
if (result.ok) {
return result;
}
const error = result.error;
if (error.type === "BUILT_IN_ERROR") {
const mappedError = await this.#correctError(error, payload.execution);
return {
...result,
error: mappedError
};
}
return result;
} catch (e) {
if (e instanceof CancelledProcessError) {
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.TASK_RUN_CANCELLED
}
};
}
if (e instanceof CleanupProcessError) {
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.TASK_EXECUTION_ABORTED
}
};
}
if (e instanceof UnexpectedExitError) {
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE,
message: getFriendlyErrorMessage(e.code, e.signal, e.stderr),
stackTrace: e.stderr
}
};
}
if (e instanceof SigKillTimeoutProcessError) {
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.TASK_PROCESS_SIGKILL_TIMEOUT
}
};
}
if (e instanceof GracefulExitTimeoutError) {
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.GRACEFUL_EXIT_TIMEOUT,
message: "Worker process killed while attempt in progress."
}
};
}
return {
id: payload.execution.attempt.id,
ok: false,
retry: void 0,
error: {
type: "INTERNAL_ERROR",
code: TaskRunErrorCodes.TASK_EXECUTION_FAILED
}
};
} finally {
await this.#killTaskRunProcess();
}
}
async cancelAttempt(attemptId) {
if (!this._taskRunProcess) {
console.error("No task run process to cancel attempt", { attemptId });
return;
}
await this._taskRunProcess.cancel();
}
async executeTaskRunLazyAttempt(payload) {
this.onCreateTaskRunAttempt.post({ runId: payload.runId });
let execution;
try {
const start = performance.now();
const attemptCreated = await this.attemptCreatedNotification.waitFor(12e4);
if (!attemptCreated.success) {
throw new Error(`${attemptCreated.reason ?? "Unknown error"}`);
}
console.log("Attempt created", {
number: attemptCreated.execution.attempt.number,
duration: performance.now() - start
});
execution = attemptCreated.execution;
} catch (error) {
console.error("Error while creating attempt", error);
throw new Error(`Failed to create attempt: ${error}`);
}
const completion = await this.executeTaskRun(
{
execution,
traceContext: payload.traceContext,
environment: payload.environment
},
payload.messageId
);
return { execution, completion };
}
async #correctError(error, execution) {
return {
...error,
stackTrace: correctErrorStackTrace(error.stackTrace, this.params.projectConfig.projectDir)
};
}
};
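// TaskRunProcess owns a single forked child for one task run: it wires a
// Zod-typed IPC channel, resolves or rejects one promise per attempt, keeps
// roughly the last 100 stderr lines for crash reports, and maps the exit
// cause to the typed errors defined above (Cancelled/Cleanup/
// GracefulExitTimeout/UnexpectedExit).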
var TaskRunProcess = class {
constructor(runId, isTest, path, env, metadata, worker, messageId) {
this.runId = runId;
this.isTest = isTest;
this.path = path;
this.env = env;
this.metadata = metadata;
this.worker = worker;
this.messageId = messageId;
}
_ipc;
_child;
_childPid;
_attemptPromises = /* @__PURE__ */ new Map();
_attemptStatuses = /* @__PURE__ */ new Map();
_currentExecution;
_isBeingKilled = false;
_isBeingCancelled = false;
_gracefulExitTimeoutElapsed = false;
_stderr = [];
/**
* @deprecated use onTaskRunHeartbeat instead
*/
onTaskHeartbeat = new Evt();
onTaskRunHeartbeat = new Evt();
onExit = new Evt();
onIsBeingKilled = new Evt();
onWaitForDuration = new Evt();
onWaitForTask = new Evt();
onWaitForBatch = new Evt();
preCheckpointNotification = Evt.create();
async initialize() {
this._child = fork(this.path, {
stdio: [
/*stdin*/
"ignore",
/*stdout*/
"pipe",
/*stderr*/
"pipe",
"ipc"
],
env: {
...this.isTest ? { TRIGGER_LOG_LEVEL: "debug" } : {},
...this.env,
OTEL_RESOURCE_ATTRIBUTES: JSON.stringify({
[SemanticInternalAttributes.PROJECT_DIR]: this.worker.projectConfig.projectDir
}),
...this.worker.debugOtel ? { OTEL_LOG_LEVEL: "debug" } : {}
}
});
this._childPid = this._child?.pid;
this._ipc = new ZodIpcConnection({
listenSchema: ProdChildToWorkerMessages,
emitSchema: ProdWorkerToChildMessages,
process: this._child,
handlers: {
TASK_RUN_COMPLETED: async (message) => {
const { result, execution } = message;
const promiseStatus = this._attemptStatuses.get(execution.attempt.id);
if (promiseStatus !== "PENDING") {
return;
}
this._attemptStatuses.set(execution.attempt.id, "RESOLVED");
const attemptPromise = this._attemptPromises.get(execution.attempt.id);
if (!attemptPromise) {
return;
}
const { resolver } = attemptPromise;
resolver(result);
},
READY_TO_DISPOSE: async (message) => {
process.exit(0);
},
TASK_HEARTBEAT: async (message) => {
if (this.messageId) {
this.onTaskRunHeartbeat.post(this.messageId);
} else {
console.error(
"No message id for task heartbeat, falling back to (deprecated) attempt heartbeat",
{ id: message.id }
);
this.onTaskHeartbeat.post(message.id);
}
},
TASKS_READY: async (message) => {
// No-op here: task indexing is handled by ProdBackgroundWorker.initialize()
},
WAIT_FOR_TASK: async (message) => {
this.onWaitForTask.post(message);
},
WAIT_FOR_BATCH: async (message) => {
this.onWaitForBatch.post(message);
},
WAIT_FOR_DURATION: async (message) => {
this.onWaitForDuration.post(message);
}
}
});
this._child.on("exit", this.#handleExit.bind(this));
this._child.stdout?.on("data", this.#handleLog.bind(this));
this._child.stderr?.on("data", this.#handleStdErr.bind(this));
}
async cancel() {
this._isBeingCancelled = true;
await this.cleanup(true);
}
async cleanup(kill = false, gracefulExitTimeoutElapsed = false) {
console.log("cleanup()", { kill, gracefulExitTimeoutElapsed });
if (kill && this._isBeingKilled) {
return;
}
if (kill) {
this._isBeingKilled = true;
this.onIsBeingKilled.post(this);
}
const killChildProcess = gracefulExitTimeoutElapsed && !!this._currentExecution;
const killParentProcess = kill && !killChildProcess;
console.log("Cleaning up task run process", {
killChildProcess,
killParentProcess,
ipc: this._ipc,
childPid: this._childPid,
realChildPid: this._child?.pid
});
try {
await this._ipc?.sendWithAck(
"CLEANUP",
{
flush: true,
kill: killParentProcess
},
3e4
);
} catch (error) {
console.error("Error while cleaning up task run process", error);
if (killParentProcess) {
process.exit(0);
}
}
if (killChildProcess) {
this._gracefulExitTimeoutElapsed = true;
await this.kill("SIGKILL");
}
}
async executeTaskRun(payload) {
let resolver;
let rejecter;
const promise = new Promise((resolve, reject) => {
resolver = resolve;
rejecter = reject;
});
this._attemptStatuses.set(payload.execution.attempt.id, "PENDING");
this._attemptPromises.set(payload.execution.attempt.id, { resolver, rejecter });
const { execution, traceContext } = payload;
this._currentExecution = execution;
if (this._child?.connected && !this._isBeingKilled && !this._child.killed) {
await this._ipc?.send("EXECUTE_TASK_RUN", {
execution,
traceContext,
metadata: this.metadata
});
}
const result = await promise;
this._currentExecution = void 0;
return result;
}
taskRunCompletedNotification(completion) {
if (!completion.ok && typeof completion.retry !== "undefined") {
console.error(
"Task run completed with error and wants to retry, won't send task run completed notification"
);
return;
}
if (!this._child?.connected || this._isBeingKilled || this._child.killed) {
console.error(
"Child process not connected or being killed, can't send task run completed notification"
);
return;
}
this._ipc?.send("TASK_RUN_COMPLETED_NOTIFICATION", {
version: "v2",
completion
});
}
waitCompletedNotification() {
if (!this._child?.connected || this._isBeingKilled || this._child.killed) {
console.error(
"Child process not connected or being killed, can't send wait completed notification"
);
return;
}
this._ipc?.send("WAIT_COMPLETED_NOTIFICATION", {});
}
async #handleExit(code, signal) {
console.log("handling child exit", { code, signal });
for (const [id, status] of this._attemptStatuses.entries()) {
if (status === "PENDING") {
console.log("found pending attempt", { id });
this._attemptStatuses.set(id, "REJECTED");
const attemptPromise = this._attemptPromises.get(id);
if (!attemptPromise) {
continue;
}
const { rejecter } = attemptPromise;
if (this._isBeingCancelled) {
rejecter(new CancelledProcessError());
} else if (this._gracefulExitTimeoutElapsed) {
rejecter(new GracefulExitTimeoutError());
} else if (this._isBeingKilled) {
rejecter(new CleanupProcessError());
} else {
rejecter(
new UnexpectedExitError(
code ?? -1,
signal,
this._stderr.length ? this._stderr.join("\n") : void 0
)
);
}
}
}
this.onExit.post({ code, signal, pid: this.pid });
}
#handleLog(data) {
console.log(data.toString());
}
#handleStdErr(data) {
const text = data.toString();
console.error(text);
if (this._stderr.length > 100) {
this._stderr.shift();
}
this._stderr.push(text);
}
async kill(signal, timeoutInMs) {
this._isBeingKilled = true;
const killTimeout = this.onExit.waitFor(timeoutInMs);
this.onIsBeingKilled.post(this);
this._child?.kill(signal);
if (timeoutInMs) {
await killTimeout;
}
}
get isBeingKilled() {
return this._isBeingKilled || this._child?.killed;
}
get pid() {
return this._childPid;
}
};
// src/workers/prod/entry-point.ts
import { checkpointSafeTimeout, unboundedTimeout } from "@systemfsoftware/trigger.dev_core/v3/utils/timers";
import { randomUUID } from "node:crypto";
import { readFile } from "node:fs/promises";
import { createServer } from "node:http";
import { setTimeout as timeout2 } from "node:timers/promises";
import { Evt as Evt2 } from "evt";
var HTTP_SERVER_PORT = Number(process.env.HTTP_SERVER_PORT || getRandomPortNumber());
var COORDINATOR_HOST = process.env.COORDINATOR_HOST || "127.0.0.1";
var COORDINATOR_PORT = Number(process.env.COORDINATOR_PORT || 50080);
var MACHINE_NAME = process.env.MACHINE_NAME || "local";
var POD_NAME = process.env.POD_NAME || "some-pod";
var SHORT_HASH = process.env.TRIGGER_CONTENT_HASH.slice(0, 9);
var logger = new SimpleLogger(`[${MACHINE_NAME}][${SHORT_HASH}]`);
var defaultBackoff = new ExponentialBackoff("FullJitter", {
maxRetries: 5
});
var ProdWorker = class {
constructor(port, host = "0.0.0.0") {
this.host = host;
process.on("SIGTERM", this.#handleSignal.bind(this, "SIGTERM"));
this.#coordinatorSocket = this.#createCoordinatorSocket(COORDINATOR_HOST);
this.#backgroundWorker = this.#createBackgroundWorker();
this.#httpPort = port;
this.#httpServer = this.#createHttpServer();
}
apiUrl = process.env.TRIGGER_API_URL;
apiKey = process.env.TRIGGER_SECRET_KEY;
contentHash = process.env.TRIGGER_CONTENT_HASH;
projectRef = process.env.TRIGGER_PROJECT_REF;
envId = process.env.TRIGGER_ENV_ID;
runId = process.env.TRIGGER_RUN_ID || "index-only";
deploymentId = process.env.TRIGGER_DEPLOYMENT_ID;
deploymentVersion = process.env.TRIGGER_DEPLOYMENT_VERSION;
runningInKubernetes = !!process.env.KUBERNETES_PORT;
executing = false;
completed = /* @__PURE__ */ new Set();
paused = false;
attemptFriendlyId;
attemptNumber;
nextResumeAfter;
waitForPostStart = false;
connectionCount = 0;
restoreNotification = Evt2.create();
waitForTaskReplay;
waitForBatchReplay;
readyForLazyAttemptReplay;
durationResumeFallback;
#httpPort;
#backgroundWorker;
#httpServer;
#coordinatorSocket;
async #handleSignal(signal) {
logger.log("Received signal", { signal });
if (signal === "SIGTERM") {
let gracefulExitTimeoutElapsed = false;
if (this.executing) {
const terminationGracePeriodSeconds = 60 * 60;
logger.log("Waiting for attempt to complete before exiting", {
terminationGracePeriodSeconds
});
await timeout2(terminationGracePeriodSeconds * 1e3 - 5e3);
gracefulExitTimeoutElapsed = true;
logger.log("Termination timeout reached, exiting gracefully.");
} else {
logger.log("Not executing, exiting immediately.");
}
await this.#exitGracefully(gracefulExitTimeoutElapsed);
return;
}
logger.log("Unhandled signal", { signal });
}
async #exitGracefully(gracefulExitTimeoutElapsed = false, exitCode = 0) {
await this.#backgroundWorker.close(gracefulExitTimeoutElapsed);
if (!gracefulExitTimeoutElapsed) {
process.exit(exitCode);
}
}
async #reconnectAfterPostStart() {
this.waitForPostStart = false;
this.#coordinatorSocket.close();
this.connectionCount = 0;
let coordinatorHost = COORDINATOR_HOST;
try {
if (this.runningInKubernetes) {
coordinatorHost = (await readFile("/etc/taskinfo/coordinator-host", "utf-8")).replace(
"\n",
""
);
logger.log("reconnecting", {
coordinatorHost: {
fromEnv: COORDINATOR_HOST,
fromVolume: coordinatorHost,
current: this.#coordinatorSocket.socket.io.opts.hostname
}
});
}
} catch (error) {
logger.error("taskinfo read error during reconnect", {
error: error instanceof Error ? error.message : error
});
} finally {
this.#coordinatorSocket = this.#createCoordinatorSocket(coordinatorHost);
}
}
// MARK: TASK WAIT
#waitForTaskHandlerFactory(workerId) {
return async (message, replayIdempotencyKey) => {
logger.log("onWaitForTask", { workerId, message });
if (this.nextResumeAfter) {
logger.error("Already waiting for resume, skipping wait for task", {
nextResumeAfter: this.nextResumeAfter
});
return;
}
const waitForTask = await defaultBackoff.execute(async ({ retry }) => {
logger.log("Wait for task with backoff", { retry });
if (!this.attemptFriendlyId) {
logger.error("Failed to send wait message, attempt friendly ID not set", { message });
throw new ExponentialBackoff.StopRetrying("No attempt ID");
}
return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_TASK", {
version: "v2",
friendlyId: message.friendlyId,
attemptFriendlyId: this.attemptFriendlyId
});
});
if (!waitForTask.success) {
logger.error("Failed to wait for task with backoff", {
cause: waitForTask.cause,
error: waitForTask.error
});
this.#emitUnrecoverableError(
"WaitForTaskFailed",
`${waitForTask.cause}: ${waitForTask.error}`
);
return;
}
const { willCheckpointAndRestore } = waitForTask.result;
await this.#prepareForWait("WAIT_FOR_TASK", willCheckpointAndRestore);
if (willCheckpointAndRestore) {
if (!this.waitForTaskReplay) {
this.waitForTaskReplay = {
message,
attempt: 1,
idempotencyKey: randomUUID()
};
} else {
if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForTaskReplay.idempotencyKey) {
logger.error(
"wait for task handler called with mismatched idempotency key, won't overwrite replay request"
);
return;
}
this.waitForTaskReplay.attempt++;
}
}
};
}
// MARK: BATCH WAIT
#waitForBatchHandlerFactory(workerId) {
return async (message, replayIdempotencyKey) => {
logger.log("onWaitForBatch", { workerId, message });
if (this.nextResumeAfter) {
logger.error("Already waiting for resume, skipping wait for batch", {
nextResumeAfter: this.nextResumeAfter
});
return;
}
const waitForBatch = await defaultBackoff.execute(async ({ retry }) => {
logger.log("Wait for batch with backoff", { retry });
if (!this.attemptFriendlyId) {
logger.error("Failed to send wait message, attempt friendly ID not set", { message });
throw new ExponentialBackoff.StopRetrying("No attempt ID");
}
return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_BATCH", {
version: "v2",
batchFriendlyId: message.batchFriendlyId,
runFriendlyIds: message.runFriendlyIds,
attemptFriendlyId: this.attemptFriendlyId
});
});
if (!waitForBatch.success) {
logger.error("Failed to wait for batch with backoff", {
cause: waitForBatch.cause,
error: waitForBatch.error
});
this.#emitUnrecoverableError(
"WaitForBatchFailed",
`${waitForBatch.cause}: ${waitForBatch.error}`
);
return;
}
const { willCheckpointAndRestore } = waitForBatch.result;
await this.#prepareForWait("WAIT_FOR_BATCH", willCheckpointAndRestore);
if (willCheckpointAndRestore) {
if (!this.waitForBatchReplay) {
this.waitForBatchReplay = {
message,
attempt: 1,
idempotencyKey: randomUUID()
};
} else {
if (replayIdempotencyKey && replayIdempotencyKey !== this.waitForBatchReplay.idempotencyKey) {
logger.error(
"wait for task handler called with mismatched idempotency key, won't overwrite replay request"
);
return;
}
this.waitForBatchReplay.attempt++;
}
}
};
}
// MARK: WORKER CREATION
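// Note: __PROJECT_CONFIG__ (used below) is not defined in this file; it is
// presumably injected at build time by the CLI bundler (e.g. via an esbuild
// define).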
#createBackgroundWorker() {
const workerId = randomUUID();
logger.log("Creating background worker", { workerId });
const backgroundWorker = new ProdBackgroundWorker("worker.js", {
projectConfig: __PROJECT_CONFIG__,
env: {
...gatherProcessEnv(),
TRIGGER_API_URL: this.apiUrl,
TRIGGER_SECRET_KEY: this.apiKey,
OTEL_EXPORTER_OTLP_ENDPOINT: process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? "http://0.0.0.0:4318"
},
contentHash: this.contentHash
});
backgroundWorker.onTaskHeartbeat.attach((attemptFriendlyId) => {
logger.log("onTaskHeartbeat", {
workerId,
attemptFriendlyId
});
this.#coordinatorSocket.socket.volatile.emit("TASK_HEARTBEAT", {
version: "v1",
attemptFriendlyId
});
});
backgroundWorker.onTaskRunHeartbeat.attach((runId) => {
logger.log("onTaskRunHeartbeat", {
workerId,
runId
});
this.#coordinatorSocket.socket.volatile.emit("TASK_RUN_HEARTBEAT", { version: "v1", runId });
});
backgroundWorker.onCreateTaskRunAttempt.attach(async (message) => {
logger.log("onCreateTaskRunAttempt()", {
workerId,
message
});
const createAttempt = await defaultBackoff.execute(async ({ retry }) => {
logger.log("Create task run attempt with backoff", { retry });
return await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CREATE_TASK_RUN_ATTEMPT", {
version: "v1",
runId: message.runId
});
});
if (!createAttempt.success) {
backgroundWorker.attemptCreatedNotification.post({
success: false,
reason: `Failed to create attempt with backoff due to ${createAttempt.cause}. ${createAttempt.error}`
});
return;
}
if (!createAttempt.result.success) {
backgroundWorker.attemptCreatedNotification.post({
success: false,
reason: createAttempt.result.reason
});
return;
}
backgroundWorker.attemptCreatedNotification.post({
success: true,
execution: createAttempt.result.executionPayload.execution
});
});
backgroundWorker.attemptCreatedNotification.attach((message) => {
logger.log("attemptCreatedNotification", {
workerId,
success: message.success,
...message.success ? {
attempt: message.execution.attempt,
queue: message.execution.queue,
worker: message.execution.worker,
machine: message.execution.machine
} : {
reason: message.reason
}
});
if (!message.success) {
return;
}
this.attemptFriendlyId = message.execution.attempt.id;
this.attemptNumber = message.execution.attempt.number;
});
backgroundWorker.onWaitForDuration.attach(async (message) => {
logger.log("onWaitForDuration", {
workerId,
...message,
drift: Date.now() - message.now
});
if (this.nextResumeAfter) {
logger.error("Already waiting for resume, skipping wait for duration", {
nextResumeAfter: this.nextResumeAfter
});
return;
}
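// `noResume` is a labeled block: `break noResume` jumps past the
// checkpoint/resume handshake so execution falls through to
// #resumeAfterDuration() after the block.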
noResume: {
const { ms, waitThresholdInMs } = message;
const internalTimeout = unboundedTimeout(ms, "internal");
const checkpointSafeInternalTimeout = checkpointSafeTimeout(ms);
if (ms < waitThresholdInMs) {
await internalTimeout;
break noResume;
}
const waitForDuration = await defaultBackoff.execute(async ({ retry }) => {
logger.log("Wait for duration with backoff", { retry });
if (!this.attemptFriendlyId) {
logger.error("Failed to send wait message, attempt friendly ID not set", { message });
throw new ExponentialBackoff.StopRetrying("No attempt ID");
}
return await this.#coordinatorSocket.socket.timeout(2e4).emitWithAck("WAIT_FOR_DURATION", {
...message,
attemptFriendlyId: this.attemptFriendlyId
});
});
if (!waitForDuration.success) {
logger.error("Failed to wait for duration with backoff", {
cause: waitForDuration.cause,
error: waitForDuration.error
});
this.#emitUnrecoverableError(
"WaitForDurationFailed",
`${waitForDuration.cause}: ${waitForDuration.error}`
);
return;
}
const { willCheckpointAndRestore } = waitForDuration.result;
if (!willCheckpointAndRestore) {
await internalTimeout;
break noResume;
}
await this.#prepareForWait("WAIT_FOR_DURATION", willCheckpointAndRestore);
await Promise.race([internalTimeout, checkpointSafeInternalTimeout]);
const idempotencyKey = randomUUID();
this.durationResumeFallback = { idempotencyKey };
try {
await this.restoreNotification.waitFor(5e3);
} catch (error) {
logger.error("Did not receive restore notification in time", {
error
});
}
try {
const { checkpointCanceled } = await this.#coordinatorSocket.socket.timeout(15e3).emitWithAck("CANCEL_CHECKPOINT", {
version: "v2",
reason: "WAIT_FOR_DURATION"
});
logger.log("onCancelCheckpoint coordinator response", { checkpointCanceled });
if (checkpointCanceled) {
break noResume;
}
logger.log("Waiting for external duration resume as we may have been restored");
setTimeout(() => {
if (!this.durationResumeFallback) {
logger.error("Already resumed after duration, skipping fallback");
return;
}
if (this.durationResumeFallback.idempotencyKey !== idempotencyKey) {
logger.error("Duration resume idempotency key mismatch, skipping fallback");
return;
}
logger.log("Resuming after duration with fallback");
this.#resumeAfterDuration();
}, 15e3);
} catch (error) {
logger.debug("Checkpoint cancellation timed out", {
workerId,
message,
error
});
}
return;
}
this.#resumeAfterDuration();
});
backgroundWorker.onWaitForTask.attach(this.#waitForTaskHandlerFactory(workerId).bind(this));
backgroundWorker.onWaitForBatch.attach(this.#waitForBatchHandlerFactory(workerId).bind(this));
return backgroundWorker;
}
async #prepareForWait(reason, willCheckpointAndRestore) {
logger.log(`prepare for ${reason}`, { willCheckpointAndRestore });
if (this.nextResumeAfter) {
logger.error("Already waiting for resume, skipping prepare for wait", {
nextResumeAfter: this.nextResumeAfter,
params: {
reason,
willCheckpointAndRestore
}
});
return;
}
if (!willCheckpointAndRestore) {
return;
}
this.paused = true;
this.nextResumeAfter = reason;
this.waitForPostStart = true;
await this.#prepareForCheckpoint();
}
// MARK: RETRY PREP
async #prepareForRetry(shouldExit, exitCode) {
logger.log("prepare for retry", { shouldExit, exitCode });
if (shouldExit) {
await this.#exitGracefully(false, exitCode);
return;
}
this.paused = false;
this.waitForPostStart = false;
this.executing = false;
this.attemptFriendlyId = void 0;
this.attemptNumber = void 0;
}
// MARK: CHECKPOINT PREP
async #prepareForCheckpoint(flush = true) {
if (flush) {
try {
await this.#backgroundWorker.flushTelemetry();
} catch (error) {
logger.error(
"Failed to flush telemetry while preparing for checkpoint, will proceed anyway",
{ error }
);
}
}
try {
await this.#backgroundWorker.forceKillOldTaskRunProcesses();
} catch (error) {
logger.error(
"Failed to kill previous worker while preparing for checkpoint, will proceed anyway",
{ error }
);
}
this.#readyForCheckpoint();
}
#resumeAfterDuration() {
this.paused = false;
this.nextResumeAfter = void 0;
this.waitForPostStart = false;
this.durationResumeFallback = void 0;
this.#backgroundWorker.waitCompletedNotification();
}
async #readyForLazyAttempt() {
const idempotencyKey = randomUUID();
this.readyForLazyAttemptReplay = {
idempotencyKey
};
for await (const { delay, retry } of defaultBackoff.min(10).maxRetries(3)) {
if (retry > 0) {
logger.log("retrying ready for lazy attempt", { retry });
}
this.#coordinatorSocket.socket.emit("READY_FOR_LAZY_ATTEMPT", {
version: "v1",
runId: this.runId,
totalCompletions: this.completed.size
});
await timeout2(delay.milliseconds);
if (!this.readyForLazyAttemptReplay) {
logger.error("replay ready for lazy attempt cancelled, discarding", {
idempotencyKey
});
return;
}
if (idempotencyKey !== this.readyForLazyAttemptReplay.idempotencyKey) {
logger.error("replay ready for lazy attempt idempotency key mismatch, discarding", {
idempotencyKey,
newIdempotencyKey: this.readyForLazyAttemptReplay.idempotencyKey
});
return;
}
}
this.#failRun(this.runId, "Failed to receive execute request in a reasonable time");
}
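// If the loop above drains (up to 4 emits at jittered intervals of at least
// 10s) without readyForLazyAttemptReplay being cleared (presumably cleared
// elsewhere once the coordinator sends the execute request), the run fails.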
#readyForCheckpoint() {
this.#coordinatorSocket.socket.emit("READY_FOR_CHECKPOINT", { version: "v1" });
}
#failRun(anyRunId, error) {
logger.error("Failing run", { anyRunId, error });
const completion = {
ok: false,
id: anyRunId,
retry: void 0,
error: error instanceof Error ? {
type: "BUILT_IN_ERROR",
name: error.name,
message: error.message,
stackTrace: error.stack ?? ""
} : {
type: "BUILT_IN_ERROR",
name: "UnknownError",
message: String(error),
sta