@cloudsnorkel/cdk-github-runners
Version:
CDK construct to create GitHub Actions self-hosted runners. Creates ephemeral runners on demand. Easy to deploy and highly customizable.
131 lines • 17.6 kB
JavaScript
// NOTE(review): this lone `;` is an empty statement; it is presumably what is left of a
// stripped "use strict" directive — confirm against the published build.
;
// Mark the CommonJS exports object as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// `handler` is a function declaration further down, so it is hoisted and can be exported here.
exports.handler = handler;
// AWS SDK v3 Step Functions client; provides SFNClient, DescribeExecutionCommand and StopExecutionCommand.
const client_sfn_1 = require("@aws-sdk/client-sfn");
// Local GitHub helpers used by the handler: getOctokit, getRunner and deleteRunner.
const lambda_github_1 = require("./lambda-github");
// Step Functions client created once at module load and shared by every handler call.
const sfn = new client_sfn_1.SFNClient();
/**
 * SQS batch handler that reaps GitHub runners which have been idle for too long.
 *
 * Every record body is a JSON document with `executionArn`, `runnerName`, `owner`,
 * `repo`, `maxIdleSeconds` and an optional `installationId`. For each record:
 *
 * 1. If the step function execution is no longer RUNNING, the record is done.
 * 2. If the runner is not registered yet, is busy, or has not been idle for more than
 *    `maxIdleSeconds`, the record is reported as failed so it is checked again later.
 * 3. Otherwise the execution is stopped with error `IdleRunner` and the runner is deleted.
 *
 * A record is reported as failed at most once per invocation.
 *
 * @param {{Records: Array<{messageId: string, body: string}>}} event - SQS event.
 * @returns {Promise<{batchItemFailures: Array<{itemIdentifier: string}>}>} partial batch
 *   response listing the messages that have to be checked again.
 * @throws whatever `JSON.parse`, `DescribeExecutionCommand`, `getOctokit` or `getRunner`
 *   throw; these errors are not caught here and fail the whole invocation.
 */
async function handler(event) {
    const result = { batchItemFailures: [] };
    let octokitCache;
    let runnerLevel;
    for (const record of event.Records) {
        const input = JSON.parse(record.body);
        console.log({
            notice: 'Checking runner',
            input,
        });
        // report this record as failed so the message is checked again later
        const retryLater = () => result.batchItemFailures.push({ itemIdentifier: record.messageId });
        // check if step function is still running
        const execution = await sfn.send(new client_sfn_1.DescribeExecutionCommand({ executionArn: input.executionArn }));
        if (execution.status !== 'RUNNING') {
            // no need to test again as runner already finished
            console.log({
                notice: 'Runner already finished',
                input,
            });
            continue;
        }
        // get github access
        if (!octokitCache) {
            // getOctokit calls secrets manager every time, so cache the result
            const { octokit, githubSecrets } = await (0, lambda_github_1.getOctokit)(input.installationId);
            // TODO if installationId changes during normal operations, we may have some records with good installationId, and some with bad
            octokitCache = octokit;
            runnerLevel = githubSecrets.runnerLevel;
        }
        // find runner
        const runner = await (0, lambda_github_1.getRunner)(octokitCache, runnerLevel, input.owner, input.repo, input.runnerName);
        if (!runner) {
            console.log({
                notice: 'Runner not running yet',
                input,
            });
            retryLater();
            continue;
        }
        // if not idle, try again later
        // we want to try again because the runner might be retried due to e.g. lambda timeout
        // we need to keep following the retry too and make sure it doesn't go idle
        if (runner.busy) {
            console.log({
                notice: 'Runner is not idle',
                input,
            });
            retryLater();
            continue;
        }
        // the first `cdkghr:started:<unix seconds>` label tells us when the runner started
        const startedLabel = runner.labels.find((label) => label.name.toLowerCase().startsWith('cdkghr:started:'));
        if (!startedLabel) {
            // no started label? retry later (it won't retry forever as eventually the runner will stop and the step function will finish)
            console.error({
                notice: 'No `cdkghr:started:xxx` label found???',
                input,
            });
            retryLater();
            continue;
        }
        // check if max idle timeout has reached
        const started = parseFloat(startedLabel.name.split(':')[2]);
        const startedDate = new Date(started * 1000);
        const diffMs = new Date().getTime() - startedDate.getTime();
        if (Number.isNaN(diffMs)) {
            // the timestamp could not be parsed, so the idle time is unknown -- retry later
            console.error({
                notice: `Malformed started label ${startedLabel.name}`,
                input,
            });
            retryLater();
            continue;
        }
        console.log({
            notice: `Runner ${input.runnerName} started ${diffMs / 1000} seconds ago`,
            input,
        });
        // written as a negated `>` so that a missing maxIdleSeconds (NaN limit) never triggers a deletion
        const idleTooLong = diffMs > 1000 * input.maxIdleSeconds;
        if (!idleTooLong) {
            // still idle, timeout not reached -- retry later
            retryLater();
            continue;
        }
        // max idle time reached, delete runner
        console.log({
            notice: `Runner ${input.runnerName} is idle for too long`,
            input,
        });
        try {
            // stop step function first, so it's marked as aborted with the proper error
            // if we delete the runner first, the step function will be marked as failed with a generic error
            console.log({
                notice: `Stopping step function ${input.executionArn}...`,
                input,
            });
            await sfn.send(new client_sfn_1.StopExecutionCommand({
                executionArn: input.executionArn,
                error: 'IdleRunner',
                cause: `Runner ${input.runnerName} on ${input.owner}/${input.repo} is idle for too long (${diffMs / 1000} seconds and limit is ${input.maxIdleSeconds} seconds)`,
            }));
        }
        catch (e) {
            console.error({
                notice: `Failed to stop step function ${input.executionArn}: ${e}`,
                input,
            });
            // moves on to the next record; the message is reported as failed exactly once
            retryLater();
            continue;
        }
        try {
            console.log({
                notice: `Deleting runner ${runner.id}...`,
                input,
            });
            await (0, lambda_github_1.deleteRunner)(octokitCache, runnerLevel, input.owner, input.repo, runner.id);
        }
        catch (e) {
            console.error({
                notice: `Failed to delete runner ${runner.id}: ${e}`,
                input,
            });
            retryLater();
        }
    }
    return result;
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"idle-runner-repear.lambda.js","sourceRoot":"","sources":["../src/idle-runner-repear.lambda.ts"],"names":[],"mappings":";;AAgBA,0BAsIC;AAtJD,oDAAgG;AAGhG,mDAAsE;AAWtE,MAAM,GAAG,GAAG,IAAI,sBAAS,EAAE,CAAC;AAErB,KAAK,UAAU,OAAO,CAAC,KAAyB;IACrD,MAAM,MAAM,GAA+B,EAAE,iBAAiB,EAAE,EAAE,EAAE,CAAC;IACrE,IAAI,YAAiC,CAAC;IACtC,IAAI,WAAuC,CAAC;IAE5C,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAA0B,CAAC;QAC/D,OAAO,CAAC,GAAG,CAAC;YACV,MAAM,EAAE,iBAAiB;YACzB,KAAK;SACN,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,EAAE,cAAc,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QAE7F,0CAA0C;QAC1C,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,qCAAwB,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QACrG,IAAI,SAAS,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;YAClC,mDAAmD;YACnD,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,yBAAyB;gBACjC,KAAK;aACN,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,mEAAmE;YACnE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,MAAM,IAAA,0BAAU,EAAC,KAAK,CAAC,cAAc,CAAC,CAAC;YAC1E,gIAAgI;YAChI,YAAY,GAAG,OAAO,CAAC;YACvB,WAAW,GAAG,aAAa,CAAC,WAAW,CAAC;QAC1C,CAAC;QAED,cAAc;QACd,MAAM,MAAM,GAAG,MAAM,IAAA,yBAAS,EAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;QACrG,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,wBAAwB;gBAChC,KAAK;aACN,CAAC,CAAC;YACH,UAAU,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,+BAA+B;QAC/B,sFAAsF;QACtF,2EAA2E;QAC3E,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,oBAAoB;gBAC5B,KAAK;aACN,CAAC,CAAC;YACH,UAAU,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,wCAAwC;QACxC,IAAI,KAAK,GAAG,KAAK,CAAC;QAClB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClC,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBAC3D,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrD,MAAM,WAAW,GAAG,IAAI,IAAI,CAAC,OAAO,
GAAG,IAAI,CAAC,CAAC;gBAC7C,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;gBACvB,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,EAAE,GAAG,WAAW,CAAC,OAAO,EAAE,CAAC;gBAErD,OAAO,CAAC,GAAG,CAAC;oBACV,MAAM,EAAE,UAAU,KAAK,CAAC,UAAU,YAAY,MAAM,GAAG,IAAI,cAAc;oBACzE,KAAK;iBACN,CAAC,CAAC;gBAEH,IAAI,MAAM,GAAG,IAAI,GAAG,KAAK,CAAC,cAAc,EAAE,CAAC;oBACzC,uCAAuC;oBACvC,OAAO,CAAC,GAAG,CAAC;wBACV,MAAM,EAAE,UAAU,KAAK,CAAC,UAAU,uBAAuB;wBACzD,KAAK;qBACN,CAAC,CAAC;oBAEH,IAAI,CAAC;wBACH,4EAA4E;wBAC5E,iGAAiG;wBACjG,OAAO,CAAC,GAAG,CAAC;4BACV,MAAM,EAAE,0BAA0B,KAAK,CAAC,YAAY,KAAK;4BACzD,KAAK;yBACN,CAAC,CAAC;wBACH,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,iCAAoB,CAAC;4BACtC,YAAY,EAAE,KAAK,CAAC,YAAY;4BAChC,KAAK,EAAE,YAAY;4BACnB,KAAK,EAAE,UAAU,KAAK,CAAC,UAAU,OAAO,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,IAAI,0BAA0B,MAAM,GAAG,IAAI,yBAAyB,KAAK,CAAC,cAAc,WAAW;yBACjK,CAAC,CAAC,CAAC;oBACN,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,OAAO,CAAC,KAAK,CAAC;4BACZ,MAAM,EAAE,gCAAgC,KAAK,CAAC,YAAY,KAAK,CAAC,EAAE;4BAClE,KAAK;yBACN,CAAC,CAAC;wBACH,UAAU,EAAE,CAAC;wBACb,SAAS;oBACX,CAAC;oBAED,IAAI,CAAC;wBACH,OAAO,CAAC,GAAG,CAAC;4BACV,MAAM,EAAE,mBAAmB,MAAM,CAAC,EAAE,KAAK;4BACzC,KAAK;yBACN,CAAC,CAAC;wBACH,MAAM,IAAA,4BAAY,EAAC,YAAY,EAAE,WAAW,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;oBACpF,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,OAAO,CAAC,KAAK,CAAC;4BACZ,MAAM,EAAE,2BAA2B,MAAM,CAAC,EAAE,KAAK,CAAC,EAAE;4BACpD,KAAK;yBACN,CAAC,CAAC;wBACH,UAAU,EAAE,CAAC;wBACb,SAAS;oBACX,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,iDAAiD;oBACjD,UAAU,EAAE,CAAC;gBACf,CAAC;gBAED,KAAK,GAAG,IAAI,CAAC;gBACb,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,8HAA8H;YAC9H,OAAO,CAAC,KAAK,CAAC;gBACZ,MAAM,EAAE,wCAAwC;gBAChD,KAAK;aACN,CAAC,CAAC;YACH,UAAU,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["import { DescribeExecutionCommand, SFNClient, StopExecutionCommand } from '@aws-sdk/client-sfn';\nimport { Octokit } from '@octokit/rest';\nimport * as AWSLambda from 'aws-lambda';\nimport { deleteRunner, getOctokit, getRunner } from 
'./lambda-github';\n\ninterface IdleReaperLambdaInput {\n  readonly executionArn: string;\n  readonly runnerName: string;\n  readonly owner: string;\n  readonly repo: string;\n  readonly installationId?: number;\n  readonly maxIdleSeconds: number;\n}\n\nconst sfn = new SFNClient();\n\nexport async function handler(event: AWSLambda.SQSEvent): Promise<AWSLambda.SQSBatchResponse> {\n  const result: AWSLambda.SQSBatchResponse = { batchItemFailures: [] };\n  let octokitCache: Octokit | undefined;\n  let runnerLevel: 'repo' | 'org' | undefined;\n\n  for (const record of event.Records) {\n    const input = JSON.parse(record.body) as IdleReaperLambdaInput;\n    console.log({\n      notice: 'Checking runner',\n      input,\n    });\n\n    const retryLater = () => result.batchItemFailures.push({ itemIdentifier: record.messageId });\n\n    // check if step function is still running\n    const execution = await sfn.send(new DescribeExecutionCommand({ executionArn: input.executionArn }));\n    if (execution.status != 'RUNNING') {\n      // no need to test again as runner already finished\n      console.log({\n        notice: 'Runner already finished',\n        input,\n      });\n      continue;\n    }\n\n    // get github access\n    if (!octokitCache) {\n      // getOctokit calls secrets manager every time, so cache the result\n      const { octokit, githubSecrets } = await getOctokit(input.installationId);\n      // TODO if installationId changes during normal operations, we may have some records with good installationId, and some with bad\n      octokitCache = octokit;\n      runnerLevel = githubSecrets.runnerLevel;\n    }\n\n    // find runner\n    const runner = await getRunner(octokitCache, runnerLevel, input.owner, input.repo, input.runnerName);\n    if (!runner) {\n      console.log({\n        notice: 'Runner not running yet',\n        input,\n      });\n      retryLater();\n      continue;\n    }\n\n    // if not idle, try again later\n    // we want to try again 
because the runner might be retried due to e.g. lambda timeout\n    // we need to keep following the retry too and make sure it doesn't go idle\n    if (runner.busy) {\n      console.log({\n        notice: 'Runner is not idle',\n        input,\n      });\n      retryLater();\n      continue;\n    }\n\n    // check if max idle timeout has reached\n    let found = false;\n    for (const label of runner.labels) {\n      if (label.name.toLowerCase().startsWith('cdkghr:started:')) {\n        const started = parseFloat(label.name.split(':')[2]);\n        const startedDate = new Date(started * 1000);\n        const now = new Date();\n        const diffMs = now.getTime() - startedDate.getTime();\n\n        console.log({\n          notice: `Runner ${input.runnerName} started ${diffMs / 1000} seconds ago`,\n          input,\n        });\n\n        if (diffMs > 1000 * input.maxIdleSeconds) {\n          // max idle time reached, delete runner\n          console.log({\n            notice: `Runner ${input.runnerName} is idle for too long`,\n            input,\n          });\n\n          try {\n            // stop step function first, so it's marked as aborted with the proper error\n            // if we delete the runner first, the step function will be marked as failed with a generic error\n            console.log({\n              notice: `Stopping step function ${input.executionArn}...`,\n              input,\n            });\n            await sfn.send(new StopExecutionCommand({\n              executionArn: input.executionArn,\n              error: 'IdleRunner',\n              cause: `Runner ${input.runnerName} on ${input.owner}/${input.repo} is idle for too long (${diffMs / 1000} seconds and limit is ${input.maxIdleSeconds} seconds)`,\n            }));\n          } catch (e) {\n            console.error({\n              notice: `Failed to stop step function ${input.executionArn}: ${e}`,\n              input,\n            });\n            retryLater();\n            continue;\n 
         }\n\n          try {\n            console.log({\n              notice: `Deleting runner ${runner.id}...`,\n              input,\n            });\n            await deleteRunner(octokitCache, runnerLevel, input.owner, input.repo, runner.id);\n          } catch (e) {\n            console.error({\n              notice: `Failed to delete runner ${runner.id}: ${e}`,\n              input,\n            });\n            retryLater();\n            continue;\n          }\n        } else {\n          // still idle, timeout not reached -- retry later\n          retryLater();\n        }\n\n        found = true;\n        break;\n      }\n    }\n\n    if (!found) {\n      // no started label? retry later (it won't retry forever as eventually the runner will stop and the step function will finish)\n      console.error({\n        notice: 'No `cdkghr:started:xxx` label found???',\n        input,\n      });\n      retryLater();\n    }\n  }\n\n  return result;\n}\n"]}