UNPKG

@cloudsnorkel/cdk-github-runners

Version:

CDK construct to create GitHub Actions self-hosted runners. Creates ephemeral runners on demand. Easy to deploy and highly customizable.

139 lines 18.7 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.handler = handler;
const client_sfn_1 = require("@aws-sdk/client-sfn");
const lambda_github_1 = require("./lambda-github");

const sfn = new client_sfn_1.SFNClient();

// Runner labels starting with this prefix carry the runner start time as the
// third `:`-separated field. The code below multiplies that field by 1000 to
// build a Date, i.e. it is treated as seconds since the epoch.
const STARTED_LABEL_PREFIX = 'cdkghr:started:';

/**
 * Returns the `{ octokit, secrets }` pair for an installation, reusing a
 * previously fetched pair when one is cached.
 *
 * getOctokit calls secrets manager and Github API every time, and one handler
 * invocation can work on multiple runners at once, so caching is important.
 *
 * @param {Map<number | undefined, { octokit: object, secrets: object }>} octokitCache
 *   per-invocation cache keyed by installation id; updated in place on a miss
 * @param {number | undefined} installationId key passed through to getOctokit
 * @returns {Promise<{ octokit: object, secrets: object }>}
 */
async function getCachedOctokit(octokitCache, installationId) {
    const cached = octokitCache.get(installationId);
    if (cached) {
        return cached;
    }
    const { octokit, githubSecrets: secrets } = await (0, lambda_github_1.getOctokit)(installationId);
    const entry = { octokit, secrets };
    octokitCache.set(installationId, entry);
    return entry;
}

/**
 * Checks a single runner and, if it has been idle for longer than
 * `input.maxIdleSeconds`, stops its step function execution and deletes the
 * runner from GitHub.
 *
 * Every exit path yields exactly one decision for the record. The previous
 * inline version used `continue` inside the label loop on stop/delete
 * failures, which only advanced to the next label; the record then also hit
 * the "no started label" branch and was reported as failed twice.
 *
 * @param {object} input parsed message body; the fields read here are
 *   `executionArn`, `runnerName`, `owner`, `repo`, `installationId` and
 *   `maxIdleSeconds`
 * @param {Map} octokitCache per-invocation cache used by getCachedOctokit
 * @returns {Promise<boolean>} `true` when the message must be retried later,
 *   `false` when no further check is needed for this runner
 */
async function checkRunner(input, octokitCache) {
    console.log({
        notice: 'Checking runner',
        input,
    });

    // check if step function is still running
    const execution = await sfn.send(new client_sfn_1.DescribeExecutionCommand({ executionArn: input.executionArn }));
    if (execution.status !== 'RUNNING') {
        // no need to test again as runner already finished
        console.log({
            notice: 'Runner already finished',
            input,
        });
        return false;
    }

    // get github access
    const { octokit, secrets } = await getCachedOctokit(octokitCache, input.installationId);

    // find runner
    const runner = await (0, lambda_github_1.getRunner)(octokit, secrets.runnerLevel, input.owner, input.repo, input.runnerName);
    if (!runner) {
        console.log({
            notice: 'Runner not running yet',
            input,
        });
        return true;
    }

    // if not idle, try again later
    // we want to try again because the runner might be retried due to e.g. lambda timeout
    // we need to keep following the retry too and make sure it doesn't go idle
    if (runner.busy) {
        console.log({
            notice: 'Runner is not idle',
            input,
        });
        return true;
    }

    // check if max idle timeout has reached; only the first matching label is considered
    const startedLabel = runner.labels.find((label) => label.name.toLowerCase().startsWith(STARTED_LABEL_PREFIX));
    if (!startedLabel) {
        // no started label? retry later (it won't retry forever as eventually the runner will stop and the step function will finish)
        console.error({
            notice: 'No `cdkghr:started:xxx` label found???',
            input,
        });
        return true;
    }

    const started = parseFloat(startedLabel.name.split(':')[2]);
    const startedDate = new Date(started * 1000);
    // An unparsable timestamp makes diffMs NaN; the comparison below is then
    // false and the message is simply retried later.
    const diffMs = Date.now() - startedDate.getTime();
    console.log({
        notice: `Runner ${input.runnerName} started ${diffMs / 1000} seconds ago`,
        input,
    });

    if (!(diffMs > 1000 * input.maxIdleSeconds)) {
        // still idle, timeout not reached -- retry later
        return true;
    }

    // max idle time reached, delete runner
    console.log({
        notice: `Runner ${input.runnerName} is idle for too long`,
        input,
    });

    try {
        // stop step function first, so it's marked as aborted with the proper error
        // if we delete the runner first, the step function will be marked as failed with a generic error
        console.log({
            notice: `Stopping step function ${input.executionArn}...`,
            input,
        });
        await sfn.send(new client_sfn_1.StopExecutionCommand({
            executionArn: input.executionArn,
            error: 'IdleRunner',
            cause: `Runner ${input.runnerName} on ${input.owner}/${input.repo} is idle for too long (${diffMs / 1000} seconds and limit is ${input.maxIdleSeconds} seconds)`,
        }));
    } catch (e) {
        console.error({
            notice: `Failed to stop step function ${input.executionArn}: ${e}`,
            input,
        });
        return true;
    }

    try {
        console.log({
            notice: `Deleting runner ${runner.id}...`,
            input,
        });
        await (0, lambda_github_1.deleteRunner)(octokit, secrets.runnerLevel, input.owner, input.repo, runner.id);
    } catch (e) {
        console.error({
            notice: `Failed to delete runner ${runner.id}: ${e}`,
            input,
        });
        return true;
    }

    return false;
}

/**
 * Lambda entry point for a batch of queue records, each describing one runner
 * to check for idleness.
 *
 * Records are processed sequentially. A record whose runner still needs
 * watching (or whose cleanup failed) is listed once in `batchItemFailures` so
 * that it is delivered again later. Errors thrown outside the stop/delete
 * steps (e.g. while parsing the body or describing the execution) are not
 * caught here and propagate to the caller, as before.
 *
 * @param {{ Records: Array<{ body: string, messageId: string }> }} event
 * @returns {Promise<{ batchItemFailures: Array<{ itemIdentifier: string }> }>}
 */
async function handler(event) {
    const result = { batchItemFailures: [] };
    const octokitCache = new Map();

    for (const record of event.Records) {
        const input = JSON.parse(record.body);
        const retryLater = await checkRunner(input, octokitCache);
        if (retryLater) {
            result.batchItemFailures.push({ itemIdentifier: record.messageId });
        }
    }

    return result;
}