// @cloudsnorkel/cdk-github-runners
// Version: 0.14.21
// CDK construct to create GitHub Actions self-hosted runners. Creates ephemeral runners on demand. Easy to deploy and highly customizable.
// 636 lines • 116 kB
// JavaScript
"use strict";
var _a;
Object.defineProperty(exports, "__esModule", { value: true });
exports.GitHubRunners = void 0;
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
const fs = require("fs");
const os = require("os");
const path = require("path");
const cdk = require("aws-cdk-lib");
const aws_cdk_lib_1 = require("aws-cdk-lib");
const constructs_1 = require("constructs");
const access_1 = require("./access");
const delete_failed_runner_function_1 = require("./delete-failed-runner-function");
const idle_runner_repear_function_1 = require("./idle-runner-repear-function");
const providers_1 = require("./providers");
const secrets_1 = require("./secrets");
const setup_function_1 = require("./setup-function");
const status_function_1 = require("./status-function");
const token_retriever_function_1 = require("./token-retriever-function");
const utils_1 = require("./utils");
const webhook_1 = require("./webhook");
const webhook_redelivery_1 = require("./webhook-redelivery");
/**
* Create all the required infrastructure to provide self-hosted GitHub runners. It creates a webhook, secrets, and a step function to orchestrate all runs. Secrets are not automatically filled. See README.md for instructions on how to set up GitHub integration.
*
* By default, this will create a runner provider of each available type with the defaults. This is good enough for the initial setup stage when you just want to get GitHub integration working.
*
* ```typescript
* new GitHubRunners(this, 'runners');
* ```
*
* Usually you'd want to configure the runner providers so the runners can run in a certain VPC or have certain permissions.
*
* ```typescript
* const vpc = ec2.Vpc.fromLookup(this, 'vpc', { vpcId: 'vpc-1234567' });
* const runnerSg = new ec2.SecurityGroup(this, 'runner security group', { vpc: vpc });
* const dbSg = ec2.SecurityGroup.fromSecurityGroupId(this, 'database security group', 'sg-1234567');
* const bucket = new s3.Bucket(this, 'runner bucket');
*
* // create a custom CodeBuild provider
* const myProvider = new CodeBuildRunnerProvider(
* this, 'codebuild runner',
* {
* labels: ['my-codebuild'],
* vpc: vpc,
* securityGroups: [runnerSg],
* },
* );
* // grant some permissions to the provider
* bucket.grantReadWrite(myProvider);
* dbSg.connections.allowFrom(runnerSg, ec2.Port.tcp(3306), 'allow runners to connect to MySQL database');
*
* // create the runner infrastructure
* new GitHubRunners(
* this,
* 'runners',
* {
* providers: [myProvider],
* }
* );
* ```
*/
class GitHubRunners extends constructs_1.Construct {
constructor(scope, id, props) {
super(scope, id);
this.props = props;
this.extraLambdaEnv = {};
this.jobsCompletedMetricFiltersInitialized = false;
this.secrets = new secrets_1.Secrets(this, 'Secrets');
this.extraLambdaProps = {
vpc: this.props?.vpc,
vpcSubnets: this.props?.vpcSubnets,
allowPublicSubnet: this.props?.allowPublicSubnet,
securityGroups: this.lambdaSecurityGroups(),
layers: [],
};
this.connections = new aws_cdk_lib_1.aws_ec2.Connections({ securityGroups: this.extraLambdaProps.securityGroups });
this.createCertificateLayer(scope);
if (this.props?.providers) {
this.providers = this.props.providers;
}
else {
this.providers = [
new providers_1.CodeBuildRunnerProvider(this, 'CodeBuild'),
new providers_1.LambdaRunnerProvider(this, 'Lambda'),
new providers_1.FargateRunnerProvider(this, 'Fargate'),
];
}
if (this.providers.length == 0) {
throw new Error('At least one runner provider is required');
}
this.checkIntersectingLabels();
this.orchestrator = this.stateMachine(props);
this.webhook = new webhook_1.GithubWebhookHandler(this, 'Webhook Handler', {
orchestrator: this.orchestrator,
secrets: this.secrets,
access: this.props?.webhookAccess ?? access_1.LambdaAccess.lambdaUrl(),
providers: this.providers.reduce((acc, p) => {
acc[p.node.path] = p.labels;
return acc;
}, {}),
requireSelfHostedLabel: this.props?.requireSelfHostedLabel ?? true,
providerSelector: this.props?.providerSelector,
extraLambdaProps: this.extraLambdaProps,
extraLambdaEnv: this.extraLambdaEnv,
});
this.redeliverer = new webhook_redelivery_1.GithubWebhookRedelivery(this, 'Webhook Redelivery', {
secrets: this.secrets,
extraLambdaProps: this.extraLambdaProps,
extraLambdaEnv: this.extraLambdaEnv,
});
this.setupUrl = this.setupFunction();
this.statusFunction();
}
stateMachine(props) {
const tokenRetrieverTask = new aws_cdk_lib_1.aws_stepfunctions_tasks.LambdaInvoke(this, 'Get Runner Token', {
lambdaFunction: this.tokenRetriever(),
payloadResponseOnly: true,
resultPath: '$.runner',
});
let deleteFailedRunnerFunction = this.deleteFailedRunner();
const deleteFailedRunnerTask = new aws_cdk_lib_1.aws_stepfunctions_tasks.LambdaInvoke(this, 'Delete Failed Runner', {
lambdaFunction: deleteFailedRunnerFunction,
payloadResponseOnly: true,
resultPath: '$.delete',
payload: aws_cdk_lib_1.aws_stepfunctions.TaskInput.fromObject({
runnerName: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$$.Execution.Name'),
owner: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.owner'),
repo: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.repo'),
installationId: aws_cdk_lib_1.aws_stepfunctions.JsonPath.numberAt('$.installationId'),
error: aws_cdk_lib_1.aws_stepfunctions.JsonPath.objectAt('$.error'),
}),
});
deleteFailedRunnerTask.addRetry({
errors: [
'RunnerBusy',
],
interval: cdk.Duration.minutes(1),
backoffRate: 1,
maxAttempts: 60,
});
const idleReaper = this.idleReaper();
const queueIdleReaperTask = new aws_cdk_lib_1.aws_stepfunctions_tasks.SqsSendMessage(this, 'Queue Idle Reaper', {
queue: this.idleReaperQueue(idleReaper),
messageBody: aws_cdk_lib_1.aws_stepfunctions.TaskInput.fromObject({
executionArn: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$$.Execution.Id'),
runnerName: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$$.Execution.Name'),
owner: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.owner'),
repo: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.repo'),
installationId: aws_cdk_lib_1.aws_stepfunctions.JsonPath.numberAt('$.installationId'),
maxIdleSeconds: (props?.idleTimeout ?? cdk.Duration.minutes(5)).toSeconds(),
}),
resultPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.DISCARD,
});
const providerChooser = new aws_cdk_lib_1.aws_stepfunctions.Choice(this, 'Choose provider');
for (const provider of this.providers) {
const providerTask = provider.getStepFunctionTask({
runnerTokenPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.runner.token'),
runnerNamePath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$$.Execution.Name'),
githubDomainPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.runner.domain'),
ownerPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.owner'),
repoPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.repo'),
registrationUrl: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.runner.registrationUrl'),
labelsPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.labels'),
});
providerChooser.when(aws_cdk_lib_1.aws_stepfunctions.Condition.and(aws_cdk_lib_1.aws_stepfunctions.Condition.stringEquals('$.provider', provider.node.path)), providerTask, {
comment: `Labels: ${provider.labels.join(', ')}`,
});
}
providerChooser.otherwise(new aws_cdk_lib_1.aws_stepfunctions.Succeed(this, 'Unknown label'));
const runProviders = new aws_cdk_lib_1.aws_stepfunctions.Parallel(this, 'Run Providers').branch(new aws_cdk_lib_1.aws_stepfunctions.Parallel(this, 'Error Handler').branch(
// we get a token for every retry because the token can expire faster than the job can timeout
tokenRetrieverTask.next(providerChooser)).addCatch(
// delete runner on failure as it won't remove itself and there is a limit on the number of registered runners
deleteFailedRunnerTask, {
resultPath: '$.error',
}));
if (props?.retryOptions?.retry ?? true) {
const interval = props?.retryOptions?.interval ?? cdk.Duration.minutes(1);
const maxAttempts = props?.retryOptions?.maxAttempts ?? 23;
const backoffRate = props?.retryOptions?.backoffRate ?? 1.3;
const totalSeconds = interval.toSeconds() * backoffRate ** maxAttempts / (backoffRate - 1);
if (totalSeconds >= cdk.Duration.days(1).toSeconds()) {
// https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#usage-limits
// "Job queue time - Each job for self-hosted runners can be queued for a maximum of 24 hours. If a self-hosted runner does not start executing the job within this limit, the job is terminated and fails to complete."
aws_cdk_lib_1.Annotations.of(this).addWarning(`Total retry time is greater than 24 hours (${Math.floor(totalSeconds / 60 / 60)} hours). Jobs expire after 24 hours so it would be a waste of resources to retry further.`);
}
runProviders.addRetry({
interval,
maxAttempts,
backoffRate,
// we retry on everything
// deleted idle runners will also fail, but the reaper will stop this step function to avoid endless retries
});
}
let logOptions;
if (this.props?.logOptions) {
this.stateMachineLogGroup = new aws_cdk_lib_1.aws_logs.LogGroup(this, 'Logs', {
logGroupName: props?.logOptions?.logGroupName,
retention: props?.logOptions?.logRetention ?? aws_cdk_lib_1.aws_logs.RetentionDays.ONE_MONTH,
removalPolicy: cdk.RemovalPolicy.DESTROY,
});
logOptions = {
destination: this.stateMachineLogGroup,
includeExecutionData: props?.logOptions?.includeExecutionData ?? true,
level: props?.logOptions?.level ?? aws_cdk_lib_1.aws_stepfunctions.LogLevel.ALL,
};
}
const stateMachine = new aws_cdk_lib_1.aws_stepfunctions.StateMachine(this, 'Runner Orchestrator', {
definitionBody: aws_cdk_lib_1.aws_stepfunctions.DefinitionBody.fromChainable(queueIdleReaperTask.next(runProviders)),
logs: logOptions,
});
stateMachine.grantRead(idleReaper);
stateMachine.grantExecution(idleReaper, 'states:StopExecution');
for (const provider of this.providers) {
provider.grantStateMachine(stateMachine);
}
return stateMachine;
}
tokenRetriever() {
const func = new token_retriever_function_1.TokenRetrieverFunction(this, 'token-retriever', {
description: 'Get token from GitHub Actions used to start new self-hosted runner',
environment: {
GITHUB_SECRET_ARN: this.secrets.github.secretArn,
GITHUB_PRIVATE_KEY_SECRET_ARN: this.secrets.githubPrivateKey.secretArn,
...this.extraLambdaEnv,
},
timeout: cdk.Duration.seconds(30),
logGroup: (0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.ORCHESTRATOR),
loggingFormat: aws_cdk_lib_1.aws_lambda.LoggingFormat.JSON,
...this.extraLambdaProps,
});
this.secrets.github.grantRead(func);
this.secrets.githubPrivateKey.grantRead(func);
return func;
}
deleteFailedRunner() {
const func = new delete_failed_runner_function_1.DeleteFailedRunnerFunction(this, 'delete-runner', {
description: 'Delete failed GitHub Actions runner on error',
environment: {
GITHUB_SECRET_ARN: this.secrets.github.secretArn,
GITHUB_PRIVATE_KEY_SECRET_ARN: this.secrets.githubPrivateKey.secretArn,
...this.extraLambdaEnv,
},
timeout: cdk.Duration.seconds(30),
logGroup: (0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.ORCHESTRATOR),
loggingFormat: aws_cdk_lib_1.aws_lambda.LoggingFormat.JSON,
...this.extraLambdaProps,
});
this.secrets.github.grantRead(func);
this.secrets.githubPrivateKey.grantRead(func);
return func;
}
statusFunction() {
const statusFunction = new status_function_1.StatusFunction(this, 'status', {
description: 'Provide user with status about self-hosted GitHub Actions runners',
environment: {
WEBHOOK_SECRET_ARN: this.secrets.webhook.secretArn,
GITHUB_SECRET_ARN: this.secrets.github.secretArn,
GITHUB_PRIVATE_KEY_SECRET_ARN: this.secrets.githubPrivateKey.secretArn,
SETUP_SECRET_ARN: this.secrets.setup.secretArn,
WEBHOOK_URL: this.webhook.url,
WEBHOOK_HANDLER_ARN: this.webhook.handler.latestVersion.functionArn,
STEP_FUNCTION_ARN: this.orchestrator.stateMachineArn,
STEP_FUNCTION_LOG_GROUP: this.stateMachineLogGroup?.logGroupName ?? '',
SETUP_FUNCTION_URL: this.setupUrl,
...this.extraLambdaEnv,
},
timeout: cdk.Duration.minutes(3),
logGroup: (0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.SETUP),
loggingFormat: aws_cdk_lib_1.aws_lambda.LoggingFormat.JSON,
...this.extraLambdaProps,
});
const providers = this.providers.flatMap(provider => {
const status = provider.status(statusFunction);
// Composite providers return an array, regular providers return a single status
return Array.isArray(status) ? status : [status];
});
// expose providers as stack metadata as it's too big for Lambda environment variables
// specifically integration testing got an error because lambda update request was >5kb
const stack = cdk.Stack.of(this);
const f = statusFunction.node.defaultChild;
f.addPropertyOverride('Environment.Variables.LOGICAL_ID', f.logicalId);
f.addPropertyOverride('Environment.Variables.STACK_NAME', stack.stackName);
f.addMetadata('providers', providers);
statusFunction.addToRolePolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
actions: ['cloudformation:DescribeStackResource'],
resources: [stack.stackId],
}));
this.secrets.webhook.grantRead(statusFunction);
this.secrets.github.grantRead(statusFunction);
this.secrets.githubPrivateKey.grantRead(statusFunction);
this.secrets.setup.grantRead(statusFunction);
this.orchestrator.grantRead(statusFunction);
new cdk.CfnOutput(this, 'status command', {
value: `aws --region ${stack.region} lambda invoke --function-name ${statusFunction.functionName} status.json`,
});
const access = this.props?.statusAccess ?? access_1.LambdaAccess.noAccess();
const url = access.bind(this, 'status access', statusFunction);
if (url !== '') {
new cdk.CfnOutput(this, 'status url', {
value: url,
});
}
}
setupFunction() {
const setupFunction = new setup_function_1.SetupFunction(this, 'setup', {
description: 'Setup GitHub Actions integration with self-hosted runners',
environment: {
SETUP_SECRET_ARN: this.secrets.setup.secretArn,
WEBHOOK_SECRET_ARN: this.secrets.webhook.secretArn,
GITHUB_SECRET_ARN: this.secrets.github.secretArn,
GITHUB_PRIVATE_KEY_SECRET_ARN: this.secrets.githubPrivateKey.secretArn,
WEBHOOK_URL: this.webhook.url,
...this.extraLambdaEnv,
},
timeout: cdk.Duration.minutes(3),
logGroup: (0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.SETUP),
loggingFormat: aws_cdk_lib_1.aws_lambda.LoggingFormat.JSON,
...this.extraLambdaProps,
});
// this.secrets.webhook.grantRead(setupFunction);
this.secrets.webhook.grantWrite(setupFunction);
this.secrets.github.grantRead(setupFunction);
this.secrets.github.grantWrite(setupFunction);
// this.secrets.githubPrivateKey.grantRead(setupFunction);
this.secrets.githubPrivateKey.grantWrite(setupFunction);
this.secrets.setup.grantRead(setupFunction);
this.secrets.setup.grantWrite(setupFunction);
const access = this.props?.setupAccess ?? access_1.LambdaAccess.lambdaUrl();
return access.bind(this, 'setup access', setupFunction);
}
checkIntersectingLabels() {
// this "algorithm" is very inefficient, but good enough for the tiny datasets we expect
for (const p1 of this.providers) {
for (const p2 of this.providers) {
if (p1 == p2) {
continue;
}
if (p1.labels.every(l => p2.labels.includes(l))) {
if (p2.labels.every(l => p1.labels.includes(l))) {
throw new Error(`Both ${p1.node.path} and ${p2.node.path} use the same labels [${p1.labels.join(', ')}]`);
}
aws_cdk_lib_1.Annotations.of(p1).addWarning(`Labels [${p1.labels.join(', ')}] intersect with another provider (${p2.node.path} -- [${p2.labels.join(', ')}]). If a workflow specifies the labels [${p1.labels.join(', ')}], it is not guaranteed which provider will be used. It is recommended you do not use intersecting labels`);
}
}
}
}
idleReaper() {
return new idle_runner_repear_function_1.IdleRunnerRepearFunction(this, 'Idle Reaper', {
description: 'Stop idle GitHub runners to avoid paying for runners when the job was already canceled',
environment: {
GITHUB_SECRET_ARN: this.secrets.github.secretArn,
GITHUB_PRIVATE_KEY_SECRET_ARN: this.secrets.githubPrivateKey.secretArn,
...this.extraLambdaEnv,
},
logGroup: (0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.ORCHESTRATOR),
loggingFormat: aws_cdk_lib_1.aws_lambda.LoggingFormat.JSON,
timeout: cdk.Duration.minutes(5),
...this.extraLambdaProps,
});
}
idleReaperQueue(reaper) {
// see this comment to understand why it's a queue that's out of the step function
// https://github.com/CloudSnorkel/cdk-github-runners/pull/314#issuecomment-1528901192
const queue = new aws_cdk_lib_1.aws_sqs.Queue(this, 'Idle Reaper Queue', {
deliveryDelay: cdk.Duration.minutes(10),
visibilityTimeout: cdk.Duration.minutes(10),
});
reaper.addEventSource(new aws_cdk_lib_1.aws_lambda_event_sources.SqsEventSource(queue, {
reportBatchItemFailures: true,
maxBatchingWindow: cdk.Duration.minutes(1),
}));
this.secrets.github.grantRead(reaper);
this.secrets.githubPrivateKey.grantRead(reaper);
return queue;
}
lambdaSecurityGroups() {
if (!this.props?.vpc) {
if (this.props?.securityGroup) {
cdk.Annotations.of(this).addWarning('securityGroup is specified, but vpc is not. securityGroup will be ignored');
}
if (this.props?.securityGroups) {
cdk.Annotations.of(this).addWarning('securityGroups is specified, but vpc is not. securityGroups will be ignored');
}
return undefined;
}
if (this.props.securityGroups) {
if (this.props.securityGroup) {
cdk.Annotations.of(this).addWarning('Both securityGroup and securityGroups are specified. securityGroup will be ignored');
}
return this.props.securityGroups;
}
if (this.props.securityGroup) {
return [this.props.securityGroup];
}
return [new aws_cdk_lib_1.aws_ec2.SecurityGroup(this, 'Management Lambdas Security Group', { vpc: this.props.vpc })];
}
/**
* Extracts all unique IRunnerProvider instances from providers and composite providers (one level only).
* Uses a Set to ensure we don't process the same provider twice, even if it's used in multiple composites.
*
* @returns Set of unique IRunnerProvider instances
*/
extractUniqueSubProviders() {
const seen = new Set();
for (const provider of this.providers) {
// instanceof doesn't really work in CDK so use this hack instead
if ('logGroup' in provider) {
// Regular provider
seen.add(provider);
}
else {
// Composite provider - access the providers field
for (const subProvider of provider.providers) {
seen.add(subProvider);
}
}
}
return seen;
}
/**
* Creates a Lambda layer with certificates if extraCertificates is specified.
*/
createCertificateLayer(scope) {
if (!this.props?.extraCertificates) {
return;
}
const certificateFiles = (0, utils_1.discoverCertificateFiles)(this.props.extraCertificates);
// Concatenate all certificates into a single file for NODE_EXTRA_CA_CERTS
let combinedCertContent = '';
for (const certFile of certificateFiles) {
const certContent = fs.readFileSync(certFile, 'utf8');
combinedCertContent += certContent;
// Ensure proper PEM format with newline between certificates
if (!certContent.endsWith('\n')) {
combinedCertContent += '\n';
}
}
// Create a temporary directory, write the certificate file, create asset, then delete temp dir
const workdir = fs.mkdtempSync(path.join(os.tmpdir(), 'certificate-layer-'));
try {
const certPath = path.join(workdir, 'certs.pem');
fs.writeFileSync(certPath, combinedCertContent);
// Set environment variable and create layer
this.extraLambdaEnv.NODE_EXTRA_CA_CERTS = '/opt/certs.pem';
this.extraLambdaProps.layers.push(new aws_cdk_lib_1.aws_lambda.LayerVersion(scope, 'Certificate Layer', {
description: 'Layer containing GitHub Enterprise Server certificate(s) for cdk-github-runners',
code: aws_cdk_lib_1.aws_lambda.Code.fromAsset(workdir),
}));
}
finally {
// Calling `fromAsset()` has copied files to the assembly, so we can delete the temporary directory.
fs.rmSync(workdir, { recursive: true, force: true });
}
}
/**
* Metric for the number of GitHub Actions jobs completed. It has `ProviderLabels` and `Status` dimensions. The status can be one of "Succeeded", "SucceededWithIssues", "Failed", "Canceled", "Skipped", or "Abandoned".
*
* **WARNING:** this method creates a metric filter for each provider. Each metric has a status dimension with six possible values. These resources may incur cost.
*/
metricJobCompleted(props) {
if (!this.jobsCompletedMetricFiltersInitialized) {
// we can't use logs.FilterPattern.spaceDelimited() because it has no support for ||
// status list taken from https://github.com/actions/runner/blob/be9632302ceef50bfb36ea998cea9c94c75e5d4d/src/Sdk/DTWebApi/WebApi/TaskResult.cs
// we need "..." for Lambda that prefixes some extra data to log lines
const pattern = aws_cdk_lib_1.aws_logs.FilterPattern.literal('[..., marker = "CDKGHA", job = "JOB", done = "DONE", labels, status = "Succeeded" || status = "SucceededWithIssues" || status = "Failed" || status = "Canceled" || status = "Skipped" || status = "Abandoned"]');
// Extract all unique sub-providers from regular and composite providers
// Build a set first to avoid filtering the same log twice
for (const p of this.extractUniqueSubProviders()) {
const metricFilter = p.logGroup.addMetricFilter(`${p.logGroup.node.id} filter`, {
metricNamespace: 'GitHubRunners',
metricName: 'JobCompleted',
filterPattern: pattern,
metricValue: '1',
// can't with dimensions -- defaultValue: 0,
dimensions: {
ProviderLabels: '$labels',
Status: '$status',
},
});
if (metricFilter.node.defaultChild instanceof aws_cdk_lib_1.aws_logs.CfnMetricFilter) {
metricFilter.node.defaultChild.addPropertyOverride('MetricTransformations.0.Unit', 'Count');
}
else {
aws_cdk_lib_1.Annotations.of(metricFilter).addWarning('Unable to set metric filter Unit to Count');
}
}
this.jobsCompletedMetricFiltersInitialized = true;
}
return new aws_cdk_lib_1.aws_cloudwatch.Metric({
namespace: 'GitHubRunners',
metricName: 'JobsCompleted',
unit: aws_cdk_lib_1.aws_cloudwatch.Unit.COUNT,
statistic: aws_cdk_lib_1.aws_cloudwatch.Stats.SUM,
...props,
}).attachTo(this);
}
/**
* Metric for successful executions.
*
* A successful execution doesn't always mean a runner was started. It can be successful even without any label matches.
*
* A successful runner doesn't mean the job it executed was successful. For that, see {@link metricJobCompleted}.
*/
metricSucceeded(props) {
return this.orchestrator.metricSucceeded(props);
}
/**
* Metric for failed runner executions.
*
* A failed runner usually means the runner failed to start and so a job was never executed. It doesn't necessarily mean the job was executed and failed. For that, see {@link metricJobCompleted}.
*/
metricFailed(props) {
return this.orchestrator.metricFailed(props);
}
/**
* Metric for the interval, in milliseconds, between the time the execution starts and the time it closes. This time may be longer than the time the runner took.
*/
metricTime(props) {
return this.orchestrator.metricTime(props);
}
/**
* Creates a topic for notifications when a runner image build fails.
*
* Runner images are rebuilt every week by default. This provides the latest GitHub Runner version and software updates.
*
* If you want to be sure you are using the latest runner version, you can use this topic to be notified when a build fails.
*/
failedImageBuildsTopic() {
const topic = new aws_cdk_lib_1.aws_sns.Topic(this, 'Failed Runner Image Builds');
const stack = cdk.Stack.of(this);
cdk.Aspects.of(stack).add(new providers_1.CodeBuildImageBuilderFailedBuildNotifier(topic));
cdk.Aspects.of(stack).add(new providers_1.AwsImageBuilderFailedBuildNotifier(providers_1.AwsImageBuilderFailedBuildNotifier.createFilteringTopic(this, topic)));
return topic;
}
/**
* Creates CloudWatch Logs Insights saved queries that can be used to debug issues with the runners.
*
* * "Webhook errors" helps diagnose configuration issues with GitHub integration
* * "Ignored webhook" helps understand why runners aren't started
* * "Ignored jobs based on labels" helps debug label matching issues
* * "Webhook started runners" helps understand which runners were started
*/
createLogsInsightsQueries() {
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Webhook errors', {
queryDefinitionName: 'GitHub Runners/Webhook errors',
logGroups: [this.webhook.handler.logGroup],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
filterStatements: [
`strcontains(@logStream, "${this.webhook.handler.functionName}")`,
'level = "ERROR"',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Orchestration errors', {
queryDefinitionName: 'GitHub Runners/Orchestration errors',
logGroups: [(0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.ORCHESTRATOR)],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
filterStatements: [
'level = "ERROR"',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Runner image build errors', {
queryDefinitionName: 'GitHub Runners/Runner image build errors',
logGroups: [(0, utils_1.singletonLogGroup)(this, utils_1.SingletonLogType.RUNNER_IMAGE_BUILD)],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
filterStatements: [
'strcontains(message, "error") or strcontains(message, "ERROR") or strcontains(message, "Error") or level = "ERROR"',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Ignored webhooks', {
queryDefinitionName: 'GitHub Runners/Ignored webhooks',
logGroups: [this.webhook.handler.logGroup],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
fields: ['@timestamp', 'message.notice'],
filterStatements: [
`strcontains(@logStream, "${this.webhook.handler.functionName}")`,
'strcontains(message.notice, "Ignoring")',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Ignored jobs based on labels', {
queryDefinitionName: 'GitHub Runners/Ignored jobs based on labels',
logGroups: [this.webhook.handler.logGroup],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
fields: ['@timestamp', 'message.notice'],
filterStatements: [
`strcontains(@logStream, "${this.webhook.handler.functionName}")`,
'strcontains(message.notice, "Ignoring labels")',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Webhook started runners', {
queryDefinitionName: 'GitHub Runners/Webhook started runners',
logGroups: [this.webhook.handler.logGroup],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
fields: ['@timestamp', 'message.sfnInput.jobUrl', 'message.sfnInput.jobLabels', 'message.sfnInput.labels', 'message.sfnInput.provider'],
filterStatements: [
`strcontains(@logStream, "${this.webhook.handler.functionName}")`,
'message.sfnInput.jobUrl like /http.*/',
],
sort: '@timestamp desc',
limit: 100,
}),
});
new aws_cdk_lib_1.aws_logs.QueryDefinition(this, 'Webhook redeliveries', {
queryDefinitionName: 'GitHub Runners/Webhook redeliveries',
logGroups: [this.redeliverer.handler.logGroup],
queryString: new aws_cdk_lib_1.aws_logs.QueryString({
fields: ['@timestamp', 'message.notice', 'message.deliveryId', 'message.guid'],
filterStatements: [
'isPresent(message.deliveryId)',
],
sort: '@timestamp desc',
limit: 100,
}),
});
}
}
// Publish the construct on this module's CommonJS exports.
exports.GitHubRunners = GitHubRunners;
// Attach type information (fully qualified name and package version) to the class under the "jsii.rtti" symbol.
_a = JSII_RTTI_SYMBOL_1;
GitHubRunners[_a] = { fqn: "@cloudsnorkel/cdk-github-runners.GitHubRunners", version: "0.14.21" };
//# sourceMappingURL=data:application/json;base64,