@cloudsnorkel/cdk-github-runners
Version:
CDK construct to create GitHub Actions self-hosted runners. Creates ephemeral runners on demand. Easy to deploy and highly customizable.
435 lines (423 loc) • 62.1 kB
JavaScript
"use strict";
var _a, _b;
Object.defineProperty(exports, "__esModule", { value: true });
exports.Ec2Runner = exports.Ec2RunnerProvider = void 0;
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
const cdk = require("aws-cdk-lib");
const aws_cdk_lib_1 = require("aws-cdk-lib");
const aws_logs_1 = require("aws-cdk-lib/aws-logs");
const aws_stepfunctions_1 = require("aws-cdk-lib/aws-stepfunctions");
const common_1 = require("./common");
const image_builders_1 = require("../image-builders");
const utils_1 = require("../utils");
// Linux user data script. It is rendered with States.Format in getStepFunctionTask().
// this script is specifically made so `poweroff` is absolutely always called
// each `{}` is a variable coming from `params` below; every other brace is backslash-escaped by the replace() chain at the end
// NOTE: keep backslashes out of this template. In a JavaScript template literal an unknown escape such as `\K`
// evaluates to the bare character (`K`), which would silently break a PCRE `\K` pattern. The job result is
// therefore cut out with sed instead of `grep -P ... \K`.
// NOTE(review): the `if` near the end tests the exit status of `tee`, not of `action` (no `pipefail` is set),
// so a failing `action` still takes the success branch — confirm whether that is intended before changing it.
const linuxUserDataTemplate = `#!/bin/bash -x
TASK_TOKEN="{}"
logGroupName="{}"
runnerNamePath="{}"
githubDomainPath="{}"
ownerPath="{}"
repoPath="{}"
runnerTokenPath="{}"
labels="{}"
registrationURL="{}"
runnerGroup1="{}"
runnerGroup2="{}"
defaultLabels="{}"
heartbeat () {
while true; do
aws stepfunctions send-task-heartbeat --task-token "$TASK_TOKEN"
sleep 60
done
}
setup_logs () {
cat <<EOF > /tmp/log.conf || exit 1
{
"logs": {
"log_stream_name": "unknown",
"logs_collected": {
"files": {
"collect_list": [
{
"file_path": "/var/log/runner.log",
"log_group_name": "$logGroupName",
"log_stream_name": "$runnerNamePath",
"timezone": "UTC"
}
]
}
}
}
}
EOF
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/tmp/log.conf || exit 2
}
action () {
# Determine the value of RUNNER_FLAGS
if [ "$(< /home/runner/RUNNER_VERSION)" = "latest" ]; then
RUNNER_FLAGS=""
else
RUNNER_FLAGS="--disableupdate"
fi
labelsTemplate="$labels,cdkghr:started:$(date +%s)"
# Execute the configuration command for runner registration
sudo -Hu runner /home/runner/config.sh --unattended --url "$registrationURL" --token "$runnerTokenPath" --ephemeral --work _work --labels "$labelsTemplate" $RUNNER_FLAGS --name "$runnerNamePath" $runnerGroup1 $runnerGroup2 $defaultLabels || exit 1
# Execute the run command
sudo --preserve-env=AWS_REGION -Hu runner /home/runner/run.sh || exit 2
# Retrieve the status (last matching line, with everything up to "with result: " stripped)
STATUS=$(grep -Phors "finish job request for job [0-9a-f-]+ with result: .*" /home/runner/_diag/ | tail -n1 | sed 's/.*with result: //')
# Check and print the job status
[ -n "$STATUS" ] && echo CDKGHA JOB DONE "$labels" "$STATUS"
}
heartbeat &
if setup_logs && action | tee /var/log/runner.log 2>&1; then
aws stepfunctions send-task-success --task-token "$TASK_TOKEN" --task-output '{"ok": true}'
else
aws stepfunctions send-task-failure --task-token "$TASK_TOKEN"
fi
sleep 10 # give cloudwatch agent its default 5 seconds buffer duration to upload logs
poweroff
`.replace(/{/g, '\\{').replace(/}/g, '\\}').replace(/\\{\\}/g, '{}');
// Windows (PowerShell) user data script, the counterpart of the Linux script above.
// this script is specifically made so the machine is absolutely always shut down (`Stop-Computer` at the end)
// each `{}` is a variable coming from `params` below and their order should match the linux script
// every other brace is backslash-escaped by the replace() chain at the end; a literal `{}` anywhere in the
// script would be kept as a placeholder, so note that the success output below is spelled `{ }` with a space
const windowsUserDataTemplate = `<powershell>
$TASK_TOKEN = "{}"
$logGroupName="{}"
$runnerNamePath="{}"
$githubDomainPath="{}"
$ownerPath="{}"
$repoPath="{}"
$runnerTokenPath="{}"
$labels="{}"
$registrationURL="{}"
$runnerGroup1="{}"
$runnerGroup2="{}"
$defaultLabels="{}"
# EC2Launch only starts ssm agent after user data is done, so we need to start it ourselves (it is disabled by default)
Set-Service -StartupType Manual AmazonSSMAgent
Start-Service AmazonSSMAgent
Start-Job -ScriptBlock {
while (1) {
aws stepfunctions send-task-heartbeat --task-token "$using:TASK_TOKEN"
sleep 60
}
}
function setup_logs () {
echo "{
\`"logs\`": {
\`"log_stream_name\`": \`"unknown\`",
\`"logs_collected\`": {
\`"files\`": {
\`"collect_list\`": [
{
\`"file_path\`": \`"/actions/runner.log\`",
\`"log_group_name\`": \`"$logGroupName\`",
\`"log_stream_name\`": \`"$runnerNamePath\`",
\`"timezone\`": \`"UTC\`"
}
]
}
}
}
}" | Out-File -Encoding ASCII $Env:TEMP/log.conf
& "C:/Program Files/Amazon/AmazonCloudWatchAgent/amazon-cloudwatch-agent-ctl.ps1" -a fetch-config -m ec2 -s -c file:$Env:TEMP/log.conf
}
function action () {
cd /actions
$RunnerVersion = Get-Content /actions/RUNNER_VERSION -Raw
if ($RunnerVersion -eq "latest") { $RunnerFlags = "" } else { $RunnerFlags = "--disableupdate" }
./config.cmd --unattended --url "\${registrationUrl}" --token "\${runnerTokenPath}" --ephemeral --work _work --labels "\${labels},cdkghr:started:$(Get-Date -UFormat +%s)" $RunnerFlags --name "\${runnerNamePath}" \${runnerGroup1} \${runnerGroup2} \${defaultLabels} 2>&1 | Out-File -Encoding ASCII -Append /actions/runner.log
if ($LASTEXITCODE -ne 0) { return 1 }
./run.cmd 2>&1 | Out-File -Encoding ASCII -Append /actions/runner.log
if ($LASTEXITCODE -ne 0) { return 2 }
$STATUS = Select-String -Path './_diag/*.log' -Pattern 'finish job request for job [0-9a-f\\-]+ with result: (.*)' | %{$_.Matches.Groups[1].Value} | Select-Object -Last 1
if ($STATUS) {
echo "CDKGHA JOB DONE \${labels} $STATUS" | Out-File -Encoding ASCII -Append /actions/runner.log
}
return 0
}
setup_logs
$r = action
if ($r -eq 0) {
aws stepfunctions send-task-success --task-token "$TASK_TOKEN" --task-output '{ }'
} else {
aws stepfunctions send-task-failure --task-token "$TASK_TOKEN"
}
Start-Sleep -Seconds 10 # give cloudwatch agent its default 5 seconds buffer duration to upload logs
Stop-Computer -ComputerName localhost -Force
</powershell>
`.replace(/{/g, '\\{').replace(/}/g, '\\}').replace(/\\{\\}/g, '{}');
/**
 * GitHub Actions runner provider using EC2 to execute jobs.
 *
 * This construct is not meant to be used by itself. It should be passed in the providers property for GitHubRunners.
 */
class Ec2RunnerProvider extends common_1.BaseProvider {
    /**
     * Create new image builder that builds EC2 specific runner images.
     *
     * You can customize the OS, architecture, VPC, subnet, security groups, etc. by passing in props.
     *
     * You can add components to the image builder by calling `imageBuilder.addComponent()`.
     *
     * The default OS is Ubuntu running on x64 architecture.
     *
     * Included components:
     *  * `RunnerImageComponent.requiredPackages()`
     *  * `RunnerImageComponent.cloudWatchAgent()`
     *  * `RunnerImageComponent.runnerUser()`
     *  * `RunnerImageComponent.git()`
     *  * `RunnerImageComponent.githubCli()`
     *  * `RunnerImageComponent.awsCli()`
     *  * `RunnerImageComponent.docker()`
     *  * `RunnerImageComponent.githubRunner()`
     *
     * @param scope parent construct
     * @param id construct id
     * @param props optional overrides; they are spread last and so replace any of the defaults listed above
     */
    static imageBuilder(scope, id, props) {
        return image_builders_1.RunnerImageBuilder.new(scope, id, {
            os: common_1.Os.LINUX_UBUNTU,
            architecture: common_1.Architecture.X86_64,
            builderType: image_builders_1.RunnerImageBuilderType.AWS_IMAGE_BUILDER,
            components: [
                image_builders_1.RunnerImageComponent.requiredPackages(),
                image_builders_1.RunnerImageComponent.cloudWatchAgent(),
                image_builders_1.RunnerImageComponent.runnerUser(),
                image_builders_1.RunnerImageComponent.git(),
                image_builders_1.RunnerImageComponent.githubCli(),
                image_builders_1.RunnerImageComponent.awsCli(),
                image_builders_1.RunnerImageComponent.docker(),
                image_builders_1.RunnerImageComponent.githubRunner(props?.runnerVersion ?? common_1.RunnerVersion.latest()),
            ],
            ...props,
        });
    }
    /**
     * Resolve the provider settings, bind the AMI, and create the instance role and log group.
     *
     * @param scope parent construct
     * @param id construct id
     * @param props optional provider settings; see the fallbacks applied below
     * @throws Error when the runner storage is smaller than the image builder storage, or when the AMI
     * architecture does not match the instance type
     */
    constructor(scope, id, props) {
        super(scope, id, props);
        this.retryableErrors = [
            'Ec2.Ec2Exception',
            'States.Timeout',
        ];
        this.labels = props?.labels ?? ['ec2'];
        this.group = props?.group;
        this.vpc = props?.vpc ?? aws_cdk_lib_1.aws_ec2.Vpc.fromLookup(this, 'Default VPC', { isDefault: true });
        // a single `securityGroup` wins over `securityGroups`; with neither, a new group is created in the VPC
        this.securityGroups = props?.securityGroup ? [props.securityGroup] : (props?.securityGroups ?? [new aws_cdk_lib_1.aws_ec2.SecurityGroup(this, 'SG', { vpc: this.vpc })]);
        // a single `subnet` wins over `subnetSelection`
        this.subnets = props?.subnet ? [props.subnet] : this.vpc.selectSubnets(props?.subnetSelection).subnets;
        this.instanceType = props?.instanceType ?? aws_cdk_lib_1.aws_ec2.InstanceType.of(aws_cdk_lib_1.aws_ec2.InstanceClass.M6I, aws_cdk_lib_1.aws_ec2.InstanceSize.LARGE);
        this.storageSize = props?.storageSize ?? cdk.Size.gibibytes(30); // 30 is the minimum for Windows
        this.storageOptions = props?.storageOptions;
        this.spot = props?.spot ?? false;
        this.spotMaxPrice = props?.spotMaxPrice;
        this.defaultLabels = props?.defaultLabels ?? true;
        this.amiBuilder = props?.imageBuilder ?? props?.amiBuilder ?? Ec2RunnerProvider.imageBuilder(this, 'Ami Builder', {
            vpc: props?.vpc,
            subnetSelection: props?.subnetSelection,
            securityGroups: this.securityGroups,
        });
        this.ami = this.amiBuilder.bindAmi();
        if (this.amiBuilder instanceof image_builders_1.AwsImageBuilderRunnerImageBuilder) {
            if (this.amiBuilder.storageSize && this.storageSize.toBytes() < this.amiBuilder.storageSize.toBytes()) {
                throw new Error(`Runner storage size (${this.storageSize.toGibibytes()} GiB) must be at least the same as the image builder storage size (${this.amiBuilder.storageSize.toGibibytes()} GiB)`);
            }
        }
        if (!this.ami.architecture.instanceTypeMatch(this.instanceType)) {
            throw new Error(`AMI architecture (${this.ami.architecture.name}) doesn't match runner instance type (${this.instanceType} / ${this.instanceType.architecture})`);
        }
        this.grantPrincipal = this.role = new aws_cdk_lib_1.aws_iam.Role(this, 'Role', {
            assumedBy: new aws_cdk_lib_1.aws_iam.ServicePrincipal('ec2.amazonaws.com'),
        });
        // the instance reports back to the state machine itself (heartbeat, success, failure)
        this.grantPrincipal.addToPrincipalPolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
            actions: ['states:SendTaskFailure', 'states:SendTaskSuccess', 'states:SendTaskHeartbeat'],
            resources: ['*'], // no support for stateMachine.stateMachineArn :(
            conditions: {
                StringEquals: {
                    'aws:ResourceTag/aws:cloudformation:stack-id': cdk.Stack.of(this).stackId,
                },
            },
        }));
        this.grantPrincipal.addToPrincipalPolicy(utils_1.MINIMAL_EC2_SSM_SESSION_MANAGER_POLICY_STATEMENT);
        this.logGroup = new aws_cdk_lib_1.aws_logs.LogGroup(this, 'Logs', {
            retention: props?.logRetention ?? aws_logs_1.RetentionDays.ONE_MONTH,
            removalPolicy: aws_cdk_lib_1.RemovalPolicy.DESTROY,
        });
        this.logGroup.grantWrite(this);
    }
    /**
     * Generate step function task(s) to start a new runner.
     *
     * Called by GithubRunners and shouldn't be called manually.
     *
     * @param parameters workflow job details
     * @returns the first state of the chain: a Pass state holding the user data template, followed by one
     * `ec2:RunInstances` task per subnet, each one catching errors into the next
     * @throws Error when the provider has no subnets to launch instances in
     */
    getStepFunctionTask(parameters) {
        // fail with a clear message instead of an opaque TypeError when chaining to a non-existent first subnet state
        if (this.subnets.length === 0) {
            throw new Error(`No subnets found for EC2 runner provider ${this.node.path}. Check the VPC and the subnet selection.`);
        }
        // we need to build user data in two steps because passing the template as the first parameter to stepfunctions.JsonPath.format fails on syntax
        // the order of these values must match the order of the `{}` placeholders in the user data templates
        const params = [
            aws_cdk_lib_1.aws_stepfunctions.JsonPath.taskToken,
            this.logGroup.logGroupName,
            parameters.runnerNamePath,
            parameters.githubDomainPath,
            parameters.ownerPath,
            parameters.repoPath,
            parameters.runnerTokenPath,
            parameters.labelsPath,
            parameters.registrationUrl,
            this.group ? '--runnergroup' : '',
            // this is split into 2 for powershell otherwise it will pass "--runnergroup name" as a single argument and config.sh will fail
            this.group ? this.group : '',
            this.defaultLabels ? '' : '--no-default-labels',
        ];
        const passUserData = new aws_cdk_lib_1.aws_stepfunctions.Pass(this, 'Data', {
            stateName: (0, common_1.generateStateName)(this, 'data'),
            parameters: {
                userdataTemplate: this.ami.os.is(common_1.Os.WINDOWS) ? windowsUserDataTemplate : linuxUserDataTemplate,
            },
            resultPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.ec2'),
        });
        // we use ec2:RunInstances because we must
        // we can't use fleets because they don't let us override user data, security groups or even disk size
        // we can't use requestSpotInstances because it doesn't support launch templates, and it's deprecated
        // ec2:RunInstances also seemed like the only one to immediately return an error when spot capacity is not available
        // we build a complicated chain of states here because ec2:RunInstances can only try one subnet at a time
        // if someone can figure out a good way to use Map for this, please open a PR
        // build a state for each subnet we want to try
        const instanceProfile = new aws_cdk_lib_1.aws_iam.CfnInstanceProfile(this, 'Instance Profile', {
            roles: [this.role.roleName],
        });
        const rootDeviceResource = (0, common_1.amiRootDevice)(this, this.ami.launchTemplate.launchTemplateId);
        rootDeviceResource.node.addDependency(this.amiBuilder);
        const subnetRunners = this.subnets.map(subnet => {
            return new aws_cdk_lib_1.aws_stepfunctions_tasks.CallAwsService(this, subnet.subnetId, {
                stateName: (0, common_1.generateStateName)(this, subnet.subnetId),
                comment: subnet.availabilityZone,
                integrationPattern: aws_stepfunctions_1.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
                service: 'ec2',
                action: 'runInstances',
                heartbeatTimeout: aws_cdk_lib_1.aws_stepfunctions.Timeout.duration(aws_cdk_lib_1.Duration.minutes(10)),
                parameters: {
                    LaunchTemplate: {
                        LaunchTemplateId: this.ami.launchTemplate.launchTemplateId,
                    },
                    MinCount: 1,
                    MaxCount: 1,
                    InstanceType: this.instanceType.toString(),
                    UserData: aws_cdk_lib_1.aws_stepfunctions.JsonPath.base64Encode(aws_cdk_lib_1.aws_stepfunctions.JsonPath.format(aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.ec2.userdataTemplate'), ...params)),
                    // the user data scripts power the machine off when done, which terminates the instance
                    InstanceInitiatedShutdownBehavior: aws_cdk_lib_1.aws_ec2.InstanceInitiatedShutdownBehavior.TERMINATE,
                    IamInstanceProfile: {
                        Arn: instanceProfile.attrArn,
                    },
                    MetadataOptions: {
                        HttpTokens: 'required',
                    },
                    SecurityGroupIds: this.securityGroups.map(sg => sg.securityGroupId),
                    SubnetId: subnet.subnetId,
                    BlockDeviceMappings: [{
                            DeviceName: rootDeviceResource.ref,
                            Ebs: {
                                DeleteOnTermination: true,
                                VolumeSize: this.storageSize.toGibibytes(),
                                VolumeType: this.storageOptions?.volumeType,
                                Iops: this.storageOptions?.iops,
                                Throughput: this.storageOptions?.throughput,
                            },
                        }],
                    InstanceMarketOptions: this.spot ? {
                        MarketType: 'spot',
                        SpotOptions: {
                            MaxPrice: this.spotMaxPrice,
                            SpotInstanceType: 'one-time',
                        },
                    } : undefined,
                    // the same provider tag goes on both the instance and its volume
                    TagSpecifications: ['instance', 'volume'].map(resourceType => ({
                        ResourceType: resourceType,
                        Tags: [{
                                Key: 'GitHubRunners:Provider',
                                Value: this.node.path,
                            }],
                    })),
                },
                iamResources: ['*'],
            });
        });
        // start with the first subnet
        passUserData.next(subnetRunners[0]);
        // chain up the rest of the subnets: when one subnet fails, fall through to the next one
        for (let i = 1; i < subnetRunners.length; i++) {
            subnetRunners[i - 1].addCatch(subnetRunners[i], {
                errors: ['Ec2.Ec2Exception', 'States.Timeout'],
                resultPath: aws_cdk_lib_1.aws_stepfunctions.JsonPath.stringAt('$.lastSubnetError'),
            });
        }
        return passUserData;
    }
    /**
     * Add the permissions the state machine role needs for the tasks generated by this provider:
     * passing the instance role to EC2, tagging EC2 resources, and creating the Spot service-linked role.
     *
     * @param stateMachineRole grantable whose principal receives the policy statements
     */
    grantStateMachine(stateMachineRole) {
        stateMachineRole.grantPrincipal.addToPrincipalPolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
            actions: ['iam:PassRole'],
            resources: [this.role.roleArn],
            conditions: {
                StringEquals: {
                    'iam:PassedToService': 'ec2.amazonaws.com',
                },
            },
        }));
        stateMachineRole.grantPrincipal.addToPrincipalPolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
            actions: ['ec2:createTags'],
            resources: [aws_cdk_lib_1.Stack.of(this).formatArn({
                    service: 'ec2',
                    resource: '*',
                })],
        }));
        stateMachineRole.grantPrincipal.addToPrincipalPolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
            actions: ['iam:CreateServiceLinkedRole'],
            resources: ['*'],
            conditions: {
                StringEquals: {
                    'iam:AWSServiceName': 'spot.amazonaws.com',
                },
            },
        }));
    }
    /**
     * Describe this provider for the status function.
     *
     * Also allows the status function role to call `ec2:DescribeLaunchTemplateVersions`.
     *
     * @param statusFunctionRole grantable whose principal receives the extra permission
     * @returns plain object with the provider type, labels, construct path, security groups, role, log group and AMI details
     */
    status(statusFunctionRole) {
        statusFunctionRole.grantPrincipal.addToPrincipalPolicy(new aws_cdk_lib_1.aws_iam.PolicyStatement({
            actions: ['ec2:DescribeLaunchTemplateVersions'],
            resources: ['*'],
        }));
        return {
            type: this.constructor.name,
            labels: this.labels,
            constructPath: this.node.path,
            securityGroups: this.securityGroups.map(sg => sg.securityGroupId),
            roleArn: this.role.roleArn,
            logGroup: this.logGroup.logGroupName,
            ami: {
                launchTemplate: this.ami.launchTemplate.launchTemplateId || 'unknown',
                amiBuilderLogGroup: this.ami.logGroup?.logGroupName,
            },
        };
    }
    /**
     * The network connections associated with this resource.
     */
    get connections() {
        return new aws_cdk_lib_1.aws_ec2.Connections({ securityGroups: this.securityGroups });
    }
}
// Export the provider and attach its jsii runtime type information (fully-qualified name and
// package version) to the class under the `Symbol.for("jsii.rtti")` key.
exports.Ec2RunnerProvider = Ec2RunnerProvider;
_a = JSII_RTTI_SYMBOL_1;
Ec2RunnerProvider[_a] = { fqn: "@cloudsnorkel/cdk-github-runners.Ec2RunnerProvider", version: "0.14.21" };
/**
 * Deprecated name for {@link Ec2RunnerProvider}. The subclass body is empty, so it adds no members
 * of its own and inherits everything from the provider.
 *
 * @deprecated use {@link Ec2RunnerProvider}
 */
class Ec2Runner extends Ec2RunnerProvider {
}
// Export the deprecated class and attach its own jsii runtime type information.
exports.Ec2Runner = Ec2Runner;
_b = JSII_RTTI_SYMBOL_1;
Ec2Runner[_b] = { fqn: "@cloudsnorkel/cdk-github-runners.Ec2Runner", version: "0.14.21" };
//# sourceMappingURL=data:application/json;base64,