Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
432 changes: 432 additions & 0 deletions .claude/plans/actions-runner-scale-in-race.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
puppet-code (0.1.0-1build301) noble; urgency=medium

* commit event. see changes history in git log

-- root <packager@infrahouse.com> Sat, 18 Apr 2026 19:07:51 +0000

puppet-code (0.1.0-1build300) noble; urgency=medium

* commit event. see changes history in git log
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Heartbeat the ASG deregistration lifecycle hook for this instance.
#
# Environment:
#   DEREGISTRATION_HOOK_NAME  name of the lifecycle hook to heartbeat; when
#                             unset or empty the script exits 0 immediately.
#
# No-op unless the instance is in Terminating:Wait. Fire-and-forget.
set -eu

hook_name="${DEREGISTRATION_HOOK_NAME:-}"
[[ -z "$hook_name" ]] && exit 0

instance_id=$(ec2metadata --instance-id)

# A single API call yields both the lifecycle state and the owning ASG name
# (whitespace-separated on one line with --output text). Lookup errors are
# deliberately swallowed: an empty result never equals "Terminating:Wait",
# so the script simply does nothing on this run.
details=$(aws autoscaling describe-auto-scaling-instances \
  --instance-ids "$instance_id" \
  --query 'AutoScalingInstances[0].[LifecycleState,AutoScalingGroupName]' \
  --output text 2>/dev/null || echo "")

state=""
asg=""
# First word is the state; everything after it is the group name.
read -r state asg <<<"$details" || true

if [[ "$state" == "Terminating:Wait" ]]; then
  # Refuse to call the API with a missing group name instead of sending a
  # request that is guaranteed to be rejected.
  if [[ -z "$asg" || "$asg" == "None" ]]; then
    echo "heartbeater: cannot determine Auto Scaling group for $instance_id" >&2
    exit 1
  fi
  aws autoscaling record-lifecycle-action-heartbeat \
    --auto-scaling-group-name "$asg" \
    --lifecycle-hook-name "$hook_name" \
    --instance-id "$instance_id"
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Timer unit that periodically triggers gha-lifecycle-heartbeater.service.
[Unit]
Description=Every 10 minutes, heartbeat the deregistration lifecycle hook if needed

[Timer]
# First run: two minutes after boot.
OnBootSec=2min
# Subsequent runs: ten minutes after the triggered unit was last activated.
OnUnitActiveSec=10min
# The unit this timer starts.
Unit=gha-lifecycle-heartbeater.service

[Install]
# When enabled, the timer is pulled in by timers.target.
WantedBy=timers.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# ExecStopPost handler for actions-runner.service.
# If the Auto Scaling group is already terminating this instance, complete
# the deregistration lifecycle hook with CONTINUE so the instance can go
# away cleanly. Does nothing when DEREGISTRATION_HOOK_NAME is unset or empty.
set -eu

hook="${DEREGISTRATION_HOOK_NAME:-}"
if [[ -z "$hook" ]]; then
  exit 0
fi

self_id=$(ec2metadata --instance-id)

# A failed lookup yields an empty state, which matches neither terminating
# state below, so the script ends without acting.
lifecycle=$(aws autoscaling describe-auto-scaling-instances \
  --instance-ids "$self_id" \
  --query 'AutoScalingInstances[0].LifecycleState' --output text 2>/dev/null || echo "")

if [[ "$lifecycle" == "Terminating:Wait" || "$lifecycle" == "Terminating:Proceed" ]]; then
  /usr/local/bin/ih-aws autoscaling complete --result CONTINUE "$hook"
fi
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,22 @@
set -eu

sudo chown -R "$USER" "$GITHUB_WORKSPACE"
/usr/local/bin/ih-aws autoscaling scale-in enable-protection

# Try to protect this instance from scale-in. If the ASG has already
# decided to terminate us, protection is meaningless; let the job run
# and let the deprovisioning path finish us off cleanly.
#
# stderr of the protect call goes to a private mktemp file rather than a
# fixed /tmp path (predictable and shared between concurrent runs). It is
# replayed only when the failure is not explained by termination, and the
# file is removed on every exit path by the trap.
prerun_err=$(mktemp)
trap 'rm -f -- "$prerun_err"' EXIT

if ! /usr/local/bin/ih-aws autoscaling scale-in enable-protection 2>"$prerun_err"; then
  instance_id=$(ec2metadata --instance-id)
  # A failed lookup yields an empty state and falls through to the error arm.
  state=$(aws autoscaling describe-auto-scaling-instances \
    --instance-ids "$instance_id" \
    --query 'AutoScalingInstances[0].LifecycleState' --output text 2>/dev/null || echo "")
  case "$state" in
    Terminating:Wait|Terminating:Proceed)
      echo "prerun: instance is in $state — skipping protect, job will proceed" >&2
      ;;
    *)
      # Unexpected failure: surface the original error and fail the hook.
      cat -- "$prerun_err" >&2
      exit 1
      ;;
  esac
fi
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@
creates => "${runner_package_directory}/.credentials",
require => [
Exec[extract_runner_package]
]
],
notify => Exec['delete_registration_token'],
}

# Deletes the Secrets Manager secret identified by $token_secret, passing
# --force-delete-without-recovery to the AWS CLI.
# refreshonly: runs only when another resource notifies this exec; it never
# runs on its own during a normal catalog apply.
exec { 'delete_registration_token':
user => $user,
path => '/usr/bin:/usr/local/bin',
command => "aws secretsmanager delete-secret --secret-id ${token_secret} --force-delete-without-recovery",
refreshonly => true,
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
$env_file = "${runner_package_directory}/.env"
$prerun_path = '/usr/local/bin/gha_prerun.sh'
$postrun_path = '/usr/local/bin/gha_postrun.sh'
# Script referenced by ExecStopPost in the runner unit template.
$on_exit_path = '/usr/local/bin/gha-on-runner-exit.sh'
# Heartbeater script plus the systemd service/timer unit files that run it.
$heartbeater_script = '/usr/local/bin/gha-lifecycle-heartbeater.sh'
$heartbeater_service = '/etc/systemd/system/gha-lifecycle-heartbeater.service'
$heartbeater_timer = '/etc/systemd/system/gha-lifecycle-heartbeater.timer'
# Lifecycle hook name taken from the 'deregistration_hookname' fact; the ''
# default keeps the variable defined (as an empty string) when the fact
# has no value.
$deregistration_hookname = pick_default($facts['deregistration_hookname'], '')

file { $env_file:
ensure => file,
Expand Down Expand Up @@ -56,6 +61,22 @@
mode => '0755',
}

# Install the runner-exit script from the module's static files as a
# root-owned executable (0755).
file { $on_exit_path:
ensure => file,
source => 'puppet:///modules/profile/github_runner/gha-on-runner-exit.sh',
owner => 'root',
group => 'root',
mode => '0755',
}

# Install the lifecycle heartbeater script from the module's static files
# as a root-owned executable (0755).
file { $heartbeater_script:
ensure => file,
source => 'puppet:///modules/profile/github_runner/gha-lifecycle-heartbeater.sh',
owner => 'root',
group => 'root',
mode => '0755',
}

file { $start_script:
ensure => file,
content => template('profile/github_runner/start-actions-runner.sh.erb'),
Expand All @@ -73,6 +94,24 @@
notify => Exec['daemon-reload'],
}

# Render the heartbeater service unit from its ERB template. Any change
# notifies Exec['daemon-reload'] so systemd re-reads its unit files.
file { $heartbeater_service:
ensure => file,
content => template('profile/github_runner/gha-lifecycle-heartbeater.service.erb'),
owner => 'root',
group => 'root',
mode => '0644',
notify => Exec['daemon-reload'],
}

# Install the heartbeater timer unit from the module's static files. Any
# change notifies Exec['daemon-reload'] so systemd re-reads its unit files.
file { $heartbeater_timer:
ensure => file,
source => 'puppet:///modules/profile/github_runner/gha-lifecycle-heartbeater.timer',
owner => 'root',
group => 'root',
mode => '0644',
notify => Exec['daemon-reload'],
}

exec { 'daemon-reload':
command => '/usr/bin/systemctl daemon-reload',
refreshonly => true,
Expand All @@ -88,6 +127,17 @@
]
}

# Keep the heartbeater timer enabled and running. The script, both unit
# files and the daemon-reload exec are ordered before this resource.
service { 'gha-lifecycle-heartbeater.timer':
ensure => running,
enable => true,
require => [
File[$heartbeater_script],
File[$heartbeater_service],
File[$heartbeater_timer],
Exec['daemon-reload'],
]
}

cron { 'check-health':
command => [
'ih-github',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
[Unit]
Description=GitHub self-hosted runner
After=network.target
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=<%= @start_script %>
# Runs after the service has stopped; the script completes the
# deregistration lifecycle hook when the instance is being terminated.
ExecStopPost=/usr/local/bin/gha-on-runner-exit.sh
# Read by gha-on-runner-exit.sh; an empty value makes that script a no-op.
Environment=DEREGISTRATION_HOOK_NAME=<%= @deregistration_hookname %>
WorkingDirectory=<%= @runner_package_directory %>
User=<%= @github_runner_user %>
Group=<%= @github_runner_group %>
# On stop, signal only the main process rather than the whole control group.
KillMode=process
KillSignal=SIGTERM
# 21600 s = 6 h before systemd gives up waiting for the stop to finish.
# NOTE(review): presumably sized to let an in-flight job complete - confirm.
TimeoutStopSec=21600
Restart=on-failure

[Install]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# One-shot service that runs the lifecycle heartbeater script once per
# activation.
[Unit]
Description=Extend deregistration lifecycle hook while this instance is terminating

[Service]
Type=oneshot
ExecStart=/usr/local/bin/gha-lifecycle-heartbeater.sh
# Read by the heartbeater script; an empty value makes it exit 0 at once.
Environment=DEREGISTRATION_HOOK_NAME=<%= @deregistration_hookname %>
# NOTE(review): Restart= on Type=oneshot units is only accepted by newer
# systemd releases - confirm the target hosts support it.
Restart=on-failure
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

set -eu

instance_id=$(ec2metadata --instance-id)

while true
do
state="$(aws autoscaling describe-auto-scaling-instances --instance-ids "$(ec2metadata --instance-id)" | jq -r .AutoScalingInstances[0].LifecycleState)"
state=$(aws autoscaling describe-auto-scaling-instances \
--instance-ids "$instance_id" \
--query 'AutoScalingInstances[0].LifecycleState' --output text)
if [[ "$state" == "InService" ]]; then
break
else
Expand All @@ -13,5 +17,4 @@ do
fi
done

# Start actions-runner
<%= @runner_package_directory %>/run.sh
exec <%= @runner_package_directory %>/bin/runsvc.sh
Loading