fix(runner): preserve guarded active cd drain lane
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 48s
CD Pipeline / build-and-deploy (push) Successful in 4m23s
CD Pipeline / post-deploy-checks (push) Successful in 2m2s
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 48s
CD Pipeline / build-and-deploy (push) Successful in 4m23s
CD Pipeline / post-deploy-checks (push) Successful in 2m2s
This commit is contained in:
@@ -111,6 +111,16 @@ as_root() {
|
||||
fi
|
||||
}
|
||||
|
||||
#######################################
# Recursively clear the immutable attribute (chattr -R -i) on a path,
# best-effort, capping the run at 5 seconds when timeout(1) is available.
# Arguments: $1 - file or directory whose immutable flag should be cleared
# Outputs:   none (all command output is discarded)
# Returns:   always 0; a missing path, a chattr failure or a timeout are
#            deliberately ignored so callers can carry on with their move.
#######################################
bounded_chattr_recursive_clear() {
  local path="$1"
  # Probe through as_root, the same wrapper chattr runs under. Callers feed
  # paths discovered with `as_root find` (e.g. under /root); a plain
  # `[ -e ]` in the invoking shell may be unable to stat those and would
  # silently skip the clear.
  as_root test -e "$path" >/dev/null 2>&1 || return 0
  if command -v timeout >/dev/null 2>&1; then
    # Bound the recursive walk so a huge or stuck tree cannot stall the run.
    as_root timeout 5s chattr -R -i "$path" >/dev/null 2>&1 || true
    return 0
  fi
  # No timeout(1) on this host: fall back to an unbounded clear.
  as_root chattr -R -i "$path" >/dev/null 2>&1 || true
}
|
||||
|
||||
host_is_110() {
|
||||
if command -v ip >/dev/null 2>&1; then
|
||||
ip -o -4 addr show 2>/dev/null | awk '{print $4}' | grep -q '^192\.168\.0\.110/'
|
||||
@@ -255,6 +265,22 @@ controlled_drain_service_inactive() {
|
||||
[ "$unitfile" != "enabled" ] || return 1
|
||||
}
|
||||
|
||||
# Succeeds only when the controlled drain registration file exists and has
# a size greater than zero.
# Globals: CONTROLLED_DRAIN_REGISTRATION (read)
# Returns: 0 when the file is present and non-empty, 1 otherwise
controlled_drain_registration_present() {
  if test -s "$CONTROLLED_DRAIN_REGISTRATION"; then
    return 0
  fi
  return 1
}
|
||||
|
||||
#######################################
# Check that the controlled drain unit is currently running in a state the
# enforcer is willing to leave alone.
# Globals:   CONTROLLED_DRAIN_UNIT (read)
# Returns:   0 when LoadState is "loaded", ActiveState is "active", MainPID
#            is non-zero and UnitFileState is not "masked"; 1 otherwise,
#            including when systemctl produces no usable output.
#######################################
controlled_drain_service_active_guarded() {
  local load="" active="" unitfile="" mainpid="" key value
  # Fetch all four properties with a single systemctl query so the checks
  # below judge one consistent reading; separate queries could combine
  # values from before and after a state transition.
  while IFS='=' read -r key value; do
    case "$key" in
      LoadState) load="$value" ;;
      ActiveState) active="$value" ;;
      UnitFileState) unitfile="$value" ;;
      MainPID) mainpid="$value" ;;
    esac
  done < <(
    systemctl show "$CONTROLLED_DRAIN_UNIT" \
      -p LoadState -p ActiveState -p UnitFileState -p MainPID 2>/dev/null || true
  )
  [ "$load" = "loaded" ] || return 1
  [ "$active" = "active" ] || return 1
  # An empty MainPID (query failed) is treated like 0, i.e. no main process.
  [ "${mainpid:-0}" != "0" ] || return 1
  [ "$unitfile" != "masked" ] || return 1
}
|
||||
|
||||
controlled_drain_staging_allowed() {
|
||||
controlled_drain_config_safe \
|
||||
&& controlled_drain_binary_safe \
|
||||
@@ -262,6 +288,22 @@ controlled_drain_staging_allowed() {
|
||||
&& controlled_drain_service_inactive
|
||||
}
|
||||
|
||||
# Decide whether a running controlled drain lane may be left in place.
# Every guard must pass, evaluated in this order; the first failing guard
# stops evaluation and its exit status is returned.
# Returns: 0 when all five guards succeed, otherwise the failing guard's status
controlled_drain_active_allowed() {
  controlled_drain_config_safe || return
  controlled_drain_binary_safe || return
  controlled_drain_unit_safe || return
  controlled_drain_registration_present || return
  controlled_drain_service_active_guarded
}
|
||||
|
||||
# The controlled drain lane may be preserved when either the staging
# allowance or the guarded-active allowance holds. The active check is only
# evaluated when the staging check fails.
# Returns: 0 when either allowance holds, otherwise the active check's status
controlled_drain_preserve_allowed() {
  if controlled_drain_staging_allowed; then
    return 0
  fi
  controlled_drain_active_allowed
}
|
||||
|
||||
# Lane process gate: passes when count_lane_processes reports "0", or, if it
# reports anything else, when the guarded-active drain allowance holds.
# NOTE(review): once the active allowance holds, any non-zero count is
# accepted; confirm count_lane_processes cannot include processes from lanes
# other than the controlled drain.
# Returns: 0 when the gate passes, otherwise the active allowance's status
lane_process_count_ok() {
  if [ "$(count_lane_processes)" = "0" ]; then
    return 0
  fi
  controlled_drain_active_allowed
}
|
||||
|
||||
list_action_runner_units() {
|
||||
{
|
||||
systemctl list-unit-files 'actions.runner.*' --no-legend --plain 2>/dev/null | awk '{print $1}'
|
||||
@@ -272,9 +314,11 @@ list_action_runner_units() {
|
||||
stop_and_mask_units() {
|
||||
local unit
|
||||
for unit in "${RUNNER_UNITS[@]}"; do
|
||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then
|
||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then
|
||||
as_root systemctl reset-failed "$unit" >/dev/null 2>&1 || true
|
||||
as_root systemctl disable "$unit" >/dev/null 2>&1 || true
|
||||
if controlled_drain_staging_allowed; then
|
||||
as_root systemctl disable "$unit" >/dev/null 2>&1 || true
|
||||
fi
|
||||
continue
|
||||
fi
|
||||
as_root systemctl kill --signal=SIGKILL "$unit" >/dev/null 2>&1 || true
|
||||
@@ -301,7 +345,9 @@ stop_and_mask_action_runner_units() {
|
||||
|
||||
kill_runner_processes() {
|
||||
pkill -KILL -f '^/home/wooo/awoooi-cd-lane/awoooi_cd_lane' >/dev/null 2>&1 || true
|
||||
pkill -KILL -f '^/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled' >/dev/null 2>&1 || true
|
||||
if ! controlled_drain_active_allowed; then
|
||||
pkill -KILL -f '^/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled' >/dev/null 2>&1 || true
|
||||
fi
|
||||
pkill -KILL -f '^/home/wooo/act-runner/act_runner' >/dev/null 2>&1 || true
|
||||
pkill -KILL -f '^/home/wooo/act-runner-controlled/act_runner' >/dev/null 2>&1 || true
|
||||
pkill -KILL -f '^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner' >/dev/null 2>&1 || true
|
||||
@@ -348,7 +394,7 @@ seal_lane_binary_restore_sources() {
|
||||
local path
|
||||
while IFS= read -r -d '' path; do
|
||||
[ -e "$path" ] || continue
|
||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then
|
||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then
|
||||
continue
|
||||
fi
|
||||
write_failclosed_stub "$path"
|
||||
@@ -367,7 +413,7 @@ quarantine_lane_registration_sources() {
|
||||
local target
|
||||
for lane_dir in "/home/wooo/awoooi-cd-lane" "/home/wooo/awoooi-cd-lane-drain"; do
|
||||
[ -d "$lane_dir" ] || continue
|
||||
if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_staging_allowed; then
|
||||
if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_preserve_allowed; then
|
||||
continue
|
||||
fi
|
||||
quarantine_dir="$lane_dir/quarantine-failclosed-${STAMP}"
|
||||
@@ -393,7 +439,7 @@ quarantine_lane_registration_sources() {
|
||||
seal_live_binary_paths() {
|
||||
local path
|
||||
for path in "${LIVE_BINARY_PATHS[@]}"; do
|
||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then
|
||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then
|
||||
continue
|
||||
fi
|
||||
write_failclosed_stub "$path"
|
||||
@@ -631,7 +677,7 @@ seal_enforcer_disabler_artifacts() {
|
||||
while IFS= read -r -d '' path; do
|
||||
[ -e "$path" ] || [ -L "$path" ] || continue
|
||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
||||
bounded_chattr_recursive_clear "$path"
|
||||
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
||||
done < <(
|
||||
as_root find /etc/systemd/system -maxdepth 1 -type d \( \
|
||||
@@ -740,7 +786,7 @@ seal_root_restore_sources() {
|
||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||
moved=1
|
||||
fi
|
||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
||||
bounded_chattr_recursive_clear "$path"
|
||||
as_root mv "$path" "$target_root/" >/dev/null 2>&1 || true
|
||||
done < <(
|
||||
as_root find /root -maxdepth 1 -type d \( \
|
||||
@@ -772,7 +818,7 @@ EOF
|
||||
while IFS= read -r -d '' path; do
|
||||
[ -d "$path" ] || continue
|
||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
||||
bounded_chattr_recursive_clear "$path"
|
||||
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
||||
as_root mkdir -p "$path" >/dev/null 2>&1 || true
|
||||
if [ -f "$current" ]; then
|
||||
@@ -805,7 +851,7 @@ mask_unit_file_to_devnull() {
|
||||
|
||||
seal_lane_unit_files() {
|
||||
mask_unit_file_to_devnull "awoooi-cd-lane.service"
|
||||
if controlled_drain_staging_allowed; then
|
||||
if controlled_drain_preserve_allowed; then
|
||||
return 0
|
||||
fi
|
||||
mask_unit_file_to_devnull "$CONTROLLED_DRAIN_UNIT"
|
||||
@@ -822,7 +868,7 @@ root_restore_sources_left() {
|
||||
unit_ok() {
|
||||
local unit="$1"
|
||||
local load active unitfile mainpid
|
||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then
|
||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then
|
||||
return 0
|
||||
fi
|
||||
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
||||
@@ -877,6 +923,9 @@ awoooi_runner_failclosed_enforcer_apply_performed $APPLY_PERFORMED
|
||||
# HELP awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed Controlled drain lane non-secret guardrail staging allowance.
|
||||
# TYPE awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed gauge
|
||||
awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed $(controlled_drain_staging_allowed && echo 1 || echo 0)
|
||||
# HELP awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed Controlled drain lane guarded active allowance.
|
||||
# TYPE awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed gauge
|
||||
awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed $(controlled_drain_active_allowed && echo 1 || echo 0)
|
||||
EOF
|
||||
as_root install -o root -g root -m 0644 "$tmp" "$dir/awoooi_runner_failclosed_enforcer.prom" >/dev/null 2>&1 || true
|
||||
rm -f "$tmp"
|
||||
@@ -892,6 +941,8 @@ print_readback() {
|
||||
echo "RUNNER_PROCESS_COUNT=$(count_runner_processes)"
|
||||
echo "ROOT_RESTORE_SOURCES_LEFT=$(root_restore_sources_left)"
|
||||
echo "CONTROLLED_DRAIN_STAGING_ALLOWED=$(controlled_drain_staging_allowed && echo 1 || echo 0)"
|
||||
echo "CONTROLLED_DRAIN_ACTIVE_ALLOWED=$(controlled_drain_active_allowed && echo 1 || echo 0)"
|
||||
echo "CONTROLLED_DRAIN_PRESERVE_ALLOWED=$(controlled_drain_preserve_allowed && echo 1 || echo 0)"
|
||||
echo "RUNNER_UNITS_BAD_COUNT=$(runner_units_bad_count)"
|
||||
for unit in "${RUNNER_UNITS[@]}"; do
|
||||
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
||||
@@ -952,7 +1003,7 @@ write_metrics "/home/wooo/node_exporter_textfiles"
|
||||
print_readback
|
||||
|
||||
if [ "$(count_active_job_containers)" = "0" ] \
|
||||
&& [ "$(count_lane_processes)" = "0" ] \
|
||||
&& lane_process_count_ok \
|
||||
&& [ "$(count_runner_processes)" = "0" ] \
|
||||
&& [ "$(root_restore_sources_left)" = "0" ] \
|
||||
&& [ "$(runner_units_bad_count)" = "0" ]; then
|
||||
|
||||
@@ -226,14 +226,21 @@ def test_runner_failclosed_enforcer_does_not_seal_live_startup_recovery_script()
|
||||
assert "awoooi-startup-110.sh.*controlled*" in text
|
||||
|
||||
|
||||
def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_only() -> None:
|
||||
def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_or_guarded_active() -> None:
|
||||
text = FAILCLOSED_ENFORCER.read_text(encoding="utf-8")
|
||||
|
||||
assert "controlled_drain_staging_allowed()" in text
|
||||
assert "controlled_drain_active_allowed()" in text
|
||||
assert "controlled_drain_preserve_allowed()" in text
|
||||
assert "controlled_drain_config_safe" in text
|
||||
assert "controlled_drain_binary_safe" in text
|
||||
assert "controlled_drain_unit_safe" in text
|
||||
assert "controlled_drain_service_inactive" in text
|
||||
assert "controlled_drain_registration_present" in text
|
||||
assert "controlled_drain_service_active_guarded" in text
|
||||
assert 'lane_process_count_ok \\' in text
|
||||
assert "bounded_chattr_recursive_clear()" in text
|
||||
assert "as_root timeout 5s chattr -R -i" in text
|
||||
assert "awoooi-host:host" in text
|
||||
assert (
|
||||
"awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04"
|
||||
@@ -245,10 +252,12 @@ def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_only() ->
|
||||
assert 'grep -Eq \'^[[:space:]]*MemoryAccounting=true\'' in text
|
||||
assert 'grep -Eq \'^[[:space:]]*TasksAccounting=true\'' in text
|
||||
assert '[ "$unitfile" != "enabled" ] || return 1' in text
|
||||
assert 'if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then' in text
|
||||
assert 'if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then' in text
|
||||
assert 'if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_staging_allowed; then' in text
|
||||
assert 'if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then' in text
|
||||
assert 'if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then' in text
|
||||
assert 'if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_preserve_allowed; then' in text
|
||||
assert "CONTROLLED_DRAIN_STAGING_ALLOWED=" in text
|
||||
assert "CONTROLLED_DRAIN_ACTIVE_ALLOWED=" in text
|
||||
assert "CONTROLLED_DRAIN_PRESERVE_ALLOWED=" in text
|
||||
|
||||
|
||||
def test_controlled_cd_lane_unit_source_has_required_accounting_guardrails() -> None:
|
||||
|
||||
Reference in New Issue
Block a user