fix(runner): preserve guarded active cd drain lane
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 48s
CD Pipeline / build-and-deploy (push) Successful in 4m23s
CD Pipeline / post-deploy-checks (push) Successful in 2m2s
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 48s
CD Pipeline / build-and-deploy (push) Successful in 4m23s
CD Pipeline / post-deploy-checks (push) Successful in 2m2s
This commit is contained in:
@@ -111,6 +111,16 @@ as_root() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Best-effort recursive removal of the immutable attribute (chattr -R -i)
# from the given path.
#
# Arguments:
#   $1 - path to clear; nothing is done when it does not exist.
# Returns:
#   Always 0. chattr failures and timeouts are deliberately ignored.
bounded_chattr_recursive_clear() {
  local clear_path="$1"

  if [ ! -e "$clear_path" ]; then
    return 0
  fi

  # Without a `timeout` binary, fall back to an unbounded chattr run.
  if ! command -v timeout >/dev/null 2>&1; then
    as_root chattr -R -i "$clear_path" >/dev/null 2>&1 || true
    return 0
  fi

  # Cap the recursive walk at five seconds so a large tree cannot stall the caller.
  as_root timeout 5s chattr -R -i "$clear_path" >/dev/null 2>&1 || true
  return 0
}
|
||||||
|
|
||||||
host_is_110() {
|
host_is_110() {
|
||||||
if command -v ip >/dev/null 2>&1; then
|
if command -v ip >/dev/null 2>&1; then
|
||||||
ip -o -4 addr show 2>/dev/null | awk '{print $4}' | grep -q '^192\.168\.0\.110/'
|
ip -o -4 addr show 2>/dev/null | awk '{print $4}' | grep -q '^192\.168\.0\.110/'
|
||||||
@@ -255,6 +265,22 @@ controlled_drain_service_inactive() {
|
|||||||
[ "$unitfile" != "enabled" ] || return 1
|
[ "$unitfile" != "enabled" ] || return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Succeeds when the file named by $CONTROLLED_DRAIN_REGISTRATION exists and
# is non-empty; fails otherwise.
controlled_drain_registration_present() {
  test -s "$CONTROLLED_DRAIN_REGISTRATION"
}
|
||||||
|
|
||||||
|
# Succeeds only when systemd reports $CONTROLLED_DRAIN_UNIT as loaded,
# with ActiveState "active", a non-zero MainPID, and a unit file state other
# than "masked". Returns 1 in every other case.
controlled_drain_service_active_guarded() {
  local load_state active_state unit_file_state main_pid

  # A failing systemctl call yields an empty value, which fails the checks below.
  load_state="$(systemctl show "$CONTROLLED_DRAIN_UNIT" -p LoadState --value 2>/dev/null || true)"
  active_state="$(systemctl show "$CONTROLLED_DRAIN_UNIT" -p ActiveState --value 2>/dev/null || true)"
  unit_file_state="$(systemctl show "$CONTROLLED_DRAIN_UNIT" -p UnitFileState --value 2>/dev/null || true)"
  main_pid="$(systemctl show "$CONTROLLED_DRAIN_UNIT" -p MainPID --value 2>/dev/null || true)"

  if [ "$load_state" != "loaded" ]; then
    return 1
  fi
  if [ "$active_state" != "active" ]; then
    return 1
  fi

  # An empty MainPID is treated the same as "0" (no main process).
  case "${main_pid:-0}" in
    0) return 1 ;;
  esac

  if [ "$unit_file_state" = "masked" ]; then
    return 1
  fi
  return 0
}
|
||||||
|
|
||||||
controlled_drain_staging_allowed() {
|
controlled_drain_staging_allowed() {
|
||||||
controlled_drain_config_safe \
|
controlled_drain_config_safe \
|
||||||
&& controlled_drain_binary_safe \
|
&& controlled_drain_binary_safe \
|
||||||
@@ -262,6 +288,22 @@ controlled_drain_staging_allowed() {
|
|||||||
&& controlled_drain_service_inactive
|
&& controlled_drain_service_inactive
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Succeeds when every guard for keeping an already-running controlled drain
# lane passes: config, binary and unit checks, a present registration, and
# a guarded-active service state. The checks run in order and the first
# failure's exit status is returned.
controlled_drain_active_allowed() {
  # A bare `return` hands back the status of the check that just failed.
  controlled_drain_config_safe || return
  controlled_drain_binary_safe || return
  controlled_drain_unit_safe || return
  controlled_drain_registration_present || return
  controlled_drain_service_active_guarded
}
|
||||||
|
|
||||||
|
# Succeeds when the controlled drain lane may be left in place, either
# because the staging allowance holds or because the active allowance holds.
# The active check only runs when the staging check fails.
controlled_drain_preserve_allowed() {
  if controlled_drain_staging_allowed; then
    return 0
  fi
  controlled_drain_active_allowed
}
|
||||||
|
|
||||||
|
# Succeeds when count_lane_processes prints "0", or otherwise when the
# controlled drain active allowance holds.
# NOTE(review): in the second case any non-zero lane process count passes;
# presumably the remaining processes belong to the guarded drain lane, but
# that depends on what count_lane_processes counts. Confirm.
lane_process_count_ok() {
  local lane_count
  lane_count="$(count_lane_processes)"

  if [ "$lane_count" = "0" ]; then
    return 0
  fi
  controlled_drain_active_allowed
}
|
||||||
|
|
||||||
list_action_runner_units() {
|
list_action_runner_units() {
|
||||||
{
|
{
|
||||||
systemctl list-unit-files 'actions.runner.*' --no-legend --plain 2>/dev/null | awk '{print $1}'
|
systemctl list-unit-files 'actions.runner.*' --no-legend --plain 2>/dev/null | awk '{print $1}'
|
||||||
@@ -272,9 +314,11 @@ list_action_runner_units() {
|
|||||||
stop_and_mask_units() {
|
stop_and_mask_units() {
|
||||||
local unit
|
local unit
|
||||||
for unit in "${RUNNER_UNITS[@]}"; do
|
for unit in "${RUNNER_UNITS[@]}"; do
|
||||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then
|
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then
|
||||||
as_root systemctl reset-failed "$unit" >/dev/null 2>&1 || true
|
as_root systemctl reset-failed "$unit" >/dev/null 2>&1 || true
|
||||||
as_root systemctl disable "$unit" >/dev/null 2>&1 || true
|
if controlled_drain_staging_allowed; then
|
||||||
|
as_root systemctl disable "$unit" >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
as_root systemctl kill --signal=SIGKILL "$unit" >/dev/null 2>&1 || true
|
as_root systemctl kill --signal=SIGKILL "$unit" >/dev/null 2>&1 || true
|
||||||
@@ -301,7 +345,9 @@ stop_and_mask_action_runner_units() {
|
|||||||
|
|
||||||
kill_runner_processes() {
|
kill_runner_processes() {
|
||||||
pkill -KILL -f '^/home/wooo/awoooi-cd-lane/awoooi_cd_lane' >/dev/null 2>&1 || true
|
pkill -KILL -f '^/home/wooo/awoooi-cd-lane/awoooi_cd_lane' >/dev/null 2>&1 || true
|
||||||
pkill -KILL -f '^/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled' >/dev/null 2>&1 || true
|
if ! controlled_drain_active_allowed; then
|
||||||
|
pkill -KILL -f '^/home/wooo/awoooi-cd-lane-drain/awoooi_cd_lane_controlled' >/dev/null 2>&1 || true
|
||||||
|
fi
|
||||||
pkill -KILL -f '^/home/wooo/act-runner/act_runner' >/dev/null 2>&1 || true
|
pkill -KILL -f '^/home/wooo/act-runner/act_runner' >/dev/null 2>&1 || true
|
||||||
pkill -KILL -f '^/home/wooo/act-runner-controlled/act_runner' >/dev/null 2>&1 || true
|
pkill -KILL -f '^/home/wooo/act-runner-controlled/act_runner' >/dev/null 2>&1 || true
|
||||||
pkill -KILL -f '^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner' >/dev/null 2>&1 || true
|
pkill -KILL -f '^/home/wooo/awoooi-controlled-runner/awoooi_controlled_runner' >/dev/null 2>&1 || true
|
||||||
@@ -348,7 +394,7 @@ seal_lane_binary_restore_sources() {
|
|||||||
local path
|
local path
|
||||||
while IFS= read -r -d '' path; do
|
while IFS= read -r -d '' path; do
|
||||||
[ -e "$path" ] || continue
|
[ -e "$path" ] || continue
|
||||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then
|
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
write_failclosed_stub "$path"
|
write_failclosed_stub "$path"
|
||||||
@@ -367,7 +413,7 @@ quarantine_lane_registration_sources() {
|
|||||||
local target
|
local target
|
||||||
for lane_dir in "/home/wooo/awoooi-cd-lane" "/home/wooo/awoooi-cd-lane-drain"; do
|
for lane_dir in "/home/wooo/awoooi-cd-lane" "/home/wooo/awoooi-cd-lane-drain"; do
|
||||||
[ -d "$lane_dir" ] || continue
|
[ -d "$lane_dir" ] || continue
|
||||||
if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_staging_allowed; then
|
if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_preserve_allowed; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
quarantine_dir="$lane_dir/quarantine-failclosed-${STAMP}"
|
quarantine_dir="$lane_dir/quarantine-failclosed-${STAMP}"
|
||||||
@@ -393,7 +439,7 @@ quarantine_lane_registration_sources() {
|
|||||||
seal_live_binary_paths() {
|
seal_live_binary_paths() {
|
||||||
local path
|
local path
|
||||||
for path in "${LIVE_BINARY_PATHS[@]}"; do
|
for path in "${LIVE_BINARY_PATHS[@]}"; do
|
||||||
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then
|
if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
write_failclosed_stub "$path"
|
write_failclosed_stub "$path"
|
||||||
@@ -631,7 +677,7 @@ seal_enforcer_disabler_artifacts() {
|
|||||||
while IFS= read -r -d '' path; do
|
while IFS= read -r -d '' path; do
|
||||||
[ -e "$path" ] || [ -L "$path" ] || continue
|
[ -e "$path" ] || [ -L "$path" ] || continue
|
||||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
bounded_chattr_recursive_clear "$path"
|
||||||
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
||||||
done < <(
|
done < <(
|
||||||
as_root find /etc/systemd/system -maxdepth 1 -type d \( \
|
as_root find /etc/systemd/system -maxdepth 1 -type d \( \
|
||||||
@@ -740,7 +786,7 @@ seal_root_restore_sources() {
|
|||||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||||
moved=1
|
moved=1
|
||||||
fi
|
fi
|
||||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
bounded_chattr_recursive_clear "$path"
|
||||||
as_root mv "$path" "$target_root/" >/dev/null 2>&1 || true
|
as_root mv "$path" "$target_root/" >/dev/null 2>&1 || true
|
||||||
done < <(
|
done < <(
|
||||||
as_root find /root -maxdepth 1 -type d \( \
|
as_root find /root -maxdepth 1 -type d \( \
|
||||||
@@ -772,7 +818,7 @@ EOF
|
|||||||
while IFS= read -r -d '' path; do
|
while IFS= read -r -d '' path; do
|
||||||
[ -d "$path" ] || continue
|
[ -d "$path" ] || continue
|
||||||
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
as_root mkdir -p "$target_root" >/dev/null 2>&1 || true
|
||||||
as_root chattr -R -i "$path" >/dev/null 2>&1 || true
|
bounded_chattr_recursive_clear "$path"
|
||||||
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
as_root mv "$path" "$target_root/$(basename "$path").sealed" >/dev/null 2>&1 || true
|
||||||
as_root mkdir -p "$path" >/dev/null 2>&1 || true
|
as_root mkdir -p "$path" >/dev/null 2>&1 || true
|
||||||
if [ -f "$current" ]; then
|
if [ -f "$current" ]; then
|
||||||
@@ -805,7 +851,7 @@ mask_unit_file_to_devnull() {
|
|||||||
|
|
||||||
seal_lane_unit_files() {
|
seal_lane_unit_files() {
|
||||||
mask_unit_file_to_devnull "awoooi-cd-lane.service"
|
mask_unit_file_to_devnull "awoooi-cd-lane.service"
|
||||||
if controlled_drain_staging_allowed; then
|
if controlled_drain_preserve_allowed; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
mask_unit_file_to_devnull "$CONTROLLED_DRAIN_UNIT"
|
mask_unit_file_to_devnull "$CONTROLLED_DRAIN_UNIT"
|
||||||
@@ -822,7 +868,7 @@ root_restore_sources_left() {
|
|||||||
unit_ok() {
|
unit_ok() {
|
||||||
local unit="$1"
|
local unit="$1"
|
||||||
local load active unitfile mainpid
|
local load active unitfile mainpid
|
||||||
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then
|
if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
||||||
@@ -877,6 +923,9 @@ awoooi_runner_failclosed_enforcer_apply_performed $APPLY_PERFORMED
|
|||||||
# HELP awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed Controlled drain lane non-secret guardrail staging allowance.
|
# HELP awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed Controlled drain lane non-secret guardrail staging allowance.
|
||||||
# TYPE awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed gauge
|
# TYPE awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed gauge
|
||||||
awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed $(controlled_drain_staging_allowed && echo 1 || echo 0)
|
awoooi_runner_failclosed_enforcer_controlled_drain_staging_allowed $(controlled_drain_staging_allowed && echo 1 || echo 0)
|
||||||
|
# HELP awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed Controlled drain lane guarded active allowance.
|
||||||
|
# TYPE awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed gauge
|
||||||
|
awoooi_runner_failclosed_enforcer_controlled_drain_active_allowed $(controlled_drain_active_allowed && echo 1 || echo 0)
|
||||||
EOF
|
EOF
|
||||||
as_root install -o root -g root -m 0644 "$tmp" "$dir/awoooi_runner_failclosed_enforcer.prom" >/dev/null 2>&1 || true
|
as_root install -o root -g root -m 0644 "$tmp" "$dir/awoooi_runner_failclosed_enforcer.prom" >/dev/null 2>&1 || true
|
||||||
rm -f "$tmp"
|
rm -f "$tmp"
|
||||||
@@ -892,6 +941,8 @@ print_readback() {
|
|||||||
echo "RUNNER_PROCESS_COUNT=$(count_runner_processes)"
|
echo "RUNNER_PROCESS_COUNT=$(count_runner_processes)"
|
||||||
echo "ROOT_RESTORE_SOURCES_LEFT=$(root_restore_sources_left)"
|
echo "ROOT_RESTORE_SOURCES_LEFT=$(root_restore_sources_left)"
|
||||||
echo "CONTROLLED_DRAIN_STAGING_ALLOWED=$(controlled_drain_staging_allowed && echo 1 || echo 0)"
|
echo "CONTROLLED_DRAIN_STAGING_ALLOWED=$(controlled_drain_staging_allowed && echo 1 || echo 0)"
|
||||||
|
echo "CONTROLLED_DRAIN_ACTIVE_ALLOWED=$(controlled_drain_active_allowed && echo 1 || echo 0)"
|
||||||
|
echo "CONTROLLED_DRAIN_PRESERVE_ALLOWED=$(controlled_drain_preserve_allowed && echo 1 || echo 0)"
|
||||||
echo "RUNNER_UNITS_BAD_COUNT=$(runner_units_bad_count)"
|
echo "RUNNER_UNITS_BAD_COUNT=$(runner_units_bad_count)"
|
||||||
for unit in "${RUNNER_UNITS[@]}"; do
|
for unit in "${RUNNER_UNITS[@]}"; do
|
||||||
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
load="$(systemctl show "$unit" -p LoadState --value 2>/dev/null || true)"
|
||||||
@@ -952,7 +1003,7 @@ write_metrics "/home/wooo/node_exporter_textfiles"
|
|||||||
print_readback
|
print_readback
|
||||||
|
|
||||||
if [ "$(count_active_job_containers)" = "0" ] \
|
if [ "$(count_active_job_containers)" = "0" ] \
|
||||||
&& [ "$(count_lane_processes)" = "0" ] \
|
&& lane_process_count_ok \
|
||||||
&& [ "$(count_runner_processes)" = "0" ] \
|
&& [ "$(count_runner_processes)" = "0" ] \
|
||||||
&& [ "$(root_restore_sources_left)" = "0" ] \
|
&& [ "$(root_restore_sources_left)" = "0" ] \
|
||||||
&& [ "$(runner_units_bad_count)" = "0" ]; then
|
&& [ "$(runner_units_bad_count)" = "0" ]; then
|
||||||
|
|||||||
@@ -226,14 +226,21 @@ def test_runner_failclosed_enforcer_does_not_seal_live_startup_recovery_script()
|
|||||||
assert "awoooi-startup-110.sh.*controlled*" in text
|
assert "awoooi-startup-110.sh.*controlled*" in text
|
||||||
|
|
||||||
|
|
||||||
def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_only() -> None:
|
def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_or_guarded_active() -> None:
|
||||||
text = FAILCLOSED_ENFORCER.read_text(encoding="utf-8")
|
text = FAILCLOSED_ENFORCER.read_text(encoding="utf-8")
|
||||||
|
|
||||||
assert "controlled_drain_staging_allowed()" in text
|
assert "controlled_drain_staging_allowed()" in text
|
||||||
|
assert "controlled_drain_active_allowed()" in text
|
||||||
|
assert "controlled_drain_preserve_allowed()" in text
|
||||||
assert "controlled_drain_config_safe" in text
|
assert "controlled_drain_config_safe" in text
|
||||||
assert "controlled_drain_binary_safe" in text
|
assert "controlled_drain_binary_safe" in text
|
||||||
assert "controlled_drain_unit_safe" in text
|
assert "controlled_drain_unit_safe" in text
|
||||||
assert "controlled_drain_service_inactive" in text
|
assert "controlled_drain_service_inactive" in text
|
||||||
|
assert "controlled_drain_registration_present" in text
|
||||||
|
assert "controlled_drain_service_active_guarded" in text
|
||||||
|
assert 'lane_process_count_ok \\' in text
|
||||||
|
assert "bounded_chattr_recursive_clear()" in text
|
||||||
|
assert "as_root timeout 5s chattr -R -i" in text
|
||||||
assert "awoooi-host:host" in text
|
assert "awoooi-host:host" in text
|
||||||
assert (
|
assert (
|
||||||
"awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04"
|
"awoooi-ubuntu:docker://192.168.0.110:5000/awoooi/ci-runner:act-22.04"
|
||||||
@@ -245,10 +252,12 @@ def test_runner_failclosed_enforcer_preserves_controlled_drain_staging_only() ->
|
|||||||
assert 'grep -Eq \'^[[:space:]]*MemoryAccounting=true\'' in text
|
assert 'grep -Eq \'^[[:space:]]*MemoryAccounting=true\'' in text
|
||||||
assert 'grep -Eq \'^[[:space:]]*TasksAccounting=true\'' in text
|
assert 'grep -Eq \'^[[:space:]]*TasksAccounting=true\'' in text
|
||||||
assert '[ "$unitfile" != "enabled" ] || return 1' in text
|
assert '[ "$unitfile" != "enabled" ] || return 1' in text
|
||||||
assert 'if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_staging_allowed; then' in text
|
assert 'if [ "$unit" = "$CONTROLLED_DRAIN_UNIT" ] && controlled_drain_preserve_allowed; then' in text
|
||||||
assert 'if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_staging_allowed; then' in text
|
assert 'if [ "$path" = "$CONTROLLED_DRAIN_BINARY" ] && controlled_drain_preserve_allowed; then' in text
|
||||||
assert 'if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_staging_allowed; then' in text
|
assert 'if [ "$lane_dir" = "$CONTROLLED_DRAIN_DIR" ] && controlled_drain_preserve_allowed; then' in text
|
||||||
assert "CONTROLLED_DRAIN_STAGING_ALLOWED=" in text
|
assert "CONTROLLED_DRAIN_STAGING_ALLOWED=" in text
|
||||||
|
assert "CONTROLLED_DRAIN_ACTIVE_ALLOWED=" in text
|
||||||
|
assert "CONTROLLED_DRAIN_PRESERVE_ALLOWED=" in text
|
||||||
|
|
||||||
|
|
||||||
def test_controlled_cd_lane_unit_source_has_required_accounting_guardrails() -> None:
|
def test_controlled_cd_lane_unit_source_has_required_accounting_guardrails() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user