fix(ops): recognize repo-scoped CI containers in load guard [skip ci]
This commit is contained in:
@@ -24,6 +24,9 @@ TEXTFILE_DIR = Path(os.environ.get("NODE_EXPORTER_TEXTFILE_DIR", "/var/lib/node_
|
||||
OUTPUT_NAME = "host_runaway_process.prom"
|
||||
HOST_LABEL = os.environ.get("AIOPS_HOST_LABEL", os.uname().nodename)
|
||||
LABEL_RE = re.compile(r'["\\\n]')
|
||||
GITEA_ACTION_CONTAINER_RE = re.compile(
|
||||
r"^(?:GITEA-ACTIONS-|[A-Za-z0-9][A-Za-z0-9_.-]*-(?:cd|code-review)-[0-9]+-)"
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -214,7 +217,7 @@ def active_gitea_action_containers(docker_file: Path | None = None) -> int:
|
||||
names = run_text(["docker", "ps", "--format", "{{.Names}}"], timeout=10).splitlines()
|
||||
except Exception:
|
||||
return -1
|
||||
return sum(1 for name in names if "GITEA-ACTIONS-TASK-" in name)
|
||||
return sum(1 for name in names if GITEA_ACTION_CONTAINER_RE.search(name))
|
||||
|
||||
|
||||
def load5_per_core() -> float:
|
||||
|
||||
@@ -11,7 +11,8 @@ set -euo pipefail
|
||||
# bash scripts/ops/stop-stale-gitea-actions-jobs.sh --apply
|
||||
#
|
||||
# Safety rules:
|
||||
# - Only touches Docker containers named GITEA-ACTIONS-*.
|
||||
# - Only touches Docker containers named GITEA-ACTIONS-* or repo-scoped
|
||||
# Gitea job containers such as awoooi-cd-5901-1-e2e-smoke.
|
||||
# - Defaults to containers older than 20 minutes.
|
||||
# - Known long-running workflows get a higher stop threshold than the alert threshold.
|
||||
# - Skips containers with recent log output unless --force is provided.
|
||||
@@ -24,17 +25,20 @@ threshold_for_name() {
|
||||
local name="$1"
|
||||
|
||||
case "$name" in
|
||||
*WORKFLOW-CD-Pipeline_JOB-deploy*)
|
||||
*WORKFLOW-CD-Pipeline_JOB-deploy*|*-cd-*-deploy*)
|
||||
# .gitea/workflows/cd.yaml deploy job timeout is 60m. Give act/Gitea
|
||||
# cleanup a buffer before treating the container as abandoned.
|
||||
echo 4500
|
||||
;;
|
||||
*WORKFLOW-CD-Pipeline_JOB-tests*|*WORKFLOW-CD-Pipeline_JOB-post-deploy-checks*)
|
||||
*WORKFLOW-CD-Pipeline_JOB-tests*|*WORKFLOW-CD-Pipeline_JOB-post-deploy-checks*|*-cd-*-tests*|*-cd-*-post-deploy*)
|
||||
echo 2400
|
||||
;;
|
||||
*WORKFLOW-Code-Review_JOB-ai-code-review*)
|
||||
*WORKFLOW-Code-Review_JOB-ai-code-review*|*-code-review-*)
|
||||
echo 720
|
||||
;;
|
||||
*-cd-*-e2e-smoke*|*-cd-*-source-link-smoke*)
|
||||
echo 900
|
||||
;;
|
||||
*WORKFLOW-Deploy-Alert-Rules_JOB-deploy-alerts*)
|
||||
echo 900
|
||||
;;
|
||||
@@ -97,7 +101,7 @@ while read -r name; do
|
||||
fi
|
||||
docker stop "$name"
|
||||
fi
|
||||
done < <(docker ps --format '{{.Names}}' | grep '^GITEA-ACTIONS-' || true)
|
||||
done < <(docker ps --format '{{.Names}}' | grep -E '^(GITEA-ACTIONS-|[A-Za-z0-9][A-Za-z0-9_.-]*-(cd|code-review)-[0-9]+-)' || true)
|
||||
|
||||
if [[ "$found" == "0" ]]; then
|
||||
echo "No stale Gitea Actions containers older than policy threshold (minimum ${MIN_AGE_SECONDS}s)."
|
||||
|
||||
@@ -93,6 +93,26 @@ def test_renders_ci_load_and_swap_without_authorizing_repair(tmp_path: Path) ->
|
||||
assert 'rule="stockplatform_headless_smoke"' in metrics
|
||||
|
||||
|
||||
def test_counts_modern_gitea_action_container_names(tmp_path: Path) -> None:
    """Count both legacy and repo-scoped CI job containers, skipping the rest."""
    exporter = load_exporter()

    # Names the counter must recognise: one legacy GITEA-ACTIONS-* container
    # plus three repo-scoped "<repo>-(cd|code-review)-<run>-..." containers.
    ci_names = (
        "GITEA-ACTIONS-TASK-123",
        "awoooi-cd-5901-1-e2e-smoke",
        "awoooi-cd-5873-1-source-link-smoke",
        "awoooi-code-review-3323-1-ai-code-review",
    )
    # Ordinary service containers that must not be counted as CI jobs.
    other_names = ("gitea", "stockplatform-v2-api-1")

    # One container name per line, with no trailing newline.
    listing = tmp_path / "docker.txt"
    listing.write_text("\n".join([*ci_names, *other_names]), encoding="utf-8")

    counted = exporter.active_gitea_action_containers(listing)
    assert counted == len(ci_names)
|
||||
|
||||
|
||||
def test_remediation_defaults_to_dry_run(tmp_path: Path) -> None:
|
||||
ps_file = tmp_path / "ps.txt"
|
||||
ps_file.write_text(
|
||||
|
||||
Reference in New Issue
Block a user