diff --git a/scripts/ops/host-runaway-process-exporter.py b/scripts/ops/host-runaway-process-exporter.py
index d1cc6068..1f6bc633 100755
--- a/scripts/ops/host-runaway-process-exporter.py
+++ b/scripts/ops/host-runaway-process-exporter.py
@@ -85,6 +85,9 @@ GITEA_ACTION_PROCESS_RE = re.compile(
     r"(/\.cache/act/|/home/wooo/\.cache/act/|\bdocker build\b|\bdocker-buildx\b|"
     r"\bbuildx build\b|\bpnpm turbo build\b|\bturbo build\b|\bnext build\b)"
 )
+# Matches the host-pressure gate script by name. The "awoooi-" prefixed
+# variant contains the same substring, so one unanchored pattern covers both.
+HOST_PRESSURE_GATE_RE = re.compile(r"wait-host-web-build-pressure\.sh")
 
 
 def escape_label(value: str) -> str:
@@ -235,7 +238,16 @@ def active_gitea_action_containers(docker_file: Path | None = None) -> int:
 
 def active_gitea_action_process_load(rows: list[ProcessRow]) -> ActiveCiLoad:
     grouped: dict[int, list[ProcessRow]] = {}
+    # Collect the process groups that contain the host-pressure gate script;
+    # every row in such a group is skipped below, not only the gate itself.
+    gate_pgids = {
+        row.pgid
+        for row in rows
+        if HOST_PRESSURE_GATE_RE.search(f"{row.comm} {row.args}")
+    }
     for row in rows:
+        if row.pgid in gate_pgids:
+            continue
         haystack = f"{row.comm} {row.args}"
         if not GITEA_ACTION_PROCESS_RE.search(haystack):
             continue
diff --git a/scripts/ops/tests/test_host_runaway_process_exporter.py b/scripts/ops/tests/test_host_runaway_process_exporter.py
index 2843200f..0e81f1ba 100644
--- a/scripts/ops/tests/test_host_runaway_process_exporter.py
+++ b/scripts/ops/tests/test_host_runaway_process_exporter.py
@@ -145,6 +145,26 @@ def test_counts_buildkit_runner_process_load() -> None:
     assert load.oldest_age_seconds == 240
 
 
+def test_ignores_the_host_pressure_gate_process_group() -> None:
+    """Rows sharing a process group with the pressure gate are not counted."""
+    exporter = load_exporter()
+    rows = exporter.parse_ps_rows(
+        """
+        100 10 100 100 240 0.0 S bash bash --noprofile --norc -e -o pipefail /home/wooo/.cache/act/14cc/act/workflow/2.sh
+        101 100 100 100 239 0.0 S bash bash scripts/ci/wait-host-web-build-pressure.sh
+        102 101 100 100 238 0.0 S sleep sleep 10
+        200 150 200 200 210 12.5 S turbo turbo build --filter=@awoooi/web --concurrency=1
+        """
+    )
+
+    load = exporter.active_gitea_action_process_load(rows)
+
+    assert load.group_count == 1
+    assert load.process_count == 1
+    assert load.cpu_percent == 12.5
+    assert load.oldest_age_seconds == 210
+
+
 def test_remediation_defaults_to_dry_run(tmp_path: Path) -> None:
     ps_file = tmp_path / "ps.txt"
     ps_file.write_text(