From 3f8d78469896ac490177de8c261f78a7b2d89f1d Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 2 Jul 2026 12:51:05 +0800 Subject: [PATCH] fix(ops): add gitea pressure check-mode playbook --- .gitea/workflows/cd.yaml | 8 + docs/LOGBOOK.md | 27 + docs/runbooks/FULL-STACK-COLD-START-SOP.md | 2 + .../HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md | 19 +- .../test_cd_controlled_runtime_profile.py | 8 + .../ops/gitea-queue-hook-backlog-playbook.py | 513 ++++++++++++++++++ scripts/ops/host-sustained-load-controller.py | 5 +- .../test_gitea_queue_hook_backlog_playbook.py | 152 ++++++ .../test_host_runaway_process_exporter.py | 7 +- 9 files changed, 734 insertions(+), 7 deletions(-) create mode 100644 scripts/ops/gitea-queue-hook-backlog-playbook.py create mode 100644 scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py diff --git a/.gitea/workflows/cd.yaml b/.gitea/workflows/cd.yaml index 071c5b5e..1284ee2f 100644 --- a/.gitea/workflows/cd.yaml +++ b/.gitea/workflows/cd.yaml @@ -260,6 +260,8 @@ jobs: ;; docs/runbooks/FULL-STACK-COLD-START-SOP.md) ;; + docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md) + ;; docs/workplans/2026-06-04-reboot-cold-start-backup-recovery-workplan.md) ;; docs/schemas/product_awoooi_manifest_v1.schema.json) @@ -572,6 +574,8 @@ jobs: ;; scripts/ops/docker-disk-pressure-retention-cleanup.py) ;; + scripts/ops/gitea-queue-hook-backlog-playbook.py) + ;; scripts/ops/host-runaway-process-exporter.py) ;; scripts/ops/host-sustained-load-controller.py) @@ -582,6 +586,8 @@ jobs: ;; scripts/ops/tests/test_docker_disk_pressure_retention_cleanup.py) ;; + scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py) + ;; scripts/ops/tests/test_host_runaway_process_exporter.py) ;; scripts/ops/tests/test_host_pressure_alert_contract.py) @@ -821,6 +827,7 @@ jobs: ../../scripts/ops/backup-alert-label-contract-check.py \ ../../scripts/ops/backup-health-textfile-exporter.py \ ../../scripts/ops/docker-disk-pressure-retention-cleanup.py \ + ../../scripts/ops/gitea-queue-hook-backlog-playbook.py \ ../../scripts/ops/host-runaway-process-exporter.py \ ../../scripts/ops/host-sustained-load-controller.py \ ../../scripts/ops/host-sustained-load-evidence.py \ @@ -908,6 +915,7 @@ jobs: ../../scripts/backup/tests/test_backup_status_contract.py \ ../../scripts/ops/tests/test_backup_health_textfile_exporter.py \ ../../scripts/ops/tests/test_docker_disk_pressure_retention_cleanup.py \ + ../../scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py \ ../../scripts/ops/tests/test_host_runaway_process_exporter.py \ ../../scripts/ops/tests/test_host_pressure_alert_contract.py \ ../../scripts/reboot-recovery/tests/test_dr_escrow_evidence_checklist.py \ diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 19bd7a35..34e1c9ea 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,30 @@ +## 2026-07-02 — 12:50 110 Gitea CPU 壓力 check-mode playbook 實作 + +**完成內容**: +- 新增 `scripts/ops/gitea-queue-hook-backlog-playbook.py`,把 controller 的 `gitea_queue_or_hook_backlog` 從泛用 evidence 分流,升級成 Gitea 專用 read-only check-mode;只讀公開 `/api/healthz`、`/api/v1/version`、`/metrics` 與 sanitized host textfile,不讀 app.ini / secret / `.runner` / raw session / DB。 +- `scripts/ops/host-sustained-load-controller.py` 的兩條 Gitea 壓力分支現在輸出 `/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py ... --json`;StockPlatform、control-plane、unknown 分支仍維持 `host-sustained-load-evidence.py`。 +- `.gitea/workflows/cd.yaml` 與 `ops/runner/test_cd_controlled_runtime_profile.py` 已把新 playbook / test 納入 controlled-runtime allowlist、`py_compile` 與 focused pytest,避免 P0 ops 修法誤跑 B5 Docker socket。 +- 已 live 部署到 110:`/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py` SHA `361197681f23482a172ee2de9170b5669f58ac089b1d7ca3f2a18300bed3059c`,`/home/wooo/scripts/host-sustained-load-controller.py` SHA `96bb93e8169af5e98aac18d123daec89b9d8a74ce194abea16f5a75e051f4f64`。 + +**live readback 證據**: +- controller 現在回報 `classification=blocked_gitea_queue_or_hook_backlog_requires_playbook`,且 `dry_run=/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py ... --json`。 +- Gitea playbook 回報 `classification=blocked_gitea_hooktask_backlog_check_required`、`gitea_health=status pass`、`version=1.25.5`、`active_actions container_count=0 process_group_count=0`、`docker_stats fresh=true age=15s`、`gitea_container_cpu_cores=1.6166`、`gitea_process_family_cpu_percent=53.1`、`gitea_hooktasks=1218`、`go_goroutines=159`、`process_open_fds=88`。 +- operation boundaries 全部保持 read-only:`host_write_performed=false`、`process_signal_performed=false`、`docker_restart_performed=false`、`systemd_restart_performed=false`、`database_query_performed=false`、`secret_value_read=false`。 + +**驗證**: +- `python3.11 -m pytest scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py scripts/ops/tests/test_host_runaway_process_exporter.py scripts/ops/tests/test_host_pressure_alert_contract.py ops/runner/test_cd_controlled_runtime_profile.py -q`:`75 passed`。 +- `python3.11 -m py_compile scripts/ops/gitea-queue-hook-backlog-playbook.py scripts/ops/host-sustained-load-controller.py`:通過。 +- `.gitea/workflows/cd.yaml` / `ops/monitoring/alerts-unified.yml` YAML parse:通過。 +- `python3.11 ops/runner/guard-gitea-runner-pressure.py --root .`:通過,`auto_branch_events_on_110=0`、`generic_runner_labels=0`。 +- `git diff --check`:通過。 + +**仍維持**: +- 未使用 GitHub / `gh` / GitHub API;未讀 secret / token / `.env` / raw sessions / SQLite / auth;未讀 `.runner` 內容。 +- 未重啟主機,未 restart Docker / Nginx / K3s / DB / firewall,未 workflow_dispatch,未 DROP / TRUNCATE / restore / prune。 + +**下一步**: +- commit / push Gitea `main`,讀回 CD;CD 成功後下一個 P0 是把 `blocked_gitea_hooktask_backlog_check_required` 接成 public queue readback / authorized export-free 判讀,避免下次又只停在「知道 Gitea 熱」。 + ## 2026-07-02 — 11:28 110 cold-start SSH 探測誤判修正 **完成內容**: diff --git a/docs/runbooks/FULL-STACK-COLD-START-SOP.md b/docs/runbooks/FULL-STACK-COLD-START-SOP.md index bdfdb7a0..81021e74 100644 --- a/docs/runbooks/FULL-STACK-COLD-START-SOP.md +++ b/docs/runbooks/FULL-STACK-COLD-START-SOP.md @@ -96,6 +96,8 @@ v1.82 bounded summary rule:`post-start-quick-check.sh` 與 `188-host-hygiene-m 2026-07-02 12:35 追加 110 CPU 分流優先序:若 Docker stats 是 fresh,且 `gitea` 或 StockPlatform 關鍵容器已超過 early triage 門檻 `1.0 core`,controller / evidence 必須先路由到對應服務 playbook,不得被長壽命 `ps %CPU` 的 `systemd_control_plane` 平均值搶先導到 control-plane playbook。control-plane saturation 仍保留為後備路徑,適用於沒有已知 hot container / hot service family 的情境。 +2026-07-02 12:50 追加 Gitea 壓力 check-mode:`host-sustained-load-controller.py` 若分類為 `blocked_gitea_queue_or_hook_backlog_requires_playbook`,`dry_run_command` 必須指向 110 已部署的 `/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py --host 110 --metrics-file /home/wooo/node_exporter_textfiles/host_runaway_process.prom --docker-stats-file /home/wooo/node_exporter_textfiles/docker_stats.prom --json`。此 playbook 只能讀公開 Gitea health/version/metrics 與 sanitized host textfile;必須輸出 health、version、active Actions、fresh Docker stats、`gitea_hooktasks`、Gitea container CPU、process-family CPU 與 operation boundaries。若 health/version 正常、active Actions 為 0、`gitea` container 超過 `1.0 core` 且 `gitea_hooktasks >= 1000`,狀態固定為 `blocked_gitea_hooktask_backlog_check_required`,下一步走 public queue / hook backlog 判讀;不得讀 app.ini / secret / `.runner` / raw session / DB,不得重啟 Gitea / Docker / Nginx / K3s / DB / firewall。 + 2026-06-25 20:25 orphan Chrome cleanup / scorecard refresh supersedes the 20:11 CPU wording. 110 high CPU was traced to two `stockplatform-review-bulk-ux` Chrome process groups `2756503` and `2829627` with root Chrome process `PPID=1`, elapsed about 5h, no active parent smoke, and sustained GPU/renderer CPU. With user approval, only those two process groups received targeted `SIGTERM` at 20:24. Post-check showed no remaining PGID entries; `vmstat` showed CPU idle around `85-90%`, `si/so=0`, and no immediate swap thrash. No Docker/systemd/Nginx/firewall/K8s action, CI cancellation, manual data ingestion, manual DB write, Wazuh/SOC runtime change, or secret read was performed. The 20:25 full post-start wrapper then returned cold-start `PASS=89 WARN=0 BLOCKED=0`, but overall `POST_START_QUICK_CHECK PASS=37 WARN=2 BLOCKED=1`, `RESULT=BLOCKED`, because StockPlatform data freshness was still blocked at that time and DR remained incomplete. 2026-06-25 20:11 StockPlatform cron-source recovery supersedes the 19:35 source-version wording. StockPlatform Gitea `main` and live `/home/wooo/stockplatform-v2` are now at `fb91aa4c6272469d1d26e0820169629eac17d28a fix(ops): restore production cron recovery entrypoints`; six missing production cron entrypoint scripts are restored, `run-intelligence-sync.sh` contains the Docker-backed `psql` shim, and live contract check confirms every `scripts/ops/*.sh` referenced by `install-production-cron.sh` exists. The only live write performed for StockPlatform recovery was a fast-forward `git pull --ff-only origin main` on 110; no Docker/systemd/Nginx/firewall/K8s restart, manual ingestion run, manual DB write, or secret read was performed. Natural cron evidence after the pull is now green for the repaired entrypoints: `source-remediation-queue` 19:56 and 20:00 succeeded, `market-index-ingestion` 20:00 succeeded, `price-ingestion` 20:02 succeeded, `margin-short-ingestion` 20:05 succeeded, `chips-ingestion` 20:06 succeeded, and `ai-recommendation-pipeline` 20:10 ran but correctly produced the internal blocker `core_margin_short_daily_incomplete,official_margin_short_daily_official_pending`. StockPlatform `/api/v1/system/freshness` therefore still returns `status=blocked` because the 2026-06-25 official margin-short source is pending and `ai.recommendations` must stay on 2026-06-24 until that gate clears. This is no longer a route, source-version, or missing-cron-script blocker; it is a product-data freshness blocker waiting on official source availability and the next valid AI pipeline run. diff --git a/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md b/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md index b362665f..27998faa 100644 --- a/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md +++ b/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md @@ -1,7 +1,7 @@ # 主機異常行程 AIOps PlayBook -> 最後更新:2026-06-18 Asia/Taipei -> 範圍:110 host CPU 滿載、orphan Chrome / Playwright smoke、Gitea Actions CI load、Prisma / package install 資源壓力分流。 +> 最後更新:2026-07-02 Asia/Taipei +> 範圍:110 host CPU 滿載、orphan Chrome / Playwright smoke、Gitea Actions CI load、Gitea 服務熱點、Prisma / package install 資源壓力分流。 --- @@ -19,6 +19,7 @@ read-only exporter -> Prometheus alert -> AI triage packet -> KM / PlayBook evid |------|------|------| | orphan browser smoke | headless Chrome / Chromium / Playwright process group 存活過久、PPID=1 或 group leader 消失、CPU 合計過高 | 走 dry-run 修復包;controlled apply receipt + evidence + verifier 成立後可送 `SIGTERM` | | 合法 CI load | Gitea Actions task container 正在跑,沒有 orphan browser 指標 | 觀察 queue / timeout;不要誤殺 | +| Gitea 服務熱點 | Docker stats fresh,`gitea` container 超過 early triage 門檻,且 active Actions 為 0 | 走 `gitea-queue-hook-backlog-playbook.py` check-mode;先讀 public health/version/metrics 與 sanitized host textfile,不重啟服務 | | Docker / Sentry / Harbor 事故 | container restart、port down、journal error、cold-start gate blocked | 走各服務自己的 SOP,不使用本 PlayBook 殺 process | | swap 已滿但未 thrash | swap ratio 高但 `vmstat` / load 分類未顯示即時 thrash | 不手動清 swap;先降高 CPU 來源 | @@ -31,6 +32,7 @@ read-only exporter -> Prometheus alert -> AI triage packet -> KM / PlayBook evid ```text /home/wooo/scripts/host-runaway-process-exporter.py /home/wooo/scripts/host-runaway-process-remediation.py +/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py /home/wooo/node_exporter_textfiles/host_runaway_process.prom ``` @@ -42,6 +44,7 @@ read-only exporter -> Prometheus alert -> AI triage packet -> KM / PlayBook evid | `awoooi_host_runaway_browser_orphan_group_count{host="110",rule=...}` | 符合規則的 orphan browser process group 數 | | `awoooi_host_runaway_browser_orphan_cpu_percent{host="110",rule=...}` | orphan group CPU 合計 | | `awoooi_host_gitea_actions_active_container_count{host="110"}` | 目前 active Gitea Actions task containers | +| `awoooi_host_process_family_cpu_percent{host="110",family="gitea_service"}` | Gitea process family CPU,用來補足 Docker stats 歸因 | | `awoooi_host_load5_per_core{host="110"}` | load5 / CPU core | | `awoooi_host_swap_used_ratio{host="110"}` | swap 使用比例 | | `awoooi_host_runaway_process_remediation_authorized{host="110"}` | 必須永遠為 `0`;exporter 不是執行器 | @@ -93,6 +96,18 @@ dry-run 必須檢查: 如果只看到 `HostCiRunnerLoadSaturation`,且 orphan group count 為 `0`,預設判定是「合法 CI 短期負載」,不得自動殺 process;只能走 runner queue verifier、stale-run drain/cancel packet 與 host pressure fail-closed。 +如果 controller 回報 `blocked_gitea_queue_or_hook_backlog_requires_playbook`,下一步固定跑 Gitea 專用 check-mode: + +```bash +python3 scripts/ops/gitea-queue-hook-backlog-playbook.py \ + --host 110 \ + --metrics-file /home/wooo/node_exporter_textfiles/host_runaway_process.prom \ + --docker-stats-file /home/wooo/node_exporter_textfiles/docker_stats.prom \ + --json +``` + +Gitea playbook 只能讀公開 `/api/healthz`、`/api/v1/version`、`/metrics` 與 sanitized host textfile。必須輸出 active Actions、Gitea health/version、`gitea_hooktasks`、Gitea container CPU、process-family CPU、Docker stats freshness 與 operation boundaries;不得讀 app.ini、secret、`.runner`、raw session、DB,也不得 restart / reload / kill。若 health 正常、active Actions 為 0、Gitea container 超過 `1.0 core` 且 `gitea_hooktasks >= 1000`,分類為 `blocked_gitea_hooktask_backlog_check_required`,再進 public queue / hook backlog 判讀。 + 如果只看到 `HostLoadAverageSustainedHigh`,且 orphan / active CI / swap 都無明確命中,AI 必須先跑只讀脫敏 evidence collector: ```bash diff --git a/ops/runner/test_cd_controlled_runtime_profile.py b/ops/runner/test_cd_controlled_runtime_profile.py index d50d6cd1..269600c2 100644 --- a/ops/runner/test_cd_controlled_runtime_profile.py +++ b/ops/runner/test_cd_controlled_runtime_profile.py @@ -145,19 +145,23 @@ def test_backup_freshness_sources_stay_on_controlled_runtime_profile() -> None: "scripts/ops/backup-alert-label-contract-check.py)", "scripts/ops/backup-health-textfile-exporter.py)", "scripts/ops/tests/test_backup_health_textfile_exporter.py)", + "scripts/ops/gitea-queue-hook-backlog-playbook.py)", "scripts/ops/host-runaway-process-exporter.py)", "scripts/ops/host-sustained-load-controller.py)", "scripts/ops/host-sustained-load-evidence.py)", + "scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py)", "scripts/ops/tests/test_host_runaway_process_exporter.py)", "scripts/ops/tests/test_host_pressure_alert_contract.py)", "../../scripts/backup/backup-awoooi-frequent.sh", "../../scripts/backup/backup-status.sh", "../../scripts/backup/tests/test_backup_status_contract.py", "../../scripts/ops/backup-alert-label-contract-check.py", + "../../scripts/ops/gitea-queue-hook-backlog-playbook.py", "../../scripts/ops/host-runaway-process-exporter.py", "../../scripts/ops/host-sustained-load-controller.py", "../../scripts/ops/host-sustained-load-evidence.py", "../../scripts/ops/tests/test_backup_health_textfile_exporter.py", + "../../scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py", "../../scripts/ops/tests/test_host_runaway_process_exporter.py", "../../scripts/ops/tests/test_host_pressure_alert_contract.py", ] @@ -751,6 +755,7 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "docs/runbooks/REBOOT-POST-START-QUICK-CHECK.md)", "docs/runbooks/REBOOT-RECOVERY-SOP.md)", "docs/runbooks/FULL-STACK-COLD-START-SOP.md)", + "docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md)", "docs/operations/host-cpu-pressure-drain-readback-2026-07-01.snapshot.json)", "docs/operations/post-reboot-runtime-recovery-readback-2026-07-01.snapshot.json)", "docs/workplans/2026-06-04-reboot-cold-start-backup-recovery-workplan.md)", @@ -766,8 +771,10 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "scripts/backup/gitea-repo-bundle-backup.sh)", "scripts/ops/backup-health-textfile-exporter.py)", "scripts/ops/docker-disk-pressure-retention-cleanup.py)", + "scripts/ops/gitea-queue-hook-backlog-playbook.py)", "scripts/ops/host-runaway-process-exporter.py)", "scripts/ops/host-sustained-load-evidence.py)", + "scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py)", "scripts/reboot-recovery/deploy-to-110.sh)", "scripts/reboot-recovery/enforce-110-runner-failclosed.sh)", "scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh)", @@ -794,6 +801,7 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N "scripts/reboot-recovery/tests/test_momo_source_arrival_gate.py)", "../../scripts/ops/backup-health-textfile-exporter.py", "../../scripts/ops/docker-disk-pressure-retention-cleanup.py", + "../../scripts/ops/gitea-queue-hook-backlog-playbook.py", "../../scripts/ops/host-runaway-process-exporter.py", "../../scripts/ops/host-sustained-load-controller.py", "../../scripts/ops/host-sustained-load-evidence.py", diff --git a/scripts/ops/gitea-queue-hook-backlog-playbook.py b/scripts/ops/gitea-queue-hook-backlog-playbook.py new file mode 100644 index 00000000..a77e4d32 --- /dev/null +++ b/scripts/ops/gitea-queue-hook-backlog-playbook.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 +"""Read-only check-mode playbook for 110 Gitea CPU pressure. + +This helper turns the controller's generic Gitea pressure branch into a +source-specific packet. It only reads public Gitea health/version/metrics and +sanitized host textfiles; it does not read secrets, app.ini, raw runner +registrations, sessions, or environment files, and it does not mutate host +state. +""" + +from __future__ import annotations + +import argparse +import json +import re +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + + +DEFAULT_HOST_METRICS_FILE = Path("/home/wooo/node_exporter_textfiles/host_runaway_process.prom") +DEFAULT_DOCKER_STATS_FILE = Path("/home/wooo/node_exporter_textfiles/docker_stats.prom") +DEFAULT_DOCKER_STATS_MAX_AGE_SECONDS = 300 +DEFAULT_GITEA_METRICS_URL = "http://192.168.0.110:3001/metrics" +DEFAULT_GITEA_HEALTH_URL = "http://192.168.0.110:3001/api/healthz" +DEFAULT_GITEA_VERSION_URL = "http://192.168.0.110:3001/api/v1/version" +SCHEMA_VERSION = "gitea_queue_hook_backlog_check_mode_v1" + +LABEL_RE = re.compile(r"(?P[A-Za-z_][A-Za-z0-9_]*)=\"(?P(?:[^\"\\\\]|\\\\.)*)\"") +METRIC_RE = re.compile( + r"^(?P[A-Za-z_:][A-Za-z0-9_:]*)(?:\{(?P[^}]*)\})?\s+" + r"(?P[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)$" +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Build a read-only Gitea queue/hook backlog check-mode packet." + ) + parser.add_argument("--host", default="110") + parser.add_argument("--metrics-file", type=Path, default=DEFAULT_HOST_METRICS_FILE) + parser.add_argument("--docker-stats-file", type=Path, default=DEFAULT_DOCKER_STATS_FILE) + parser.add_argument( + "--docker-stats-max-age-seconds", + type=int, + default=DEFAULT_DOCKER_STATS_MAX_AGE_SECONDS, + ) + parser.add_argument("--gitea-metrics-url", default=DEFAULT_GITEA_METRICS_URL) + parser.add_argument("--gitea-health-url", default=DEFAULT_GITEA_HEALTH_URL) + parser.add_argument("--gitea-version-url", default=DEFAULT_GITEA_VERSION_URL) + parser.add_argument("--gitea-metrics-file", type=Path) + parser.add_argument("--gitea-health-file", type=Path) + parser.add_argument("--gitea-version-file", type=Path) + parser.add_argument("--queue-json-file", type=Path) + parser.add_argument("--http-timeout-seconds", type=float, default=5.0) + parser.add_argument("--hot-container-cpu-threshold", type=float, default=1.0) + parser.add_argument("--gitea-family-cpu-threshold", type=float, default=50.0) + parser.add_argument("--hooktasks-warning-threshold", type=float, default=1000.0) + parser.add_argument("--json", action="store_true") + return parser.parse_args() + + +def _unescape_label(value: str) -> str: + return value.replace(r"\"", '"').replace(r"\\", "\\").replace(r"\n", "\n") + + +def parse_prometheus_text(text: str) -> list[dict[str, Any]]: + samples: list[dict[str, Any]] = [] + for raw_line in text.splitlines(): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + match = METRIC_RE.match(line) + if not match: + continue + labels = { + item.group("key"): _unescape_label(item.group("value")) + for item in LABEL_RE.finditer(match.group("labels") or "") + } + samples.append( + { + "name": match.group("name"), + "labels": labels, + "value": float(match.group("value")), + } + ) + return samples + + +def read_text(path: Path | None) -> str: + if path is None: + return "" + try: + return path.read_text(encoding="utf-8") + except FileNotFoundError: + return "" + + +def _sample_value_any(samples: list[dict[str, Any]], name: str) -> float | None: + for sample in samples: + if sample["name"] == name: + return float(sample["value"]) + return None + + +def _sample_value( + samples: list[dict[str, Any]], + name: str, + *, + host: str, + labels: dict[str, str] | None = None, + default: float = 0.0, +) -> float: + expected = {"host": host, **(labels or {})} + for sample in samples: + if sample["name"] != name: + continue + sample_labels = sample["labels"] + if all(sample_labels.get(key) == value for key, value in expected.items()): + return float(sample["value"]) + return default + + +def _textfile_mtime_seconds(samples: list[dict[str, Any]], suffix: str) -> float | None: + for sample in samples: + if sample["name"] != "node_textfile_mtime_seconds": + continue + file_label = str(sample["labels"].get("file") or "") + if file_label.endswith(suffix): + return float(sample["value"]) + return None + + +def docker_stats_freshness( + *, + samples: list[dict[str, Any]], + docker_stats_file: Path, + max_age_seconds: int, +) -> dict[str, Any]: + mtime = _textfile_mtime_seconds(samples, "docker_stats.prom") + now = _sample_value_any(samples, "node_time_seconds") + source = "node_textfile_mtime_seconds" + if mtime is None: + try: + mtime = docker_stats_file.stat().st_mtime + now = time.time() + source = "file_stat_mtime" + except FileNotFoundError: + return { + "fresh": False, + "age_seconds": None, + "max_age_seconds": max_age_seconds, + "source": "missing", + } + if now is None: + now = time.time() + age_seconds = max(0, int(now - mtime)) + return { + "fresh": age_seconds <= max_age_seconds, + "age_seconds": age_seconds, + "max_age_seconds": max_age_seconds, + "source": source, + } + + +def top_docker_containers( + samples: list[dict[str, Any]], + *, + host: str, + top_n: int = 5, +) -> list[dict[str, Any]]: + rows = [] + for sample in samples: + if sample["name"] != "docker_container_cpu_cores": + continue + labels = sample["labels"] + if labels.get("host", host) != host: + continue + rows.append( + { + "container_name": labels.get("container_name") or labels.get("name") or "unknown", + "cpu_cores": round(float(sample["value"]), 6), + } + ) + return sorted(rows, key=lambda item: (-item["cpu_cores"], item["container_name"]))[:top_n] + + +def process_families(samples: list[dict[str, Any]], *, host: str) -> list[dict[str, Any]]: + by_family: dict[str, dict[str, Any]] = {} + for sample in samples: + labels = sample["labels"] + if labels.get("host") != host: + continue + family = labels.get("family") + if not family: + continue + row = by_family.setdefault( + family, + { + "family": family, + "cpu_percent": 0.0, + "process_count": 0, + "oldest_age_seconds": 0, + "top_info": "", + }, + ) + if sample["name"] == "awoooi_host_process_family_cpu_percent": + row["cpu_percent"] = round(float(sample["value"]), 3) + elif sample["name"] == "awoooi_host_process_family_process_count": + row["process_count"] = int(sample["value"]) + elif sample["name"] == "awoooi_host_process_family_oldest_age_seconds": + row["oldest_age_seconds"] = int(sample["value"]) + elif sample["name"] == "awoooi_host_process_family_top_info": + row["top_info"] = str(labels.get("top_info") or "")[:120] + return sorted(by_family.values(), key=lambda item: (-float(item["cpu_percent"]), item["family"])) + + +def _family_cpu(families: list[dict[str, Any]], family: str) -> float: + for item in families: + if item.get("family") == family: + return float(item.get("cpu_percent") or 0.0) + return 0.0 + + +def fetch_text_or_file(*, url: str, path: Path | None, timeout_seconds: float) -> dict[str, Any]: + if path is not None: + text = read_text(path) + return { + "ok": bool(text), + "status_code": 200 if text else None, + "source": str(path), + "text": text, + "error_type": "" if text else "file_missing_or_empty", + } + request = urllib.request.Request( + url, + headers={"User-Agent": "awoooi-gitea-pressure-check-mode/1.0"}, + ) + try: + with urllib.request.urlopen(request, timeout=timeout_seconds) as response: + raw = response.read() + status = int(getattr(response, "status", 200)) + return { + "ok": 200 <= status < 300, + "status_code": status, + "source": url, + "text": raw.decode("utf-8", errors="replace"), + "error_type": "", + } + except urllib.error.HTTPError as exc: + return { + "ok": False, + "status_code": int(exc.code), + "source": url, + "text": "", + "error_type": "http_error", + } + except (urllib.error.URLError, TimeoutError): + return { + "ok": False, + "status_code": None, + "source": url, + "text": "", + "error_type": "connection_error", + } + + +def _json_from_text(text: str) -> dict[str, Any]: + try: + value = json.loads(text) + except json.JSONDecodeError: + return {} + return value if isinstance(value, dict) else {} + + +def selected_gitea_metrics(samples: list[dict[str, Any]]) -> dict[str, Any]: + selected = { + "gitea_hooktasks": _sample_value_any(samples, "gitea_hooktasks"), + "gitea_repositories": _sample_value_any(samples, "gitea_repositories"), + "gitea_webhooks": _sample_value_any(samples, "gitea_webhooks"), + "go_goroutines": _sample_value_any(samples, "go_goroutines"), + "go_sched_gomaxprocs_threads": _sample_value_any(samples, "go_sched_gomaxprocs_threads"), + "process_cpu_seconds_total": _sample_value_any(samples, "process_cpu_seconds_total"), + "process_open_fds": _sample_value_any(samples, "process_open_fds"), + "process_resident_memory_bytes": _sample_value_any(samples, "process_resident_memory_bytes"), + "gitea_build_version": "", + } + for sample in samples: + if sample["name"] == "gitea_build_info": + selected["gitea_build_version"] = str(sample["labels"].get("version") or "") + break + return selected + + +def queue_readback_summary(path: Path | None) -> dict[str, Any]: + if path is None: + return {"available": False, "source": "", "latest_visible_cd_run": None} + data = _json_from_text(read_text(path)) + visible_runs = data.get("top_visible_runs") or data.get("visible_runs") or [] + latest_cd_run = None + if isinstance(visible_runs, list): + for item in visible_runs: + if not isinstance(item, dict): + continue + workflow = str(item.get("workflow") or item.get("name") or "") + if workflow == "cd.yaml" or "cd" in workflow.lower(): + latest_cd_run = { + "workflow": workflow, + "run_id": str(item.get("run_id") or ""), + "status": str(item.get("status") or ""), + "commit_sha": str(item.get("commit_sha") or "")[:12], + } + break + return { + "available": bool(data), + "source": str(path), + "top_visible_run_count": len(visible_runs) if isinstance(visible_runs, list) else 0, + "latest_visible_cd_run": latest_cd_run, + "no_matching_runner_visible": data.get("no_matching_runner_visible"), + } + + +def build_payload(args: argparse.Namespace) -> dict[str, Any]: + host_samples = parse_prometheus_text(read_text(args.metrics_file)) + docker_samples = parse_prometheus_text(read_text(args.docker_stats_file)) + docker_status = docker_stats_freshness( + samples=host_samples, + docker_stats_file=args.docker_stats_file, + max_age_seconds=args.docker_stats_max_age_seconds, + ) + metrics_read = fetch_text_or_file( + url=args.gitea_metrics_url, + path=args.gitea_metrics_file, + timeout_seconds=args.http_timeout_seconds, + ) + health_read = fetch_text_or_file( + url=args.gitea_health_url, + path=args.gitea_health_file, + timeout_seconds=args.http_timeout_seconds, + ) + version_read = fetch_text_or_file( + url=args.gitea_version_url, + path=args.gitea_version_file, + timeout_seconds=args.http_timeout_seconds, + ) + gitea_samples = parse_prometheus_text(str(metrics_read.get("text") or "")) + gitea_metrics = selected_gitea_metrics(gitea_samples) + health_json = _json_from_text(str(health_read.get("text") or "")) + version_json = _json_from_text(str(version_read.get("text") or "")) + families = process_families(host_samples, host=args.host) + containers_untrusted = top_docker_containers(docker_samples, host=args.host) + containers = containers_untrusted if docker_status.get("fresh") is True else [] + gitea_container_cpu = 0.0 + for container in containers: + if str(container.get("container_name") or "").lower() == "gitea": + gitea_container_cpu = float(container.get("cpu_cores") or 0.0) + break + active_actions = { + "container_count": int( + _sample_value( + host_samples, + "awoooi_host_gitea_actions_active_container_count", + host=args.host, + ) + ), + "process_group_count": int( + _sample_value( + host_samples, + "awoooi_host_gitea_actions_active_process_group_count", + host=args.host, + ) + ), + "process_cpu_percent": round( + _sample_value( + host_samples, + "awoooi_host_gitea_actions_active_process_cpu_percent", + host=args.host, + ), + 3, + ), + } + gitea_family_cpu = _family_cpu(families, "gitea_service") + hooktasks = float(gitea_metrics.get("gitea_hooktasks") or 0.0) + health_status = str(health_json.get("status") or "") + + classification = "observing_gitea_pressure_below_threshold" + severity = "info" + next_action = "keep_read_only_monitoring" + if docker_status.get("fresh") is not True: + classification = "blocked_gitea_pressure_attribution_stale_requires_textfile_recovery" + severity = "warning" + next_action = "restore_docker_stats_textfile_before_gitea_apply" + elif not metrics_read.get("ok"): + classification = "blocked_gitea_metrics_unavailable_requires_route_or_exporter_check" + severity = "warning" + next_action = "restore_public_gitea_metrics_readback_before_apply" + elif health_status and health_status != "pass": + classification = "blocked_gitea_health_degraded_requires_service_recovery_playbook" + severity = "critical" + next_action = "run_gitea_service_health_recovery_check_mode_without_restart" + elif active_actions["container_count"] > 0 or active_actions["process_group_count"] > 0: + classification = "blocked_gitea_actions_pressure_requires_runner_queue_packet" + severity = "warning" + next_action = "run_runner_queue_readback_and_keep_110_pressure_gate_fail_closed" + elif ( + gitea_container_cpu >= args.hot_container_cpu_threshold + and hooktasks >= args.hooktasks_warning_threshold + ): + classification = "blocked_gitea_hooktask_backlog_check_required" + severity = "warning" + next_action = "read_gitea_hooktask_backlog_age_from_authorized_export_before_apply" + elif ( + gitea_container_cpu >= args.hot_container_cpu_threshold + or gitea_family_cpu >= args.gitea_family_cpu_threshold + ): + classification = "blocked_gitea_service_hot_without_actions_backlog" + severity = "warning" + next_action = "run_gitea_metrics_rate_probe_then_select_quota_or_hook_playbook" + + return { + "schema_version": SCHEMA_VERSION, + "host": args.host, + "mode": "read_only_check_mode", + "classification": classification, + "severity": severity, + "controlled_apply_allowed": False, + "next_action": next_action, + "readback": { + "docker_stats": docker_status, + "gitea_metrics_http": { + "ok": bool(metrics_read.get("ok")), + "status_code": metrics_read.get("status_code"), + "source": metrics_read.get("source"), + "error_type": metrics_read.get("error_type"), + }, + "gitea_health_http": { + "ok": bool(health_read.get("ok")), + "status_code": health_read.get("status_code"), + "status": health_status, + "checks": sorted((health_json.get("checks") or {}).keys()), + }, + "gitea_version_http": { + "ok": bool(version_read.get("ok")), + "status_code": version_read.get("status_code"), + "version": str(version_json.get("version") or ""), + }, + "selected_gitea_metrics": gitea_metrics, + "active_actions": active_actions, + "gitea_container_cpu_cores": round(gitea_container_cpu, 6), + "gitea_process_family_cpu_percent": round(gitea_family_cpu, 3), + "top_containers": containers, + "top_containers_untrusted": containers_untrusted, + "top_process_families": families[:5], + "queue_readback": queue_readback_summary(args.queue_json_file), + "thresholds": { + "hot_container_cpu": args.hot_container_cpu_threshold, + "gitea_family_cpu": args.gitea_family_cpu_threshold, + "hooktasks_warning": args.hooktasks_warning_threshold, + }, + }, + "commands": { + "check_mode": ( + "/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py " + f"--host {args.host} --metrics-file {DEFAULT_HOST_METRICS_FILE} " + f"--docker-stats-file {DEFAULT_DOCKER_STATS_FILE} --json" + ), + "post_apply_verifier": ( + "/home/wooo/scripts/host-sustained-load-controller.py " + f"--host {args.host} --metrics-file {DEFAULT_HOST_METRICS_FILE} " + f"--docker-stats-file {DEFAULT_DOCKER_STATS_FILE} --json" + ), + "controlled_apply": "", + "rollback": "no host mutation performed by this check-mode playbook", + }, + "redaction": { + "raw_metrics_emitted": False, + "raw_command_lines_emitted": False, + "workspace_paths_emitted": False, + "urls_emitted_from_processes": False, + "secret_values_read": False, + }, + "operation_boundaries": { + "host_write_performed": False, + "process_signal_performed": False, + "docker_restart_performed": False, + "systemd_restart_performed": False, + "nginx_reload_performed": False, + "database_query_performed": False, + "raw_runner_registration_read": False, + "raw_session_read": False, + "secret_value_read": False, + }, + } + + +def main() -> int: + args = parse_args() + payload = build_payload(args) + if args.json: + print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)) + else: + print(f"status={payload['classification']}") + print(f"controlled_apply_allowed={str(payload['controlled_apply_allowed']).lower()}") + print(f"next_action={payload['next_action']}") + print(f"check_mode_command={payload['commands']['check_mode']}") + print(f"post_apply_verifier={payload['commands']['post_apply_verifier']}") + return 0 if not payload["classification"].startswith("blocked_") else 75 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/ops/host-sustained-load-controller.py b/scripts/ops/host-sustained-load-controller.py index 168d7ad5..6d3045e6 100755 --- a/scripts/ops/host-sustained-load-controller.py +++ b/scripts/ops/host-sustained-load-controller.py @@ -450,6 +450,7 @@ def build_packet( controlled_apply_command = "" controller_script = script_dir / "host-sustained-load-controller.py" evidence_script = script_dir / "host-sustained-load-evidence.py" + gitea_playbook_script = script_dir / "gitea-queue-hook-backlog-playbook.py" remediation_script = script_dir / "host-runaway-process-remediation.py" verifier_command = ( f"{controller_script} " @@ -532,7 +533,7 @@ def build_packet( else "warning" ) dry_run_command = ( - f"{evidence_script} " + f"{gitea_playbook_script} " f"--host {host} --metrics-file {DEFAULT_METRICS_FILE} " f"--docker-stats-file {DEFAULT_DOCKER_STATS_FILE} --json" ) @@ -558,7 +559,7 @@ def build_packet( classification = "blocked_gitea_queue_or_hook_backlog_requires_playbook" severity = "critical" if load5_per_core > load5_per_core_threshold else "warning" dry_run_command = ( - f"{evidence_script} " + f"{gitea_playbook_script} " f"--host {host} --metrics-file {DEFAULT_METRICS_FILE} " f"--docker-stats-file {DEFAULT_DOCKER_STATS_FILE} --json" ) diff --git a/scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py b/scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py new file mode 100644 index 00000000..a6f08a8b --- /dev/null +++ b/scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + + +SCRIPT_ROOT = Path(__file__).resolve().parents[1] +PLAYBOOK_PATH = SCRIPT_ROOT / "gitea-queue-hook-backlog-playbook.py" + + +def _write_common_gitea_files(tmp_path: Path) -> dict[str, Path]: + metrics_file = tmp_path / "gitea.prom" + metrics_file.write_text( + "\n".join( + [ + 'gitea_build_info{goarch="amd64",goos="linux",goversion="go1.25.8",version="1.25.5"} 1', + "gitea_hooktasks 1217", + "gitea_repositories 13", + "gitea_webhooks 2", + "go_goroutines 121", + "go_sched_gomaxprocs_threads 2", + "process_cpu_seconds_total 41040.25", + "process_open_fds 56", + "process_resident_memory_bytes 8.86448128e+08", + ] + ), + encoding="utf-8", + ) + health_file = tmp_path / "health.json" + health_file.write_text( + json.dumps( + { + "status": "pass", + "checks": { + "cache:ping": [{"status": "pass"}], + "database:ping": [{"status": "pass"}], + }, + } + ), + encoding="utf-8", + ) + version_file = tmp_path / "version.json" + version_file.write_text('{"version":"1.25.5"}', encoding="utf-8") + return { + "gitea_metrics": metrics_file, + "health": health_file, + "version": version_file, + } + + +def _run_playbook( + tmp_path: Path, + *, + host_metrics: list[str], + docker_metrics: list[str], +) -> subprocess.CompletedProcess[str]: + gitea_files = _write_common_gitea_files(tmp_path) + host_file = tmp_path / "host.prom" + host_file.write_text("\n".join(host_metrics), encoding="utf-8") + docker_file = tmp_path / "docker.prom" + docker_file.write_text("\n".join(docker_metrics), encoding="utf-8") + + return subprocess.run( + [ + sys.executable, + str(PLAYBOOK_PATH), + "--host", + "110", + "--metrics-file", + str(host_file), + "--docker-stats-file", + str(docker_file), + "--gitea-metrics-file", + str(gitea_files["gitea_metrics"]), + "--gitea-health-file", + str(gitea_files["health"]), + "--gitea-version-file", + str(gitea_files["version"]), + "--json", + ], + capture_output=True, + text=True, + ) + + +def test_gitea_playbook_classifies_hooktask_backlog_without_secret_reads(tmp_path: Path) -> None: + result = _run_playbook( + tmp_path, + host_metrics=[ + 'awoooi_host_gitea_actions_active_container_count{host="110"} 0', + 'awoooi_host_gitea_actions_active_process_group_count{host="110"} 0', + 'awoooi_host_gitea_actions_active_process_cpu_percent{host="110"} 0', + 'awoooi_host_process_family_cpu_percent{host="110",family="gitea_service"} 53.1', + 'awoooi_host_process_family_process_count{host="110",family="gitea_service"} 2', + ], + docker_metrics=['docker_container_cpu_cores{host="110",container_name="gitea"} 1.7052'], + ) + + assert result.returncode == 75 + payload = json.loads(result.stdout) + assert payload["classification"] == "blocked_gitea_hooktask_backlog_check_required" + assert payload["controlled_apply_allowed"] is False + assert payload["readback"]["gitea_health_http"]["status"] == "pass" + assert payload["readback"]["gitea_version_http"]["version"] == "1.25.5" + assert payload["readback"]["selected_gitea_metrics"]["gitea_hooktasks"] == 1217 + assert payload["readback"]["gitea_container_cpu_cores"] == 1.7052 + assert payload["operation_boundaries"]["database_query_performed"] is False + assert payload["operation_boundaries"]["secret_value_read"] is False + assert "/home/wooo/gitea/app.ini" not in result.stdout + assert "Authorization" not in result.stdout + + +def test_gitea_playbook_routes_active_actions_to_runner_queue_packet(tmp_path: Path) -> None: + result = _run_playbook( + tmp_path, + host_metrics=[ + 'awoooi_host_gitea_actions_active_container_count{host="110"} 1', + 'awoooi_host_gitea_actions_active_process_group_count{host="110"} 1', + 'awoooi_host_gitea_actions_active_process_cpu_percent{host="110"} 220.5', + 'awoooi_host_process_family_cpu_percent{host="110",family="gitea_service"} 20', + ], + docker_metrics=['docker_container_cpu_cores{host="110",container_name="gitea"} 1.4'], + ) + + assert result.returncode == 75 + payload = json.loads(result.stdout) + assert payload["classification"] == "blocked_gitea_actions_pressure_requires_runner_queue_packet" + assert payload["next_action"] == "run_runner_queue_readback_and_keep_110_pressure_gate_fail_closed" + assert payload["readback"]["active_actions"]["container_count"] == 1 + assert payload["readback"]["active_actions"]["process_group_count"] == 1 + + +def test_gitea_playbook_rejects_stale_docker_attribution(tmp_path: Path) -> None: + result = _run_playbook( + tmp_path, + host_metrics=[ + 'node_textfile_mtime_seconds{file="/host/home/wooo/node_exporter_textfiles/docker_stats.prom"} 1000', + "node_time_seconds 5000", + 'awoooi_host_gitea_actions_active_container_count{host="110"} 0', + 'awoooi_host_gitea_actions_active_process_group_count{host="110"} 0', + ], + docker_metrics=['docker_container_cpu_cores{host="110",container_name="gitea"} 1.7052'], + ) + + assert result.returncode == 75 + payload = json.loads(result.stdout) + assert payload["classification"] == "blocked_gitea_pressure_attribution_stale_requires_textfile_recovery" + assert payload["readback"]["docker_stats"]["fresh"] is False + assert payload["readback"]["top_containers"] == [] + assert payload["readback"]["top_containers_untrusted"][0]["container_name"] == "gitea" diff --git a/scripts/ops/tests/test_host_runaway_process_exporter.py b/scripts/ops/tests/test_host_runaway_process_exporter.py index cff3cbe7..94f986f6 100644 --- a/scripts/ops/tests/test_host_runaway_process_exporter.py +++ b/scripts/ops/tests/test_host_runaway_process_exporter.py @@ -465,7 +465,7 @@ def test_sustained_load_controller_routes_gitea_backlog_from_docker_metrics(tmp_ assert payload["classification"] == "blocked_gitea_queue_or_hook_backlog_requires_playbook" assert payload["readback"]["top_container_cpu"]["container_name"] == "gitea" assert payload["controlled_apply_allowed"] is False - assert "/home/wooo/scripts/host-sustained-load-evidence.py" in payload["commands"]["dry_run"] + assert "/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py" in payload["commands"]["dry_run"] assert "scripts/ops/" not in payload["commands"]["dry_run"] @@ -522,7 +522,7 @@ def test_sustained_load_controller_routes_gitea_quota_pressure_even_when_load_is assert payload["severity"] == "warning" assert payload["readback"]["container_cpu_threshold"] == 2.0 assert payload["readback"]["top_container_cpu"]["cpu_cores"] == 2.08 - assert "/home/wooo/scripts/host-sustained-load-evidence.py" in payload["commands"]["dry_run"] + assert "/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py" in payload["commands"]["dry_run"] assert "scripts/ops/" not in payload["commands"]["dry_run"] @@ -596,7 +596,7 @@ def test_sustained_load_controller_prioritizes_hot_gitea_container_over_control_ assert payload["next_action"] == "run_gitea_queue_or_hook_backlog_playbook_check_mode" assert payload["readback"]["control_plane_process_cpu_percent"] == 68.5 assert payload["readback"]["top_container_cpu"]["container_name"] == "gitea" - assert "/home/wooo/scripts/host-sustained-load-evidence.py" in payload["commands"]["dry_run"] + assert "/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py" in payload["commands"]["dry_run"] assert "/home/wooo/gitea/app.ini" not in result.stdout @@ -819,6 +819,7 @@ def test_sustained_load_controller_routes_gitea_process_pressure_without_hot_con assert payload["classification"] == "blocked_gitea_queue_or_hook_backlog_requires_playbook" assert payload["readback"]["gitea_process_cpu_percent"] == 55.5 assert payload["controlled_apply_allowed"] is False + assert "/home/wooo/scripts/gitea-queue-hook-backlog-playbook.py" in payload["commands"]["dry_run"] assert "/home/wooo/gitea/app.ini" not in result.stdout