awoooi/.gitea/workflows/cd.yaml at aaa617f00f01ebd96fc2d52a1c60ca9c3d6a0131

wooo/awoooi

Fork 0

Files

Your Name aaa617f00f

CD Pipeline / workflow-shape (push) Successful in 1s

Details

CD Pipeline / cancel-stale-cd (push) Has been skipped

Details

CD Pipeline / tests (push) Failing after 41s

Details

CD Pipeline / build-and-deploy (push) Has been skipped

Details

CD Pipeline / post-deploy-checks (push) Has been skipped

Details

fix(reboot): expose windows99 management channel readback

2026-07-02 15:23:08 +08:00

2713 lines

140 KiB

YAML

Raw Blame History

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

 # =============================================================================
 # AWOOOI CD Pipeline (Gitea Actions - 方案 B)
 # =============================================================================
 # 流程: Build → Push to Harbor → Deploy to K8s
 # 加速措施:
 #   1. Docker Layer Cache → Harbor registry cache
 #   2. 內部 Mirror → 192.168.0.110:5001 (Harbor Proxy Cache for DockerHub)
 #   3. 非 110 runner 的 Docker pull/push 走 registry.wooo.work HTTPS alias，
 #      避免要求 runner root 修改 insecure registry；K8s image pull 仍保留
 #      192.168.0.110:5000 內網 Harbor route。
 # 2026-03-29 Claude Code (ADR-039) - Retry after creating Harbor project
 name: CD Pipeline
 # Triggers: pushes to main, plus manual dispatch.
 on:
   # 2026-06-29 Codex: restore main push CD only after the non-110
   # awoooi-non110-* runner lane read back registration metadata, active
   # service, capacity=1, pressure OK, rollback unit, and label target-match.
   # 110 incident runner labels and generic labels remain fail-closed via
   # ops/runner/guard-gitea-runner-pressure.py.
   push:
     branches:
       - main
   workflow_dispatch:
     # Manual trigger is always available (for re-runs and emergency deploys).
 # 2026-04-02 Claude Code: switched to preemption mode — a new push immediately
 # cancels the older build so that only the latest commit is deployed.
 # How: the concurrency group guarantees only one job runs at a time, and
 #   cancel-in-progress: true lets the newer run replace the older one.
 # Fixes: rapid successive commits no longer pile up in the queue, and a stuck
 #   docker build no longer blocks later deploys.
 # Safety: per the original note, the deploy step itself is protected by
 #   kubectl rollout status, so a half-deployed state should not occur.
 # The group key includes github.ref, so runs are grouped per ref.
 concurrency:
   group: cd-deploy-${{ github.ref }}
   cancel-in-progress: true
 # Workflow-level environment shared by every job below.
 env:
   HARBOR: registry.wooo.work
   SRE_GROUP_CHAT_ID: "-1003711974679"
   # Harbor Proxy Cache (internal mirror pointing at DockerHub; avoids pull rate limits)
   HARBOR_MIRROR: 192.168.0.110:5001
   # OTEL CI/CD monitoring (2026-03-31 #46c - migrated to Gitea)
   OTEL_EXPORTER_OTLP_ENDPOINT: http://192.168.0.188:24318
   OTEL_SERVICE_NAME: awoooi-cd
   OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=production
   CI_IMAGE: registry.wooo.work/awoooi/ci-runner:act-22.04
   # 2026-06-28 Codex: 110 runner pressure is an incident-grade capacity guard.
   # Do not flip this to warn-only until non-110 readiness is verified.
   HOST_WEB_BUILD_PRESSURE_WARN_ONLY: "0"
   # 2026-06-30 Codex: CD is now pinned to the dedicated awoooi-non110-host lane.
   # Keep the pressure guard fail-hard, but allow normal below-saturation load on
   # that dedicated lane so P0 deploys are not stuck behind load5/core 0.85-1.05.
   HOST_WEB_BUILD_PRESSURE_MAX_LOAD5_PER_CORE: "1.05"
   # Docker lock contention is also fail-hard during the same incident window.
   DOCKER_BUILD_LOCK_WARN_ONLY: "0"
   # 2026-05-24 Codex: deploy through the currently Ready control-plane node.
   # 120 is NotReady/SchedulingDisabled and its SSH/API endpoints are currently
   # unreachable; pinning CD to it blocks secret injection before GitOps deploy.
   K8S_SSH_HOST: 192.168.0.121
   K8S_API_SERVER: https://192.168.0.121:6443
   # 2026-06-01 Codex: post-deploy health/smoke probes use the production
   # public API. The old 192.168.0.125 NodePort VIP can be absent while the
   # public route and in-cluster service are healthy, causing false failures.
   API_HEALTH_URL: https://awoooi.wooo.work/api/v1/health
   ALERT_CHAIN_API_URL: https://awoooi.wooo.work
 jobs:
   workflow-shape:
     # 2026-06-28 Codex: Gitea 1.25 may mark a workflow invalid when every
     # root job has a job-level `if`. Keep one no-op root job without `if` so
     # cd.yaml stays parseable while deploy jobs remain guarded below.
     runs-on: awoooi-non110-host
     # The single step only echoes one line, so a one-minute ceiling is ample.
     timeout-minutes: 1
     steps:
       - name: Confirm CD Workflow Shape
         run: echo "cd.yaml root job present; deploy jobs remain guarded."
   cancel-stale-cd:
     # 2026-06-28 Codex: keep a visible no-op run for controlled queue
     # cancellation. If every job is skipped, Gitea may not create a run and
     # the stale pre-guard CD queue is not superseded by concurrency.
     # Runs only for push events whose head commit message contains the
     # literal marker `cancel-stale-cd`; the tests job excludes that same marker.
     if: ${{ github.event_name == 'push' && contains(github.event.head_commit.message, 'cancel-stale-cd') }}
     runs-on: awoooi-non110-host
     timeout-minutes: 3
     steps:
       # No-op step: it only logs that the marker commit was accepted.
       - name: Confirm Stale CD Queue Cancellation
         run: |
           echo "cancel-stale-cd marker accepted; deploy jobs are intentionally skipped."
   tests:
     # 2026-06-28 Codex: Gitea does not consistently short-circuit `[skip ci]`
     # on CD-generated deploy commits. Skip jobs explicitly so marker commits
     # do not trigger a self-feeding CD loop; `cancel-stale-cd` is a
     # controlled no-op trigger used only to cancel stale pre-guard runs.
     if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, 'cancel-stale-cd')) }}
     # 2026-04-30 Codex: run the tests job on the host runner and launch the
     # CI image explicitly. The act-managed job container can disappear mid-test
     # with Docker RWLayer=nil on the shared 110 daemon.
     timeout-minutes: 30
     runs-on: awoooi-non110-host
     # 2026-04-10 ogt: B5 改用 docker run 本地啟動，移除 services: 宣告
     # Gitea act runner 的 services: container name 為空，導致 CI 失敗
     steps:
       - name: Bootstrap Host Runner Tools
         # 2026-06-28 Codex: awoooi-non110-host maps to the dedicated
         # non-110 runner lane. Bootstrap tools defensively because host
         # runners can start without the CI toolchain preinstalled.
         # The install runs only when `apk` is on PATH; hosts without apk
         # skip it, and this step performs no other bootstrap for them.
         run: |
           if command -v apk >/dev/null 2>&1; then
             apk add --no-cache nodejs npm git curl bash coreutils python3 openssh-client docker-cli docker-cli-buildx
           fi
       # Check out the repository; the scripts/ci helpers invoked by the
       # following steps are run from the checked-out workspace.
       - uses: actions/checkout@v4
       - name: Wait for Host Web Build Pressure
         # 2026-06-28 Codex: 110 runner pressure remains incident-grade and
         # fail-hard until runner work is moved or hard-limited.
         # NOTE(review): presumably tuned by the HOST_WEB_BUILD_PRESSURE_*
         # workflow env values — confirm against the script.
         run: bash scripts/ci/wait-host-web-build-pressure.sh
       - name: Guard Workflow Secret Surfaces
         # Runs the Node guard script; what it validates is defined in
         # scripts/ci/check-gitea-step-env-secrets.js (not visible here).
         run: node scripts/ci/check-gitea-step-env-secrets.js
       # 2026-03-31 ogt: improved the alert format for readability.
       - name: Get Commit Info
         id: commit
         # Publishes three step outputs:
         #   short_sha  - first 7 characters of GITHUB_SHA
         #   message    - HEAD commit subject, capped at 50 bytes
         #   start_time - current Unix epoch seconds
         # short_sha and message are read by the Notify Pipeline Start step.
         run: |
           # Cap the subject at 50 bytes, then drop any multi-byte UTF-8
           # sequence the byte cut left incomplete. A plain `head -c 50` can
           # emit invalid UTF-8 for non-ASCII subjects, which then flows into
           # the notification payload.
           SUBJECT_RAW="$(git log -1 --pretty=%s || true)"
           if ! SUBJECT="$(printf '%s' "$SUBJECT_RAW" | python3 -c 'import sys; sys.stdout.buffer.write(sys.stdin.buffer.read()[:50].decode("utf-8", "ignore").encode("utf-8"))')"; then
             # python3 unavailable or failed: fall back to the plain byte cut.
             SUBJECT="$(printf '%s' "$SUBJECT_RAW" | head -c 50 || true)"
           fi
           echo "short_sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
           echo "message=${SUBJECT}" >> "$GITHUB_OUTPUT"
           echo "start_time=$(date +%s)" >> "$GITHUB_OUTPUT"
       - name: Notify Pipeline Start
         # 2026-04-16 ogt + Claude Sonnet 4.6: switched to a structured HTML
         # format for readability.
         # Workflow expression values are handed to the script through `env`
         # instead of being expanded inline into the script text. The commit
         # subject is author-controlled, and inline expansion would let quotes,
         # backticks, or command substitutions in it run as shell code on the
         # host runner.
         env:
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           CD_NOTIFY_COMMIT_MSG: ${{ steps.commit.outputs.message }}
           CD_NOTIFY_SHORT_SHA: ${{ steps.commit.outputs.short_sha }}
           CD_NOTIFY_ACTOR: ${{ github.actor }}
         run: |
           COMMIT_MSG="${CD_NOTIFY_COMMIT_MSG}"
           SHORT_SHA="${CD_NOTIFY_SHORT_SHA}"
           ACTOR="${CD_NOTIFY_ACTOR}"
           # HTML-escape the commit message so special characters cannot break the HTML.
           COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
           # NOTE(review): MSG is built but not sent anywhere in this step now
           # that the direct Telegram fallback below is disabled.
           MSG=$(printf '🚀 <b>AWOOOI 部署開始</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n└ 👤 %s' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
           # 2026-05-02 Claude Opus 4.7 + commander ogt: a notify failure must not
           # block the whole CI (evidence: curl 400 broke build-and-deploy for 14
           # consecutive commits starting 5/1) — matches the existing pattern at line 922.
           if AWOOI_CICD_STATUS=running \
             AWOOI_CICD_STAGE=tests \
             AWOOI_CICD_JOB_NAME="AWOOOI 部署開始" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
             AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD start notification mirrored through AWOOI API"
           else
             echo "AWOOI API notify failed; direct Telegram fallback disabled to preserve AwoooP receipt chain"
           fi
       # 2026-03-31 ogt: Phase 22.0 CI 測試 (禁止 Mock - feedback_no_mock_testing.md)
       # 2026-04-01 ogt: 持久化 venv 加速 - /opt/api-venv 跨 run 保留
       # pyproject.toml hash 變才重裝，其餘直接 activate (節省 ~6-7 min)
       - name: Run API Tests
         run: |
           CHANGED_FILES=""
           if [ -r "${GITHUB_EVENT_PATH:-}" ]; then
             CHANGED_FILES="$(python3 - <<'PY'
           import json
           import os
           event_path = os.environ.get("GITHUB_EVENT_PATH")
           files = []
           with open(event_path, "r", encoding="utf-8") as handle:
               payload = json.load(handle)
           for commit in payload.get("commits", []) or []:
               for key in ("added", "modified", "removed"):
                   files.extend(commit.get(key, []) or [])
           for path in dict.fromkeys(files):
               print(path)
           PY
           )"
           fi
           if [ -z "$CHANGED_FILES" ]; then
             BASE_SHA="${{ github.event.before }}"
             if [ -n "$BASE_SHA" ] && ! printf '%s' "$BASE_SHA" | grep -Eq '^0+$'; then
               git fetch --no-tags --depth=50 origin "${GITHUB_REF_NAME:-main}" >/dev/null 2>&1 || true
               if git cat-file -e "${BASE_SHA}^{commit}" 2>/dev/null; then
                 CHANGED_FILES="$(git diff --name-only "$BASE_SHA" "${GITHUB_SHA:-HEAD}")"
               fi
             fi
           fi
           if [ -z "$CHANGED_FILES" ]; then
             CHANGED_FILES="$(git show --format= --name-only --no-renames HEAD)"
           fi
           printf 'CD changed files:\n%s\n' "$CHANGED_FILES"
           CONTROLLED_RUNTIME_TEST_PROFILE=1
           while IFS= read -r changed_file; do
             [ -z "$changed_file" ] && continue
             case "$changed_file" in
               # 2026-06-29 Codex: UI-only changes are verified by the
               # frontend build in build-and-deploy. Keep them on the narrow
               # profile so non-110 CD does not run B5's Docker/socket DB
               # integration for copy/layout fixes.
               apps/web/*)
                 ;;
               .gitea/workflows/cd.yaml)
                 ;;
               # 2026-06-30 Codex: workflow secret-transport and guard-only
               # hardening must stay on the narrow profile. These changes are
               # validated by workflow-shape, the secret-surface guard, and the
               # runner pressure/profile tests; sending them to full/B5 would
               # reintroduce the heavy runner path while not increasing coverage.
               .gitea/workflows/cd-dev.yaml)
                 ;;
               .gitea/workflows/code-review.yaml)
                 ;;
               .gitea/workflows/deploy-alerts.yaml)
                 ;;
               .gitea/workflows/e2e-health.yaml)
                 ;;
               .gitea/workflows/ansible-lint.yml)
                 ;;
               .gitea/workflows/harbor-110-local-repair.yaml)
                 ;;
               .gitea/workflows/run-migration.yml)
                 ;;
               scripts/ci/check-gitea-step-env-secrets.js)
                 ;;
               # 2026-06-29 Codex: the onboarding warning-step workflow is
               # copied in a disabled workflow_dispatch-only state. Treat the
               # source and template files as controlled-runtime sources so the
               # CD lane does not fall into full/B5 just for placing the inert
               # guarded workflow shell.
               .gitea/workflows/awoooi-onboarding-warning-step.yaml)
                 ;;
               docs/operations/templates/awoooi-gitea-onboarding-warning-step.workflow.yaml)
                 ;;
               # 2026-06-29 Codex: build-and-deploy writes only these GitOps
               # deploy marker files after image push. A later merge commit can
               # carry them back through CD; keep that marker merge on the
               # controlled profile so non-110 CD does not fall into B5's Docker
               # socket path just because the previous deploy recorded image
               # truth.
               k8s/awoooi-prod/04-configmap.yaml)
                 ;;
               k8s/awoooi-prod/06-deployment-api.yaml)
                 ;;
               k8s/awoooi-prod/08-deployment-worker.yaml)
                 ;;
               k8s/awoooi-prod/10-deployment-auto-repair-canary.yaml)
                 ;;
               k8s/awoooi-prod/kustomization.yaml)
                 ;;
               product.awoooi.yaml)
                 ;;
               docs/LOGBOOK.md)
                 ;;
               docs/runbooks/REBOOT-RECOVERY-SOP.md)
                 ;;
               docs/runbooks/REBOOT-POST-START-QUICK-CHECK.md)
                 ;;
               docs/runbooks/FULL-STACK-COLD-START-SOP.md)
                 ;;
               docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md)
                 ;;
               docs/workplans/2026-06-04-reboot-cold-start-backup-recovery-workplan.md)
                 ;;
               docs/workplans/2026-07-02-commander-inserted-requirements-priority-ledger.md)
                 ;;
               docs/schemas/product_awoooi_manifest_v1.schema.json)
                 ;;
               docs/operations/product-awoooi-manifest-standard.snapshot.json)
                 ;;
               docs/operations/awoooi-priority-work-order-readback.snapshot.json)
                 ;;
               docs/operations/awooop-conversation-event-hot-path-index-apply-receipt-*.snapshot.json)
                 ;;
               docs/operations/awoooi-credential-escrow-evidence-controlled-closeout-receipt.snapshot.json)
                 ;;
               docs/operations/awoooi-reboot-auto-recovery-slo-scorecard.snapshot.json)
                 ;;
               docs/operations/awoooi-gitea-private-inventory-p0-scorecard.snapshot.json)
                 ;;
               docs/operations/awoooi-gitea-private-inventory-controlled-closeout-receipt.snapshot.json)
                 ;;
               docs/operations/awoooi-gitea-authenticated-inventory-payload-validation.snapshot.json)
                 ;;
               docs/security/GITEA-REPO-INVENTORY-SNAPSHOT.md)
                 ;;
               docs/security/gitea-repo-inventory.snapshot.json)
                 ;;
               docs/operations/p0-cicd-baseline-source-readiness.snapshot.json)
                 ;;
               docs/operations/awoooi-gitea-onboarding-warning-step-template-copy-receipt.snapshot.json)
                 ;;
               .gitea/workflows/awoooi-onboarding-warning-step.yaml)
                 ;;
               docs/operations/templates/awoooi-gitea-onboarding-warning-step.workflow.yaml)
                 ;;
               docs/operations/awoooi-production-deploy-readback-blocker.snapshot.json)
                 ;;
               docs/evaluations/backup_dr_target_inventory_2026-06-04.json)
                 ;;
               docs/evaluations/backup_dr_readiness_matrix_2026-06-04.json)
                 ;;
               docs/operations/ai-agent-log-intelligence-runtime-sample-readback.snapshot.json)
                 ;;
               apps/api/src/api/v1/agents.py)
                 ;;
               apps/api/src/api/v1/iwooos.py)
                 ;;
               apps/api/src/api/v1/webhooks.py)
                 ;;
               apps/api/src/core/config.py)
                 ;;
               apps/api/src/db/base.py)
                 ;;
               apps/api/src/services/agent_replay_normalizer.py)
                 ;;
               apps/api/src/services/ai_agent_log_intelligence_integration_readback.py)
                 ;;
               apps/api/src/services/ai_agent_log_feedback_receipt_dry_run.py)
                 ;;
               apps/api/src/services/ai_agent_log_post_write_verifier_dry_run.py)
                 ;;
               apps/api/src/services/ai_agent_log_controlled_writeback_plan_readback.py)
                 ;;
               apps/api/src/services/ai_agent_log_controlled_writeback_executor_readback.py)
                 ;;
               apps/api/src/services/ai_agent_log_controlled_writeback_dispatch.py)
                 ;;
               apps/api/src/services/ai_agent_log_controlled_writeback_consumer_readback.py)
                 ;;
               apps/api/src/services/ai_agent_log_controlled_writeback_consumer_apply.py)
                 ;;
               apps/api/src/services/ai_agent_autonomous_runtime_control.py)
                 ;;
               apps/api/src/services/ai_agent_report_truth_actionability_review.py)
                 ;;
               apps/api/src/services/awooop_ansible_audit_service.py)
                 ;;
               apps/api/src/services/awooop_ansible_check_mode_service.py)
                 ;;
               apps/api/migrations/adr090e_ansible_learning_writeback_operation_type.sql)
                 ;;
               apps/api/migrations/adr090e_ansible_learning_writeback_operation_type_down.sql)
                 ;;
               apps/api/migrations/adr090f_log_controlled_writeback_dispatch_operation_type.sql)
                 ;;
               apps/api/migrations/adr090f_log_controlled_writeback_dispatch_operation_type_down.sql)
                 ;;
               apps/api/src/services/auto_approve.py)
                 ;;
               apps/api/src/services/decision_fusion.py)
                 ;;
               apps/api/src/services/heartbeat_report_service.py)
                 ;;
               apps/api/src/services/credential_escrow_evidence_intake_readiness.py)
                 ;;
               apps/api/src/services/gitea_authenticated_inventory_payload_validation.py)
                 ;;
               apps/api/src/services/gitea_owner_coverage_attestation_validation.py)
                 ;;
               apps/api/src/services/gitea_private_inventory_closeout_validation.py)
                 ;;
               apps/api/src/services/gitea_private_inventory_p0_scorecard.py)
                 ;;
               apps/api/src/services/gitea_workflow_runner_owner_attestation_request.py)
                 ;;
               apps/api/src/services/reboot_auto_recovery_slo_scorecard.py)
                 ;;
               apps/api/src/services/reboot_auto_recovery_drill_preflight.py)
                 ;;
               apps/api/src/services/stockplatform_public_api_runtime_readback.py)
                 ;;
               apps/api/src/services/stockplatform_public_api_controlled_recovery_preflight.py)
                 ;;
               apps/api/src/services/harbor_registry_controlled_recovery_preflight.py)
                 ;;
               apps/api/src/services/harbor_registry_controlled_recovery_receipt.py)
                 ;;
               apps/api/src/services/iwooos_security_operating_system.py)
                 ;;
               apps/api/Dockerfile)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_dashboard.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_owner_package.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_owner_response_preflight.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_template_copy_apply_gate.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_template_copy_execution_plan.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_template_copy_receipt.py)
                 ;;
               apps/api/src/services/awoooi_gitea_onboarding_warning_step_runtime_enablement_gate.py)
                 ;;
               apps/api/src/services/awoooi_new_product_onboarding_page_model.py)
                 ;;
               apps/api/src/services/awoooi_onboarding_reminder_contract.py)
                 ;;
               apps/api/src/services/awoooi_onboarding_source_contracts.py)
                 ;;
               apps/api/src/services/awoooi_priority_work_order_readback.py)
                 ;;
               apps/api/src/services/awoooi_product_onboarding_guard.py)
                 ;;
               apps/api/src/services/p0_cicd_baseline_source_readiness.py)
                 ;;
               apps/api/src/services/product_awoooi_manifest_standard.py)
                 ;;
               apps/api/src/api/v1/platform/events.py)
                 ;;
               apps/api/src/jobs/ai_slo_watchdog_job.py)
                 ;;
               apps/api/src/models/knowledge.py)
                 ;;
               apps/api/src/models/playbook.py)
                 ;;
               apps/api/src/services/auto_repair_service.py)
                 ;;
               apps/api/src/services/awoooi_production_deploy_readback_blocker.py)
                 ;;
               apps/api/src/services/backup_dr_target_inventory.py)
                 ;;
               apps/api/src/services/backup_dr_readiness_matrix.py)
                 ;;
               apps/api/src/services/decision_manager.py)
                 ;;
               apps/api/src/services/delivery_closure_workbench.py)
                 ;;
               apps/api/src/services/platform_operator_service.py)
                 ;;
               apps/api/src/services/telegram_gateway.py)
                 ;;
               apps/api/tests/test_agent_replay_normalizer.py)
                 ;;
               apps/api/tests/test_ai_agent_log_intelligence_integration_readback_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_feedback_receipt_dry_run_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_post_write_verifier_dry_run_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_controlled_writeback_plan_readback_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_controlled_writeback_dispatch_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_controlled_writeback_consumer_readback_api.py)
                 ;;
               apps/api/tests/test_ai_agent_log_controlled_writeback_consumer_apply_api.py)
                 ;;
               apps/api/tests/test_ai_agent_autonomous_runtime_control.py)
                 ;;
               apps/api/tests/test_ai_agent_report_truth_actionability_review.py)
                 ;;
               apps/api/tests/test_ai_agent_report_truth_actionability_review_api.py)
                 ;;
               apps/api/tests/test_awooop_truth_chain_service.py)
                 ;;
               apps/api/tests/test_shadow_auto_approve.py)
                 ;;
               apps/api/tests/test_destructive_patterns.py)
                 ;;
               apps/api/tests/test_approval_pending_visibility.py)
                 ;;
               apps/api/tests/test_awooop_operator_timeline_labels.py)
                 ;;
               apps/api/tests/test_config_url_validation.py)
                 ;;
               apps/api/tests/test_delivery_closure_workbench_api.py)
                 ;;
               apps/api/tests/test_runtime_bootstrap_guards.py)
                 ;;
               apps/api/tests/test_backup_dr_target_inventory.py)
                 ;;
               apps/api/tests/test_backup_dr_target_inventory_api.py)
                 ;;
               apps/api/tests/test_backup_dr_readiness_matrix.py)
                 ;;
               apps/api/tests/test_backup_dr_readiness_matrix_api.py)
                 ;;
               apps/api/tests/test_credential_escrow_evidence_intake_readiness_api.py)
                 ;;
               apps/api/tests/test_gitea_private_inventory_p0_scorecard_api.py)
                 ;;
               apps/api/tests/test_gitea_workflow_runner_owner_attestation_request_api.py)
                 ;;
               apps/api/tests/test_reboot_auto_recovery_slo_scorecard_api.py)
                 ;;
               apps/api/tests/test_stockplatform_public_api_runtime_readback.py)
                 ;;
               apps/api/tests/test_stockplatform_public_api_controlled_recovery_preflight.py)
                 ;;
               apps/api/tests/test_harbor_registry_controlled_recovery_preflight.py)
                 ;;
               apps/api/tests/test_harbor_registry_controlled_recovery_receipt.py)
                 ;;
               apps/api/tests/test_iwooos_security_operating_system.py)
                 ;;
               apps/api/tests/test_iwooos_wazuh_prod_manifest.py)
                 ;;
               apps/api/tests/test_awoooi_production_deploy_readback_blocker.py)
                 ;;
               apps/api/tests/test_awoooi_priority_work_order_readback_api.py)
                 ;;
               apps/api/tests/e2e_network_test.py)
                 ;;
               apps/api/tests/test_p0_cicd_baseline_source_readiness_api.py)
                 ;;
               apps/api/tests/test_product_awoooi_manifest_standard_api.py)
                 ;;
               apps/api/tests/test_trust_drift_watchdog.py)
                 ;;
               apps/web/src/app/\[locale\]/governance/tabs/events-tab.tsx)
                 ;;
               apps/web/src/app/\[locale\]/governance/tabs/queue-tab.tsx)
                 ;;
               apps/web/src/app/\[locale\]/governance/tabs/slo-tab.tsx)
                 ;;
               ops/runner/read-public-gitea-actions-queue.py)
                 ;;
               ops/runner/README.md)
                 ;;
               ops/runner/check-awoooi-non110-runner-readiness.sh)
                 ;;
               ops/runner/install-awoooi-non110-runner-user-service.sh)
                 ;;
               ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh)
                 ;;
               ops/runner/test_read_public_gitea_actions_queue.py)
                 ;;
               ops/runner/test_cd_controlled_runtime_profile.py)
                 ;;
               ops/runner/test_check_awoooi_non110_runner_readiness.py)
                 ;;
               ops/runner/test_install_awoooi_non110_runner_user_service.py)
                 ;;
               ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py)
                 ;;
               ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py)
                 ;;
               ops/runner/test_verify_awoooi_non110_cd_closure.py)
                 ;;
               ops/runner/awoooi-cd-lane-drain.service)
                 ;;
               ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh)
                 ;;
               ops/runner/verify-awoooi-non110-cd-closure.py)
                 ;;
               docs/operations/host-cpu-pressure-drain-readback-2026-07-01.snapshot.json)
                 ;;
               docs/operations/post-reboot-runtime-recovery-readback-2026-07-01.snapshot.json)
                 ;;
               ops/reboot-recovery/full-stack-cold-start-baseline.yml)
                 ;;
               ops/monitoring/alerts-unified.yml)
                 ;;
               ops/monitoring/alerts.yml)
                 ;;
               scripts/dev/awoooi-navigation-coverage-guard.py)
                 ;;
               scripts/ci/wait-host-web-build-pressure.sh)
                 ;;
               # 2026-07-01 Codex: backup freshness/readback scripts are
               # covered by shell syntax checks and focused exporter tests.
               # Keep them off B5 so a metadata/script recovery patch does not
               # require the host Docker socket.
               scripts/backup/backup-awoooi-frequent.sh)
                 ;;
               scripts/backup/backup-status.sh)
                 ;;
               scripts/backup/gitea-repo-bundle-backup.sh)
                 ;;
               scripts/backup/tests/test_backup_status_contract.py)
                 ;;
               scripts/ops/backup-alert-label-contract-check.py)
                 ;;
               scripts/ops/backup-health-textfile-exporter.py)
                 ;;
               scripts/ops/docker-disk-pressure-retention-cleanup.py)
                 ;;
               scripts/ops/gitea-queue-hook-backlog-playbook.py)
                 ;;
               scripts/ops/host-runaway-process-exporter.py)
                 ;;
               scripts/ops/host-sustained-load-controller.py)
                 ;;
               scripts/ops/host-sustained-load-evidence.py)
                 ;;
               scripts/ops/tests/test_backup_health_textfile_exporter.py)
                 ;;
               scripts/ops/tests/test_docker_disk_pressure_retention_cleanup.py)
                 ;;
               scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py)
                 ;;
               scripts/ops/tests/test_host_runaway_process_exporter.py)
                 ;;
               scripts/ops/tests/test_host_pressure_alert_contract.py)
                 ;;
               scripts/reboot-recovery/deploy-to-110.sh)
                 ;;
               scripts/reboot-recovery/deploy-to-188.sh)
                 ;;
               scripts/reboot-recovery/enforce-110-runner-failclosed.sh)
                 ;;
               scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh)
                 ;;
               scripts/reboot-recovery/apply-credential-escrow-closeout-receipt-to-110.sh)
                 ;;
               scripts/reboot-recovery/awoooi-startup.sh)
                 ;;
               scripts/reboot-recovery/awoooi-startup.service)
                 ;;
               scripts/reboot-recovery/dr-escrow-evidence-checklist.py)
                 ;;
               scripts/reboot-recovery/apply-credential-escrow-closeout-receipt-to-110.sh)
                 ;;
               scripts/reboot-recovery/tests/test_dr_escrow_evidence_checklist.py)
                 ;;
               scripts/reboot-recovery/post-reboot-owner-response-preflight.py)
                 ;;
               scripts/reboot-recovery/post-start-quick-check.sh)
                 ;;
               scripts/reboot-recovery/reboot-recovery-readiness-audit.sh)
                 ;;
               scripts/reboot-recovery/188-host-hygiene-maintenance-checklist.sh)
                 ;;
               scripts/reboot-recovery/full-stack-cold-start-check.sh)
                 ;;
               scripts/reboot-recovery/cold-start-textfile-exporter.sh)
                 ;;
               scripts/reboot-recovery/install-cold-start-monitor-110.sh)
                 ;;
               scripts/reboot-recovery/momo-drive-token-source-recovery-preflight.sh)
                 ;;
               scripts/reboot-recovery/momo-source-arrival-gate.py)
                 ;;
               scripts/reboot-recovery/full-stack-recovery-scorecard.sh)
                 ;;
               scripts/reboot-recovery/harbor-watchdog.sh)
                 ;;
               scripts/reboot-recovery/awoooi-startup-110.sh)
                 ;;
               scripts/reboot-recovery/diagnose-110-ssh-publickey-auth.sh)
                 ;;
               scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh)
                 ;;
               scripts/reboot-recovery/verify-cold-start-monitor-deploy.sh)
                 ;;
               scripts/reboot-recovery/tests/test_dr_escrow_evidence_checklist.py)
                 ;;
               scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py)
                 ;;
               scripts/reboot-recovery/tests/test_momo_source_arrival_gate.py)
                 ;;
               scripts/reboot-recovery/awoooi-reboot-auto-recovery-slo.service)
                 ;;
               scripts/reboot-recovery/awoooi-reboot-auto-recovery-slo.timer)
                 ;;
               scripts/reboot-recovery/install-reboot-auto-recovery-slo-110.sh)
                 ;;
               scripts/reboot-recovery/reboot-auto-recovery-host-probe.sh)
                 ;;
               scripts/reboot-recovery/reboot-auto-recovery-slo-exporter.sh)
                 ;;
               scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py)
                 ;;
               scripts/reboot-recovery/windows99-vmware-autostart.ps1)
                 ;;
               scripts/reboot-recovery/windows99-management-channel-probe.py)
                 ;;
               scripts/reboot-recovery/tests/test_reboot_auto_recovery_slo_installer.py)
                 ;;
               scripts/reboot-recovery/tests/test_reboot_auto_recovery_slo_scorecard.py)
                 ;;
               scripts/reboot-recovery/tests/test_188_host_hygiene_checklist.py)
                 ;;
               scripts/reboot-recovery/tests/test_post_start_quick_check_contract.py)
                 ;;
               scripts/reboot-recovery/tests/test_reboot_p0_operational_contract.py)
                 ;;
               scripts/reboot-recovery/tests/test_harbor_watchdog_contract.py)
                 ;;
               scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py)
                 ;;
               scripts/security/gitea-private-inventory-p0-scorecard.py)
                 ;;
               scripts/security/gitea-authenticated-inventory-payload-validator.py)
                 ;;
               scripts/security/tests/test_gitea_private_inventory_p0_scorecard.py)
                 ;;
               scripts/security/tests/test_gitea_authenticated_inventory_payload_validator.py)
                 ;;
               *)
                 CONTROLLED_RUNTIME_TEST_PROFILE=0
                 ;;
             esac
           done <<EOF
           $CHANGED_FILES
           EOF
           if [ "$CONTROLLED_RUNTIME_TEST_PROFILE" = "1" ]; then
             export AWOOOI_CD_TEST_PROFILE=controlled-runtime
             echo "AWOOOI_CD_TEST_PROFILE=controlled-runtime" >> "$GITHUB_ENV"
             echo "✅ controlled-runtime API test profile selected"
           else
             export AWOOOI_CD_TEST_PROFILE=full
             echo "AWOOOI_CD_TEST_PROFILE=full" >> "$GITHUB_ENV"
             echo "✅ full API test profile selected"
           fi
           printf '%s\n' "$AWOOOI_CD_TEST_PROFILE" > .awoooi-cd-test-profile
           cat > /tmp/awoooi-api-tests.sh <<'CI_SCRIPT'
           VENV=/opt/api-venv
           HASH_FILE=/opt/api-venv/.deps_hash
           CURRENT_HASH=$(md5sum apps/api/pyproject.toml | awk '{print $1}')
           # python3.11 是 runner 層級持久安裝，只在首次或版本消失時才 apt-get
           # 2026-04-05 Claude Code: 分離 apt-get 與 venv hash-guard，避免每次 deps 變更都重跑 apt
           # 2026-04-16 ogt + Claude Sonnet 4.6: 修復 apt index 失敗 → 改用 --fix-missing + retry
           if ! command -v python3.11 &>/dev/null; then
             echo "📦 安裝 python3.11..."
             apt-get clean && rm -rf /var/lib/apt/lists/*
             apt-get update -q --fix-missing || apt-get update -q || true
             apt-get install -y -q python3.11-venv python3.11 || \
               (add-apt-repository ppa:deadsnakes/python -y 2>/dev/null && apt-get update -q && apt-get install -y -q python3.11-venv python3.11) || true
           else
             echo "⚡ python3.11 已安裝，跳過 apt-get"
           fi
           # 確保 python3.11 存在，否則 fallback 到系統 python3
           if ! command -v python3.11 &>/dev/null; then
             echo "⚠️ python3.11 安裝失敗，使用 python3 fallback"
             ln -sf "$(which python3)" /usr/local/bin/python3.11 || true
           fi
           if [ ! -d "$VENV/bin" ] || [ "$(cat $HASH_FILE 2>/dev/null)" != "$CURRENT_HASH" ]; then
             echo "📦 deps 已變更，重建 venv..."
             # 2026-04-17 ogt: /opt/api-venv 是 volume mount，不能 rm -rf 目錄本身
             # 改用 find 清空內容，保留 mount point 目錄
             find "$VENV" -mindepth 1 -delete 2>/dev/null || true
             python3.11 -m venv $VENV
             source $VENV/bin/activate
             pip install -q uv
             cd apps/api && uv pip install -q -e ".[dev]" && cd -
             echo "$CURRENT_HASH" > $HASH_FILE
           else
             echo "⚡ 使用快取 venv (deps 未變更)"
             source $VENV/bin/activate
           fi
           cd apps/api
           cleanup_pytest_workspace_cache() {
             # The CI image runs as root against a bind-mounted checkout, so
             # Python cache artifacts are dropped before act-runner cleanup;
             # otherwise successful jobs end with root-owned __pycache__ noise.
             # (2026-05-19 Codex)
             # Both removals are best-effort and never fail the caller.
             rm -rf .pytest_cache 2>/dev/null || true
             find tests src -type d -name __pycache__ -prune -exec rm -rf {} + 2>/dev/null || true
           }
           # CI 排除需外部服務的測試 (Redis pool / Ollama — 2026-04-01 Claude Code)
           # 2026-04-05 Claude Code: 修正 exit code — | tail 會吃掉 segfault (exit 139)
           #   改用 tee + PIPESTATUS[0] 正確捕捉 pytest 本身的 exit code
           # 2026-04-05 Claude Code: 加 --ignore=tests/integration 排除需 asyncpg 連線的 DB 測試
           #   integration tests 在 prod K8s 部署後由 E2E Smoke Test 覆蓋
           # PYTHONFAULTHANDLER=1: 若 C extension segfault，輸出完整 Python stacktrace
           # 2026-04-05 Claude Code: test_github_webhook.py 已根治
           #   原問題: import src.main → asyncpg C ext segfault (exit 139)
           #   修復: 改用最小化 app，只掛載 github_webhook router，不走 DB import chain
           #   現在可安全加入 CI 測試
           # 2026-04-22 ogt: DATABASE_URL 改為必填後，單元測試需要此 env var 讓 Settings 通過驗證
           # 單元測試不連 DB，此 CI placeholder 僅供 Pydantic 驗證，不產生真實連線
           if [ "${AWOOOI_CD_TEST_PROFILE:-full}" = "controlled-runtime" ]; then
             echo "✅ controlled-runtime profile: running focused replay/auto-approve/copy tests"
             python3.11 -m py_compile \
               src/core/config.py \
               src/db/base.py \
               src/api/v1/platform/events.py \
               src/api/v1/agents.py \
               src/api/v1/iwooos.py \
               src/api/v1/webhooks.py \
               src/jobs/ai_slo_watchdog_job.py \
               src/models/knowledge.py \
               src/models/playbook.py \
               src/services/awoooi_production_deploy_readback_blocker.py \
               src/services/agent_replay_normalizer.py \
               src/services/ai_agent_log_intelligence_integration_readback.py \
               src/services/ai_agent_log_feedback_receipt_dry_run.py \
               src/services/ai_agent_log_post_write_verifier_dry_run.py \
               src/services/ai_agent_log_controlled_writeback_plan_readback.py \
               src/services/ai_agent_log_controlled_writeback_executor_readback.py \
               src/services/ai_agent_log_controlled_writeback_dispatch.py \
               src/services/ai_agent_log_controlled_writeback_consumer_readback.py \
               src/services/ai_agent_log_controlled_writeback_consumer_apply.py \
               src/services/ai_agent_autonomous_runtime_control.py \
               src/services/awooop_ansible_audit_service.py \
               src/services/awooop_ansible_check_mode_service.py \
               src/services/auto_repair_service.py \
               src/services/auto_approve.py \
               src/services/backup_dr_target_inventory.py \
               src/services/backup_dr_readiness_matrix.py \
               src/services/decision_fusion.py \
               src/services/delivery_closure_workbench.py \
               src/services/heartbeat_report_service.py \
               src/services/credential_escrow_evidence_intake_readiness.py \
               src/services/gitea_authenticated_inventory_payload_validation.py \
               src/services/gitea_owner_coverage_attestation_validation.py \
               src/services/gitea_private_inventory_closeout_validation.py \
               src/services/gitea_private_inventory_p0_scorecard.py \
               src/services/gitea_workflow_runner_owner_attestation_request.py \
               src/services/reboot_auto_recovery_slo_scorecard.py \
               src/services/reboot_auto_recovery_drill_preflight.py \
               src/services/stockplatform_public_api_runtime_readback.py \
               src/services/stockplatform_public_api_controlled_recovery_preflight.py \
               src/services/harbor_registry_controlled_recovery_preflight.py \
               src/services/harbor_registry_controlled_recovery_receipt.py \
               src/services/iwooos_security_operating_system.py \
               src/services/awoooi_gitea_onboarding_warning_step_dashboard.py \
               src/services/awoooi_gitea_onboarding_warning_step_owner_package.py \
               src/services/awoooi_gitea_onboarding_warning_step_owner_response_preflight.py \
               src/services/awoooi_gitea_onboarding_warning_step_template_copy_apply_gate.py \
               src/services/awoooi_gitea_onboarding_warning_step_template_copy_execution_plan.py \
               src/services/awoooi_gitea_onboarding_warning_step_template_copy_receipt.py \
               src/services/awoooi_gitea_onboarding_warning_step_runtime_enablement_gate.py \
               src/services/awoooi_new_product_onboarding_page_model.py \
               src/services/awoooi_onboarding_reminder_contract.py \
               src/services/awoooi_onboarding_source_contracts.py \
               src/services/awoooi_priority_work_order_readback.py \
               src/services/awoooi_product_onboarding_guard.py \
               src/services/p0_cicd_baseline_source_readiness.py \
               src/services/product_awoooi_manifest_standard.py \
               src/services/platform_operator_service.py \
               src/services/telegram_gateway.py
             python3.11 -m py_compile \
               ../../scripts/reboot-recovery/dr-escrow-evidence-checklist.py \
               ../../scripts/reboot-recovery/post-reboot-owner-response-preflight.py \
               ../../scripts/reboot-recovery/momo-source-arrival-gate.py \
               ../../scripts/reboot-recovery/reboot-auto-recovery-slo-scorecard.py \
               ../../scripts/reboot-recovery/windows99-management-channel-probe.py \
               ../../scripts/ops/backup-alert-label-contract-check.py \
               ../../scripts/ops/backup-health-textfile-exporter.py \
               ../../scripts/ops/docker-disk-pressure-retention-cleanup.py \
               ../../scripts/ops/gitea-queue-hook-backlog-playbook.py \
               ../../scripts/ops/host-runaway-process-exporter.py \
               ../../scripts/ops/host-sustained-load-controller.py \
               ../../scripts/ops/host-sustained-load-evidence.py \
               ../../scripts/security/gitea-private-inventory-p0-scorecard.py \
               ../../scripts/security/gitea-authenticated-inventory-payload-validator.py
             python3.11 -c "import yaml; yaml.safe_load(open('../../ops/monitoring/alerts-unified.yml')); print('alerts-unified YAML OK')"
             python3.11 -c "import yaml; yaml.safe_load(open('../../ops/monitoring/alerts.yml')); print('alerts YAML OK')"
             python3.11 -c "import yaml; yaml.safe_load(open('../../ops/reboot-recovery/full-stack-cold-start-baseline.yml')); print('full-stack-cold-start-baseline YAML OK')"
             bash -n \
               ../../ops/runner/check-awoooi-110-controlled-cd-lane-readiness.sh \
               ../../ops/runner/check-awoooi-non110-runner-readiness.sh \
               ../../ops/runner/install-awoooi-non110-runner-user-service.sh \
               ../../ops/runner/register-awoooi-110-controlled-cd-lane-drain.sh \
               ../../scripts/reboot-recovery/deploy-to-110.sh \
               ../../scripts/reboot-recovery/deploy-to-188.sh \
               ../../scripts/reboot-recovery/enforce-110-runner-failclosed.sh \
               ../../scripts/reboot-recovery/recover-110-control-path-and-harbor-local.sh \
               ../../scripts/reboot-recovery/awoooi-startup.sh \
               ../../scripts/reboot-recovery/install-reboot-auto-recovery-slo-110.sh \
               ../../scripts/reboot-recovery/reboot-auto-recovery-host-probe.sh \
               ../../scripts/reboot-recovery/reboot-auto-recovery-slo-exporter.sh \
               ../../scripts/reboot-recovery/post-start-quick-check.sh \
               ../../scripts/reboot-recovery/188-host-hygiene-maintenance-checklist.sh \
               ../../scripts/reboot-recovery/full-stack-cold-start-check.sh \
               ../../scripts/reboot-recovery/cold-start-textfile-exporter.sh \
               ../../scripts/reboot-recovery/install-cold-start-monitor-110.sh \
               ../../scripts/reboot-recovery/momo-drive-token-source-recovery-preflight.sh \
               ../../scripts/reboot-recovery/full-stack-recovery-scorecard.sh \
               ../../scripts/reboot-recovery/harbor-watchdog.sh \
               ../../scripts/reboot-recovery/awoooi-startup-110.sh \
               ../../scripts/reboot-recovery/diagnose-110-ssh-publickey-auth.sh \
               ../../scripts/reboot-recovery/repair-110-ssh-publickey-auth-local.sh \
               ../../scripts/reboot-recovery/verify-cold-start-monitor-deploy.sh \
               ../../scripts/reboot-recovery/apply-credential-escrow-closeout-receipt-to-110.sh \
               ../../scripts/backup/backup-awoooi-frequent.sh \
               ../../scripts/backup/backup-status.sh \
               ../../scripts/backup/gitea-repo-bundle-backup.sh
             bash -n ../../scripts/reboot-recovery/apply-credential-escrow-closeout-receipt-to-110.sh
             # Focused controlled-runtime pytest run. DATABASE_URL falls back to a
             # CI placeholder so Settings validation passes. stderr is merged into
             # stdout (2>&1) so the tee'd log also contains faulthandler/stderr
             # output, and PIPESTATUS[0] keeps pytest's own exit code rather
             # than tee's.
             DATABASE_URL="${DATABASE_URL:-postgresql+asyncpg://ci:ci@localhost/ci}" \
             PYTHONFAULTHANDLER=1 python3.11 -m pytest \
               tests/test_agent_replay_normalizer.py \
               tests/test_ai_agent_log_intelligence_integration_readback_api.py \
               tests/test_ai_agent_log_feedback_receipt_dry_run_api.py \
               tests/test_ai_agent_log_post_write_verifier_dry_run_api.py \
               tests/test_ai_agent_log_controlled_writeback_plan_readback_api.py \
               tests/test_ai_agent_log_controlled_writeback_executor_readback_api.py \
               tests/test_ai_agent_log_controlled_writeback_dispatch_api.py \
               tests/test_ai_agent_log_controlled_writeback_consumer_readback_api.py \
               tests/test_ai_agent_log_controlled_writeback_consumer_apply_api.py \
               tests/test_ai_agent_autonomous_runtime_control.py \
               tests/test_awooop_truth_chain_service.py \
               tests/test_shadow_auto_approve.py \
               tests/test_destructive_patterns.py \
               tests/test_approval_pending_visibility.py \
               tests/test_awooop_operator_timeline_labels.py::test_outbound_timeline_title_labels_runbook_review \
               tests/test_config_url_validation.py \
               tests/test_delivery_closure_workbench_api.py \
               tests/test_runtime_bootstrap_guards.py \
               tests/test_backup_dr_target_inventory.py \
               tests/test_backup_dr_target_inventory_api.py \
               tests/test_backup_dr_readiness_matrix.py \
               tests/test_backup_dr_readiness_matrix_api.py \
               tests/test_credential_escrow_evidence_intake_readiness_api.py \
               tests/test_gitea_private_inventory_p0_scorecard_api.py \
               tests/test_gitea_workflow_runner_owner_attestation_request_api.py \
               tests/test_reboot_auto_recovery_slo_scorecard_api.py \
               tests/test_stockplatform_public_api_runtime_readback.py \
               tests/test_stockplatform_public_api_controlled_recovery_preflight.py \
               tests/test_harbor_registry_controlled_recovery_preflight.py \
               tests/test_harbor_registry_controlled_recovery_receipt.py \
               tests/test_iwooos_security_operating_system.py \
               tests/test_awoooi_production_deploy_readback_blocker.py \
               tests/test_awoooi_priority_work_order_readback_api.py \
               tests/e2e_network_test.py::TestHMACVerification::test_valid_hmac_signature \
               tests/test_p0_cicd_baseline_source_readiness_api.py \
               tests/test_product_awoooi_manifest_standard_api.py \
               tests/test_trust_drift_watchdog.py \
               ../../ops/runner/test_read_public_gitea_actions_queue.py \
               ../../ops/runner/test_cd_controlled_runtime_profile.py \
               ../../ops/runner/test_check_awoooi_non110_runner_readiness.py \
               ../../ops/runner/test_install_awoooi_non110_runner_user_service.py \
               ../../ops/runner/test_register_awoooi_110_controlled_cd_lane_drain.py \
               ../../ops/runner/test_check_awoooi_110_controlled_cd_lane_readiness.py \
               ../../ops/runner/test_verify_awoooi_non110_cd_closure.py \
               ../../scripts/backup/tests/test_backup_status_contract.py \
               ../../scripts/ops/tests/test_backup_health_textfile_exporter.py \
               ../../scripts/ops/tests/test_docker_disk_pressure_retention_cleanup.py \
               ../../scripts/ops/tests/test_gitea_queue_hook_backlog_playbook.py \
               ../../scripts/ops/tests/test_host_runaway_process_exporter.py \
               ../../scripts/ops/tests/test_host_pressure_alert_contract.py \
               ../../scripts/reboot-recovery/tests/test_dr_escrow_evidence_checklist.py \
               ../../scripts/reboot-recovery/tests/test_cold_start_monitor_bounded_probes.py \
               ../../scripts/reboot-recovery/tests/test_momo_source_arrival_gate.py \
               ../../scripts/reboot-recovery/tests/test_reboot_auto_recovery_slo_installer.py \
               ../../scripts/reboot-recovery/tests/test_reboot_auto_recovery_slo_scorecard.py \
               ../../scripts/reboot-recovery/tests/test_188_host_hygiene_checklist.py \
               ../../scripts/reboot-recovery/tests/test_post_start_quick_check_contract.py \
               ../../scripts/reboot-recovery/tests/test_reboot_p0_operational_contract.py \
               ../../scripts/reboot-recovery/tests/test_harbor_watchdog_contract.py \
               ../../scripts/reboot-recovery/tests/test_recover_110_control_path_and_harbor_local.py \
               ../../scripts/security/tests/test_gitea_private_inventory_p0_scorecard.py \
               ../../scripts/security/tests/test_gitea_authenticated_inventory_payload_validator.py \
               -v --tb=short -x -p no:cacheprovider \
               2>&1 | tee /tmp/pytest-output.txt; PYTEST_EXIT=${PIPESTATUS[0]}
           else
             # Full-profile pytest run, excluding suites that need external
             # services. stderr is merged into stdout (2>&1) so the tee'd log also
             # contains faulthandler/stderr output, and PIPESTATUS[0] keeps
             # pytest's own exit code rather than tee's.
             DATABASE_URL="${DATABASE_URL:-postgresql+asyncpg://ci:ci@localhost/ci}" \
             PYTHONFAULTHANDLER=1 python3.11 -m pytest tests/ -v --tb=short -x -p no:cacheprovider \
               --ignore=tests/integration \
               --ignore=tests/test_anomaly_counter.py \
               --ignore=tests/test_global_repair_cooldown.py \
               --ignore=tests/test_redis_multisig.py \
               --ignore=tests/test_model_regression.py \
               --ignore=tests/test_prompt_validation.py \
               --ignore=tests/e2e_network_test.py \
               2>&1 | tee /tmp/pytest-output.txt; PYTEST_EXIT=${PIPESTATUS[0]}
           fi
           tail -60 /tmp/pytest-output.txt
           cleanup_pytest_workspace_cache
           exit $PYTEST_EXIT
           CI_SCRIPT
           docker run --rm \
             --name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-api-tests" \
             -e AWOOOI_CD_TEST_PROFILE="${AWOOOI_CD_TEST_PROFILE:-full}" \
             --cpus "2.0" \
             --memory "6g" \
             --memory-swap "8g" \
             -v "$PWD:/workspace" \
             -v /tmp/awoooi-api-tests.sh:/tmp/awoooi-api-tests.sh:ro \
             -v awoooi-api-venv-cache:/opt/api-venv \
             -w /workspace \
             "${{ env.CI_IMAGE }}" \
             bash /tmp/awoooi-api-tests.sh
       # ── 整合測試 B5 (2026-04-10) ──────────────────────────────────────────
       # B5 整合測試 — postgres-test 由 services: 提供，localhost:15432 直連
       # 2026-04-10 Claude Sonnet 4.6: 用 psql 直連 localhost:15432 初始化 schema
       #   (docker exec 在 act runner 內無法取得 service container name)
       # B5: Gitea act runner 的 services: 實作與 GitHub Actions 不同
       # service container 啟動後需直連，但 act 的 container name 可能為空
       # 2026-04-10 ogt: 改用 docker run 本地啟動取代 services: 宣告
       # 2026-04-19 ogt + Claude Opus 4.7: cd 連續 2 次 fail (run 984/985)
       #   真因: act runner 把 ci-runner 跑在獨立 user-defined network,
       #         pg-test-b5 預設用 host bridge → 兩邊隔離無法連 (172.17.0.2 timeout)
       #   修法: 把 pg-test-b5 加入 act task 的 network,用 container name 連線
       - name: Integration Tests (B5 — 真實 DB)
         # Runs tests/integration/test_b5_core_flows.py against a throw-away
         # pgvector container. The test script executes inside the CI image with
         # the host Docker socket mounted so it can start the DB container itself.
         # Skipped entirely when the controlled-runtime test profile is active.
         run: |
           # Recover the test profile from the marker file written earlier in this
           # job when the variable is not set in this step's environment.
           if [ -z "${AWOOOI_CD_TEST_PROFILE:-}" ] && [ -f .awoooi-cd-test-profile ]; then
             AWOOOI_CD_TEST_PROFILE="$(tr -d '\r\n' < .awoooi-cd-test-profile)"
             export AWOOOI_CD_TEST_PROFILE
           fi
           echo "B5 effective test profile=${AWOOOI_CD_TEST_PROFILE:-full}"
           if [ "${AWOOOI_CD_TEST_PROFILE:-full}" = "controlled-runtime" ]; then
             echo "✅ controlled-runtime profile: B5 DB integration unchanged; skipping B5 for this narrow release lane"
             exit 0
           fi
           cat > /tmp/awoooi-b5-tests.sh <<'CI_SCRIPT'
           set -euo pipefail
           cd apps/api
           # Always remove the throw-away DB container and Python caches on exit,
           # including when set -e aborts early (DB never ready, schema init
           # failure). Previously cleanup only ran after pytest, so those failure
           # paths left pg-test-b5 running until the next job replaced it.
           # The trap does not call exit, so the script's exit status is kept.
           cleanup_b5() {
             docker rm -f pg-test-b5 2>/dev/null || true
             # 2026-05-20 Codex: B5 imports shared tests helpers, so cleanup the
             # whole tests tree to avoid root-owned __pycache__ act-runner noise.
             find tests src -type d -name __pycache__ -prune -exec rm -rf {} + 2>/dev/null || true
             rm -rf .pytest_cache 2>/dev/null || true
           }
           trap cleanup_b5 EXIT
           # Install the psql client if the image does not ship it.
           if ! command -v psql &>/dev/null; then
             apt-get install -y -q postgresql-client
           fi
           # Exit 65: the Docker daemon is unreachable from inside this container.
           if ! docker info >/dev/null 2>&1; then
             echo "BLOCKER b5_docker_socket_unavailable"
             echo "NEXT_ACTION ensure_b5_ci_container_runs_with_docker_socket_permission_then_retry_cd"
             exit 65
           fi
           # 2026-04-19 ogt + Claude Opus 4.7 v3: proactively create a shared network.
           # Earlier, the grep for ACT_NET found no match in the c0f3509 run, so the
           # job fell back to the default bridge and container-name DNS failed.
           # Root cause: the default bridge has no container-name DNS; a
           # user-defined network is required.
           # Fix: create 'b5-test-net' (idempotent) and attach both the ci-runner
           # container and pg-test-b5 to it.
           B5_NET="b5-test-net"
           docker network create "$B5_NET" 2>/dev/null || true
           # Attach the current ci-runner container (hostname == short container id)
           # to the network. If it is already attached, docker network connect
           # fails, which is swallowed by || true.
           docker network connect "$B5_NET" "$HOSTNAME" 2>/dev/null || true
           echo "B5 shared network: $B5_NET (ci-runner hostname: $HOSTNAME)"
           # Start the test DB on the shared network; it is reached by its
           # container name 'pg-test-b5'. Any stale container is removed first.
           docker rm -f pg-test-b5 2>/dev/null || true
           docker run -d --name pg-test-b5 \
             --network="$B5_NET" \
             -e POSTGRES_DB=awoooi_test \
             -e POSTGRES_USER=awoooi \
             -e POSTGRES_PASSWORD=awoooi_test_2026 \
             pgvector/pgvector:pg16
           # Wait for readiness by container name: 30 attempts, 2 s apart.
           B5_DB_READY=0
           for i in $(seq 1 30); do
             if PGPASSWORD=awoooi_test_2026 pg_isready -h pg-test-b5 -p 5432 -U awoooi; then
               B5_DB_READY=1
               break
             fi
             sleep 2
           done
           # Exit 66: the DB container never reported ready.
           if [ "$B5_DB_READY" != "1" ]; then
             echo "BLOCKER b5_pg_test_container_not_ready"
             echo "NEXT_ACTION inspect_b5_test_network_and_docker_socket_then_retry_cd"
             docker ps --filter name=pg-test-b5 --format 'b5_container={{.Names}} status={{.Status}}' || true
             exit 66
           fi
           # Initialize the schema.
           PGPASSWORD=awoooi_test_2026 psql \
             -h pg-test-b5 -p 5432 -U awoooi -d awoooi_test \
             -f tests/integration/setup_test_schema.sql
           # Run the tests.
           # B5 integration tests, strict mode (2026-04-13 ogt: Break-Glass removal restored).
           # -m integration: overrides the pyproject.toml addopts "-m 'not integration'"
           # so that marked tests are actually executed.
           # 2026-04-22 ogt: since DATABASE_URL became mandatory, the import chain
           # needs this env var for Settings validation to pass.
           # '|| PYTEST_EXIT=$?' records a failure without tripping set -e.
           DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@pg-test-b5:5432/awoooi_test?ssl=disable" \
           TEST_DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@pg-test-b5:5432/awoooi_test?ssl=disable" \
             /opt/api-venv/bin/pytest tests/integration/test_b5_core_flows.py -v --tb=short -m integration -p no:cacheprovider || PYTEST_EXIT=$?
           # Container and cache cleanup happens in the EXIT trap.
           exit "${PYTEST_EXIT:-0}"
           CI_SCRIPT
           docker run --rm \
             --name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-b5-tests" \
             --user 0:0 \
             --cpus "2.0" \
             --memory "2g" \
             -v "$PWD:/workspace" \
             -v /tmp/awoooi-b5-tests.sh:/tmp/awoooi-b5-tests.sh:ro \
             -v /var/run/docker.sock:/var/run/docker.sock \
             -v awoooi-api-venv-cache:/opt/api-venv \
             -w /workspace \
             "${{ env.CI_IMAGE }}" \
             bash /tmp/awoooi-b5-tests.sh
       - name: Clean Test Workspace Artifacts
         # Runs whether or not earlier steps succeeded. The cleanup script is
         # given the CI image name through HOST_RUNNER_CLEANUP_IMAGE.
         env:
           HOST_RUNNER_CLEANUP_IMAGE: ${{ env.CI_IMAGE }}
         if: ${{ always() }}
         run: bash scripts/ci/cleanup-host-runner-workspace.sh
       - name: Notify Pipeline Failure
         # 2026-04-30 Codex: tests job failure notifier; no jq dependency for host parity.
         # Workflow expressions are handed to the script through env instead of
         # being spliced into the script text, so a commit subject containing
         # quotes, backticks or $(...) can neither break the script nor run as
         # shell code.
         if: failure()
         env:
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           COMMIT_MSG: ${{ steps.commit.outputs.message }}
           SHORT_SHA: ${{ steps.commit.outputs.short_sha }}
           ACTOR: ${{ github.actor }}
         run: |
           # HTML-escape the commit subject for the Telegram-style message below.
           COMMIT_ESC=$(echo "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
           # MSG is built but not sent by this step: the direct Telegram fallback
           # is disabled (see the else branch), only the AWOOI API path is used.
           MSG=$(printf '❌ <b>AWOOOI 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n├ 🧪 Stage: tests\n└ 🔗 http://192.168.0.110:3001/wooo/awoooi/actions' "${COMMIT_ESC}" "${SHORT_SHA}" "${ACTOR}")
           # A failed notification is reported but does not change the step result.
           if AWOOI_CICD_STATUS=failed \
             AWOOI_CICD_STAGE=tests \
             AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
             AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD tests failure notification mirrored through AWOOI API"
           else
             echo "AWOOI API notify failed; direct Telegram fallback disabled to preserve AwoooP receipt chain"
           fi
   build-and-deploy:
     # 2026-06-28 Codex: keep CD-generated `[skip ci]` deploy commits and
     # `cancel-stale-cd` queue-cleaning commits from re-entering build/deploy.
     # 2026-07-01 Codex: metadata-only controlled-runtime fixes already run the
     # focused tests above; do not spend the Docker build lock or deploy marker
     # when no app image can change.
     if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, 'cancel-stale-cd') && !contains(github.event.head_commit.message, '[metadata-only]')) }}
     # 2026-04-30 Codex: Docker builds run on the host runner. Long docker build
     # steps were killing the transient act job container with RWLayer=nil.
     needs: [tests]
     timeout-minutes: 60
     runs-on: awoooi-non110-host
     steps:
       - name: Bootstrap Host Runner Tools
         # 2026-05-05 Codex: keep the host-mode runner self-healing before
         # actions/checkout@v4 and Telegram failure notifications run.
         run: |
           # Nothing to do on hosts without the apk package manager.
           command -v apk >/dev/null 2>&1 || exit 0
           # Install the tools needed by the later steps of this job.
           apk add --no-cache \
             nodejs npm git curl bash coreutils python3 \
             openssh-client docker-cli docker-cli-buildx
       - uses: actions/checkout@v4
       - name: Wait for Host Web Build Pressure
         # Runs scripts/ci/wait-host-web-build-pressure.sh right after checkout,
         # before the rest of this job's steps.
         # 2026-06-27 Codex: post-deploy smoke is also browser-heavy. Refuse to
         # add another smoke run while active CI/build/smoke pressure is present.
         # NOTE(review): the note above talks about post-deploy smoke, but this
         # step sits in build-and-deploy ahead of the build — confirm the wording
         # was not copied from the smoke job.
         run: bash scripts/ci/wait-host-web-build-pressure.sh
       - name: Get Commit Info
         # Publishes step outputs (id: commit) used by the notification steps:
         #   short_sha  - first 7 characters of GITHUB_SHA
         #   message    - commit subject, truncated to the first 50 bytes
         #   start_time - current Unix timestamp in seconds
         id: commit
         run: |
           # One grouped redirect; the output path is quoted so it is never
           # word-split or glob-expanded.
           {
             echo "short_sha=${GITHUB_SHA::7}"
             echo "message=$(git log -1 --pretty=%s | head -c 50)"
             echo "start_time=$(date +%s)"
           } >> "$GITHUB_OUTPUT"
       - name: Notify Build Deploy Start
         # Sends a "running" notification for the build-and-deploy stage through
         # scripts/ci/notify-awoooi-cicd.sh. A failed notification is non-fatal.
         # Workflow expressions are passed through env rather than interpolated
         # into the script text, so a commit subject containing quotes, backticks
         # or $(...) cannot break the script or run as shell code.
         env:
           ACTOR: ${{ github.actor }}
           COMMIT_MSG: ${{ steps.commit.outputs.message }}
         run: |
           if AWOOI_CICD_STATUS=running \
             AWOOI_CICD_STAGE=build-and-deploy \
             AWOOI_CICD_JOB_NAME="AWOOOI 建置部署開始" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
             AWOOI_CICD_SUMMARY="${COMMIT_MSG}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD build-deploy start notification mirrored through AWOOI API"
           else
             echo "⚠️ CI/CD build-deploy start notification failed (non-fatal)"
           fi
       - name: Login to Harbor
         env:
           HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
           HARBOR_USERNAME: ${{ secrets.HARBOR_USERNAME }}
         run: |
           HARBOR_REGISTRY="${{ env.HARBOR }}"
           LOGIN_ATTEMPTS="${HARBOR_LOGIN_ATTEMPTS:-3}"
           LOGIN_SLEEP_SECONDS="${HARBOR_LOGIN_SLEEP_SECONDS:-5}"
           WATCHDOG="/usr/local/bin/harbor-watchdog.sh"
           HARBOR_110_NODE_EXPORTER_URL="${HARBOR_110_NODE_EXPORTER_URL:-http://192.168.0.110:9100/metrics}"
           HARBOR_110_MAX_LOAD5_PER_CORE="${HARBOR_110_MAX_LOAD5_PER_CORE:-1.25}"
           HARBOR_110_MAX_GITEA_CPU_CORES="${HARBOR_110_MAX_GITEA_CPU_CORES:-3.0}"
           probe_registry_v2() {
             # Print the HTTP status code of GET https://$HARBOR_REGISTRY/v2/
             # (10 s limit, response body discarded). Never fails the caller:
             # a curl error is swallowed.
             curl -sS -o /dev/null -w "%{http_code}" -m 10 \
               "https://${HARBOR_REGISTRY}/v2/" || true
           }
           host_has_110_ip() {
             # Succeeds only when the `ip` tool exists and one of this host's
             # IPv4 addresses is 192.168.0.110.
             if ! command -v ip >/dev/null 2>&1; then
               return 1
             fi
             ip -o -4 addr show 2>/dev/null | grep -q " 192.168.0.110/"
           }
           run_watchdog() {
             # Run $WATCHDOG with the given arguments: directly when already
             # root (uid 0), otherwise through non-interactive sudo.
             case "$(id -u)" in
               0) "${WATCHDOG}" "$@" ;;
               *) sudo -n "${WATCHDOG}" "$@" ;;
             esac
           }
           greater_than() {
             # Exit status 0 when $1 > $2 (compared by awk), 1 otherwise.
             awk -v lhs="$1" -v rhs="$2" 'BEGIN { if (lhs > rhs) exit 0; exit 1 }'
           }
           preflight_110_capacity() {
             # Capacity gate read from the metrics endpoint at
             # $HARBOR_110_NODE_EXPORTER_URL.
             # Returns 1 (after printing a BLOCKER line) when the load5-per-core
             # value for host="110" or the gitea container CPU-cores value exceeds
             # its threshold. Returns 0 otherwise, including when the metrics
             # cannot be fetched or a metric is absent (fail-open).
             metrics="$(curl --silent --show-error --max-time 5 "${HARBOR_110_NODE_EXPORTER_URL}" || true)"
             if [ -z "${metrics}" ]; then
               echo "harbor_110_capacity_metrics_unavailable=1"
               return 0
             fi
             # The opening brace of the label set is matched with [{]. The earlier
             # pattern '\\{' sat inside a single-quoted awk program and therefore
             # required a literal backslash before '{', which never occurs in the
             # exposition format, so both checks were silently skipped.
             # The value is taken from the last field of the last matching line.
             load5_per_core="$(printf '%s\n' "${metrics}" | awk '$1 ~ /^awoooi_host_load5_per_core[{]/ && $0 ~ /host=\"110\"/ { value = $NF } END { print value }')"
             gitea_cpu_cores="$(printf '%s\n' "${metrics}" | awk '$1 ~ /^docker_container_cpu_cores[{]/ && $0 ~ /container_name=\"gitea\"/ { value = $NF } END { print value }')"
             if [ -n "${load5_per_core}" ]; then
               echo "harbor_110_load5_per_core=${load5_per_core}"
               if greater_than "${load5_per_core}" "${HARBOR_110_MAX_LOAD5_PER_CORE}"; then
                 echo "BLOCKER harbor_110_host_pressure_high load5_per_core=${load5_per_core} threshold=${HARBOR_110_MAX_LOAD5_PER_CORE}"
                 return 1
               fi
             fi
             if [ -n "${gitea_cpu_cores}" ]; then
               echo "harbor_110_gitea_cpu_cores=${gitea_cpu_cores}"
               if greater_than "${gitea_cpu_cores}" "${HARBOR_110_MAX_GITEA_CPU_CORES}"; then
                 echo "BLOCKER harbor_110_gitea_cpu_saturated cpu_cores=${gitea_cpu_cores} threshold=${HARBOR_110_MAX_GITEA_CPU_CORES}"
                 return 1
               fi
             fi
             return 0
           }
           # Run the Harbor watchdog as a check followed by at most one repair.
           # Returns 0 when the check output already reports harbor_ready=true or
           # when --repair-once exits 0. Returns 1 when the repair is disabled, the
           # host does not own 192.168.0.110, the watchdog is not executable, the
           # check exits non-zero or lacks its marker, or the repair exits non-zero.
           # Writes the globals check_output, check_rc, repair_output, repair_rc.
           controlled_harbor_repair_once() {
             # Opt-out switch: any value other than "1" disables the repair path.
             if [ "${AWOOOI_CD_HARBOR_CONTROLLED_REPAIR:-1}" != "1" ]; then
               echo "harbor_controlled_repair_skipped=disabled"
               return 1
             fi
             if ! host_has_110_ip; then
               echo "harbor_controlled_repair_skipped=not_110_host"
               return 1
             fi
             if [ ! -x "${WATCHDOG}" ]; then
               echo "harbor_controlled_repair_skipped=watchdog_missing"
               return 1
             fi
             echo "harbor_controlled_repair_check_start=1"
             # errexit is suspended so a failing watchdog yields an exit code to
             # inspect instead of terminating the script; it is switched back on
             # right after the code is captured.
             set +e
             check_output="$(run_watchdog --check 2>&1)"
             check_rc=$?
             set -e
             # Only the first 80 lines of the check output reach the job log.
             printf '%s\n' "${check_output}" | sed -n '1,80p'
             echo "harbor_controlled_repair_check_rc=${check_rc}"
             if [ "${check_rc}" -ne 0 ]; then
               return 1
             fi
             # Refuse to repair unless the output carries the expected marker.
             if ! printf '%s\n' "${check_output}" | grep -q "AWOOOI_HARBOR_WATCHDOG_CHECK"; then
               echo "harbor_controlled_repair_blocked=missing_watchdog_check_marker"
               return 1
             fi
             if printf '%s\n' "${check_output}" | grep -q "harbor_ready=true"; then
               echo "harbor_controlled_repair_skipped=already_ready"
               return 0
             fi
             echo "harbor_controlled_repair_once_start=1"
             set +e
             repair_output="$(run_watchdog --repair-once 2>&1)"
             repair_rc=$?
             set -e
             # Only the first 140 lines of the repair output reach the job log.
             printf '%s\n' "${repair_output}" | sed -n '1,140p'
             echo "harbor_controlled_repair_once_rc=${repair_rc}"
             # The function's status is the repair's success.
             [ "${repair_rc}" -eq 0 ]
           }
           # Harbor login driver.
           # 1. Abort early when the 110 pressure gate reports a BLOCKER.
           # 2. Up to LOGIN_ATTEMPTS times: probe /v2/; on 200 or 401 try
           #    docker login (exit 0 on success); on any other status run the
           #    controlled repair at most once per job and re-probe immediately.
           # 3. On the final attempt print a BLOCKER plus NEXT_ACTION hints, exit 1.
           attempt=1
           repair_attempted=0
           if ! preflight_110_capacity; then
             echo "NEXT_ACTION wait_for_110_load_to_normalize_then_rerun_harbor_watchdog_check"
             echo "NEXT_ACTION if_110_load_stays_high_use_local_console: sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --check"
             exit 1
           fi
           while [ "${attempt}" -le "${LOGIN_ATTEMPTS}" ]; do
             # Reset per attempt so the final BLOCKER names the last failure mode.
             login_failed=0
             registry_status="$(probe_registry_v2)"
             if [ -z "${registry_status}" ]; then
               registry_status="000"
             fi
             if [ "${registry_status}" = "200" ] || [ "${registry_status}" = "401" ]; then
               # The password travels on stdin so it never appears in argv.
               if printf '%s\n' "${HARBOR_PASSWORD}" | \
                 docker login "${HARBOR_REGISTRY}" \
                   -u "${HARBOR_USERNAME}" \
                   --password-stdin; then
                 echo "harbor_login_ready=1"
                 exit 0
               fi
               login_failed=1
               echo "harbor_login_attempt=${attempt} docker_login_failed"
             else
               echo "harbor_login_attempt=${attempt} registry_v2_status=${registry_status}"
               if [ "${repair_attempted}" = "0" ]; then
                 repair_attempted=1
                 if controlled_harbor_repair_once; then
                   registry_status="$(probe_registry_v2)"
                   if [ -z "${registry_status}" ]; then
                     registry_status="000"
                   fi
                   echo "harbor_controlled_repair_public_registry_v2_status=${registry_status}"
                   # Retry at once without consuming an attempt or sleeping; this
                   # cannot loop forever because repair_attempted is already 1.
                   continue
                 fi
               fi
             fi
             if [ "${attempt}" -ge "${LOGIN_ATTEMPTS}" ]; then
               # A 200/401 probe means the public route answered; reporting it as
               # "route unavailable" would send the operator after the wrong fault.
               if [ "${login_failed}" = "1" ]; then
                 echo "BLOCKER harbor_docker_login_failed registry_v2_status=${registry_status}"
               else
                 echo "BLOCKER harbor_registry_public_route_unavailable registry_v2_status=${registry_status}"
               fi
               echo "NEXT_ACTION run_on_110_local_console_or_restored_ssh: sudo /usr/local/bin/harbor-watchdog.sh --check"
               echo "NEXT_ACTION if_check_confirms_unhealthy_on_110: sudo /usr/local/bin/harbor-watchdog.sh --repair-once"
               echo "NEXT_ACTION combined_110_control_path_then_harbor: sudo /usr/local/bin/recover-110-control-path-and-harbor-local.sh --apply-all"
               echo "NEXT_ACTION controlled_workflow_dispatch: .gitea/workflows/harbor-110-local-repair.yaml"
               exit 1
             fi
             sleep "${LOGIN_SLEEP_SECONDS}"
             attempt=$((attempt + 1))
           done
       # 2026-05-21 Codex: AWOOOI workflow concurrency and the Docker network
       # lock only protect AWOOOI/Docker work. Other repos can still run
       # host-side Next/Turbo builds on the same 110 runner and starve this
       # deploy. Wait for those foreign web builds before starting our image
       # build; the gate is read-only and never kills another process.
       - name: Wait for Host Web Build Pressure
         run: bash scripts/ci/wait-host-web-build-pressure.sh
       # 2026-04-30 Codex: Gitea act-runner shares one Docker daemon across repos.
       # When another repo starts a heavy docker build while AWOOOI Web is still
       # building, the job container can disappear and Docker reports RWLayer=nil.
       # A Docker-network lock is global to the host daemon and survives container
       # namespaces, unlike /tmp/flock inside the transient job container.
       # 2026-06-28 Codex: 110 runner pressure remains incident-grade; Docker
       # build lock contention is fail-hard until non-110 readiness is verified.
       #
       # Protocol: creating the named Docker network is the atomic "acquire".
       # While it already exists the step polls, and clears the lock when it is
       # (a) older than EMPTY_LOCK_SECONDS with no docker build/push running,
       # (b) older than STALE_SECONDS, or (c) of unparsable age and idle for more
       # than twice EMPTY_LOCK_SECONDS of waiting. On success the lock name is
       # exported as DOCKER_BUILD_LOCK through GITHUB_ENV.
       - name: Acquire Docker Build Lock
         run: |
           LOCK_NAME="awoooi-cd-docker-build-lock"
           LOCK_WARN_ONLY="${DOCKER_BUILD_LOCK_WARN_ONLY:-0}"
           STALE_SECONDS="${DOCKER_BUILD_LOCK_STALE_SECONDS:-7200}"
           EMPTY_LOCK_SECONDS="${DOCKER_BUILD_LOCK_EMPTY_SECONDS:-300}"
           WAIT_ATTEMPTS="${DOCKER_BUILD_LOCK_WAIT_ATTEMPTS:-180}"
           WAIT_SLEEP_SECONDS="${DOCKER_BUILD_LOCK_SLEEP_SECONDS:-10}"
           for attempt in $(seq 1 "$WAIT_ATTEMPTS"); do
             if docker network create \
               --label awoooi.ci-lock=docker-build \
               --label awoooi.owner=cd-pipeline \
               "$LOCK_NAME" >/dev/null 2>&1; then
               echo "DOCKER_BUILD_LOCK=${LOCK_NAME}" >> "$GITHUB_ENV"
               echo "✅ Docker build lock acquired: ${LOCK_NAME}"
               exit 0
             fi
             # Pin the lock instance BEFORE reading its age, and remove by that ID
             # below. Removing by name lets a waiter that judged the old lock
             # stale delete a fresh lock another job created in the meantime.
             # With the ID, a replaced lock makes the rm fail harmlessly. If the
             # ID cannot be read, fall back to the name (previous behaviour).
             LOCK_ID=$(docker network inspect "$LOCK_NAME" \
               --format '{{.Id}}' 2>/dev/null || true)
             LOCK_REF="${LOCK_ID:-$LOCK_NAME}"
             CREATED_AT=$(docker network inspect "$LOCK_NAME" \
               --format '{{.Created}}' 2>/dev/null || true)
             if [ -n "$CREATED_AT" ]; then
               # 2026-05-03 ogt: fix stale detection — Docker returns "2006-01-02 15:04:05.999999999 -0700 MST"
               # date -d rejects the nanosecond fraction and the trailing timezone
               # abbreviation (CST/MST, ...), which left CREATED_EPOCH=0 so stale
               # detection never fired.
               # 2026-06-18 Codex: act-runner containers may lack GNU date / python3;
               # node is installed by bootstrap and is the stable fallback parser
               # for Docker CreatedAt.
               # 2026-06-19 Codex: Docker / Gitea runner may return ISO
               # `2026-06-18T16:20:00.123456789Z`; if CREATED_EPOCH=0, an empty
               # lock never self-clears and the next deploy stalls for the full
               # 30 minutes.
               CREATED_CLEAN=$(echo "$CREATED_AT" | sed 's/\.[0-9]*//' | sed 's/ [A-Z][A-Z]*$//')
               # Parser chain: date -d, then node, then python3, else 0 (unparsable).
               CREATED_EPOCH=$(date -d "$CREATED_CLEAN" +%s 2>/dev/null || \
                 node -e 'const raw = process.argv[1] || ""; const base = raw.replace(/\.\d+/, "").replace(/\s+[A-Z]{2,4}$/, ""); const spaced = base.replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2})\s+([+-]\d{2})(\d{2})$/, "$1T$2$3:$4"); const iso = base.replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2})(Z|[+-]\d{2}:?\d{2})$/, "$1T$2$3"); const candidates = [raw, base, spaced, iso]; for (const candidate of candidates) { const ms = Date.parse(candidate); if (Number.isFinite(ms)) { console.log(Math.floor(ms / 1000)); process.exit(0); } } process.exit(1);' \
                 "$CREATED_AT" 2>/dev/null || \
                 python3 -c "import sys, datetime, re; ts = re.sub(r'\\.\d+', '', sys.argv[1]); ts = re.sub(r'\\s+[A-Z]{2,4}$', '', ts.strip()); print(int(datetime.datetime.strptime(ts, '%Y-%m-%d %H:%M:%S %z').timestamp()))" \
                 "$CREATED_AT" 2>/dev/null || echo 0)
               NOW_EPOCH=$(date +%s)
               LOCK_AGE=$((NOW_EPOCH - CREATED_EPOCH))
               # 2026-05-05 Codex: dirty reboot / cancelled Actions can leave
               # the Docker-network lock behind with no active build or push.
               # Waiting the full 30m CD timeout keeps deploys queued even
               # though no job is protected, so clear empty locks after 5m.
               # 2026-06-18 Codex: a bracket pattern alone still matches the
               # lock-check bash/awk command line itself; the checker must be
               # excluded explicitly so an empty lock network left behind by a
               # cancelled run can self-clear after 5 minutes.
               ACTIVE_DOCKER_WORK=$(ps -eo pid,args | awk '
                 $0 ~ /[d]ocker (build|push)|[b]uildx build/ &&
                 $0 !~ /ACTIVE_DOCKER_WORK/ &&
                 $0 !~ /awk/ &&
                 $0 !~ /ps -eo pid,args/ {print}
               ' || true)
               if [ "$CREATED_EPOCH" -eq 0 ] && \
                  [ $((attempt * WAIT_SLEEP_SECONDS)) -gt $((EMPTY_LOCK_SECONDS * 2)) ] && \
                  [ -z "$ACTIVE_DOCKER_WORK" ]; then
                 echo "⚠️ Docker build lock has unparsable CreatedAt (${CREATED_AT}) and no active docker build/push after $((attempt * WAIT_SLEEP_SECONDS))s, removing ${LOCK_NAME}"
                 docker network rm "$LOCK_REF" >/dev/null 2>&1 || true
                 continue
               fi
               if [ "$CREATED_EPOCH" -gt 0 ] && \
                  [ "$LOCK_AGE" -gt "$EMPTY_LOCK_SECONDS" ] && \
                  [ -z "$ACTIVE_DOCKER_WORK" ]; then
                 echo "⚠️ empty Docker build lock detected (age=${LOCK_AGE}s > ${EMPTY_LOCK_SECONDS}s, no active docker build/push), removing ${LOCK_NAME}"
                 docker network rm "$LOCK_REF" >/dev/null 2>&1 || true
                 continue
               fi
               if [ "$CREATED_EPOCH" -gt 0 ] && \
                  [ "$LOCK_AGE" -gt "$STALE_SECONDS" ]; then
                 echo "⚠️ stale Docker build lock detected (age=${LOCK_AGE}s > ${STALE_SECONDS}s), removing ${LOCK_NAME}"
                 docker network rm "$LOCK_REF" >/dev/null 2>&1 || true
                 continue
               fi
             fi
             echo "⏳ Docker build lock busy (attempt ${attempt}/${WAIT_ATTEMPTS}); waiting ${WAIT_SLEEP_SECONDS}s..."
             if [ "$attempt" -lt "$WAIT_ATTEMPTS" ]; then
               sleep "$WAIT_SLEEP_SECONDS"
             fi
           done
           echo "⚠️ timed out waiting for Docker build lock"
           # Warn-only mode proceeds without the lock and leaves DOCKER_BUILD_LOCK
           # unset, so no later step releases a lock this job does not own.
           if [ "$LOCK_WARN_ONLY" = "1" ]; then
             echo "⚠️ continuing without exclusive Docker build lock under commander controlled automation"
             exit 0
           fi
           echo "❌ refusing to continue without Docker build lock"
           exit 1
       # ── API image build (with layer-cache acceleration) ──────────────────
       # 2026-04-01 ogt: CACHE_BUST=git_sha makes the src/ and models.json layers
       # rebuild on every run; the deps layer (pip install) stays cacheable for
       # speed while the code/config layers are force-invalidated.
       # 2026-05-05 Codex: host runner bootstrap installs docker-cli-buildx;
       # keep BuildKit enabled because the web Dockerfile uses RUN --mount.
       - name: Build and Push API
         env:
           DOCKER_BUILDKIT: "1"
         run: |
           # One repository reference shared by the build tags and both pushes.
           API_IMAGE="${{ env.HARBOR }}/awoooi/api"
           docker build \
             --file apps/api/Dockerfile \
             --build-arg BUILDKIT_INLINE_CACHE=1 \
             --build-arg CACHE_BUST="${{ github.sha }}" \
             --cache-from "${API_IMAGE}:latest" \
             --tag "${API_IMAGE}:${{ github.sha }}" \
             --tag "${API_IMAGE}:latest" \
             .
           # Push the immutable commit tag first, then the moving latest tag.
           for image_tag in "${{ github.sha }}" latest; do
             docker push "${API_IMAGE}:${image_tag}"
           done
       # 2026-03-31 ogt: removed the intermediate notification to cut message noise.
       # ── Web image build (targeted cache invalidation) ────────────────────
       # 2026-03-30 ogt: NEXT_PUBLIC_* must use the public domain (baked in at
       # build time).
       # 2026-04-01 Claude Code: CACHE_BUST=git_sha replaces --no-cache
       # - the deps layer (pnpm install) stays cacheable → saves ~2-3 min
       # - everything from COPY . . down is invalidated by CACHE_BUST, so
       #   business-logic/CSRF changes reliably reach the bundle
       # 2026-05-05 Codex: mirror API build mode; BuildKit required for cache mounts.
       - name: Build and Push Web
         env:
           DOCKER_BUILDKIT: "1"
         run: |
           # One repository reference shared by the build tags and both pushes.
           WEB_IMAGE="${{ env.HARBOR }}/awoooi/web"
           docker build \
             --file apps/web/Dockerfile \
             --build-arg NEXT_PUBLIC_API_URL=https://awoooi.wooo.work \
             --build-arg CACHE_BUST="${{ github.sha }}" \
             --build-arg BUILDKIT_INLINE_CACHE=1 \
             --cache-from "${WEB_IMAGE}:latest" \
             --tag "${WEB_IMAGE}:${{ github.sha }}" \
             --tag "${WEB_IMAGE}:latest" \
             .
           # Push the immutable commit tag first, then the moving latest tag.
           for image_tag in "${{ github.sha }}" latest; do
             docker push "${WEB_IMAGE}:${image_tag}"
           done
       # Runs on every outcome (if: always()). The lock is released only when an
       # earlier step exported DOCKER_BUILD_LOCK for this job.
       - name: Release Docker Build Lock
         if: always()
         run: |
           if [ -z "${DOCKER_BUILD_LOCK:-}" ]; then
             echo "⚡ no Docker build lock to release"
           else
             # Best effort: a lock that is already gone is not an error.
             docker network rm "$DOCKER_BUILD_LOCK" >/dev/null 2>&1 || true
             echo "✅ Docker build lock released: ${DOCKER_BUILD_LOCK}"
           fi
       # 2026-03-31 ogt: removed the intermediate notification.
       # 2026-03-31 ogt: P0-1 automatic secrets injection (mandated by ADR-035).
       # 2026-03-31 ogt: added the AI API keys (fixes the mock_fallback issue).
       # Every secret is handed to the step as a process environment variable;
       # the script below only ever refers to them by variable name.
       - name: Inject K8s Secrets
         env:
           ARGOCD_API_TOKEN: ${{ secrets.ARGOCD_API_TOKEN }}
           AWOOOI_GITEA_API_TOKEN: ${{ secrets.AWOOOI_GITEA_API_TOKEN }}
           AWOOOI_GITEA_WEBHOOK_SECRET: ${{ secrets.AWOOOI_GITEA_WEBHOOK_SECRET }}
           AWOOOP_OPERATOR_API_KEY: ${{ secrets.AWOOOP_OPERATOR_API_KEY }}
           CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
           DATABASE_URL: ${{ secrets.DATABASE_URL }}
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
           JWT_SECRET: ${{ secrets.JWT_SECRET }}
           LANGFUSE_PUBLIC_KEY: ${{ secrets.LANGFUSE_PUBLIC_KEY }}
           LANGFUSE_SECRET_KEY: ${{ secrets.LANGFUSE_SECRET_KEY }}
           MIGRATION_DATABASE_URL: ${{ secrets.MIGRATION_DATABASE_URL }}
           NEMOTRON_BOT_TOKEN: ${{ secrets.NEMOTRON_BOT_TOKEN }}
           NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
           OPENCLAW_BOT_TOKEN: ${{ secrets.OPENCLAW_BOT_TOKEN }}
           OPENCLAW_TG_USER_WHITELIST: ${{ secrets.OPENCLAW_TG_USER_WHITELIST }}
           REDIS_URL: ${{ secrets.REDIS_URL }}
           SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
           SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
           SMTP_HOST: ${{ secrets.SMTP_HOST }}
           SRE_GROUP_CHAT_ID: ${{ secrets.SRE_GROUP_CHAT_ID }}
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
         run: |
           # 2026-06-30 Codex: do not inline action secret expressions in run scripts.
           # Gitea renders the shell body into job logs before masking. Keep
           # secret values in process env, then read by variable name only.
           # Print, on one line and without a trailing newline, the base64
           # encoding of the environment variable whose NAME is passed as $1.
           # An unset variable encodes as the empty string. python3.11 is
           # preferred, then python3; with neither, printenv + base64 is used.
           # The variable is looked up by name inside the child process, so the
           # secret value itself is never placed on a command line.
           secret_b64_env() {
             local env_name="$1"
             local py_bin=""
             if command -v python3.11 >/dev/null 2>&1; then
               py_bin="python3.11"
             elif command -v python3 >/dev/null 2>&1; then
               py_bin="python3"
             fi
             if [ -n "${py_bin}" ]; then
               # The Python path drops at most one trailing newline before encoding.
               SECRET_ENV_NAME="${env_name}" "${py_bin}" - <<'PY'
           import base64
           import os
           data = os.environ.get(os.environ["SECRET_ENV_NAME"], "").encode()
           data = data[:-1] if data.endswith(b"\n") else data
           print(base64.b64encode(data).decode(), end="")
           PY
             else
               secret_value="$(printenv "${env_name}" || true)"
               printf '%s' "${secret_value}" | base64 | tr -d '\n'
             fi
           }
           # Make sure ${HOME}/.ssh/deploy_key exists with mode 600.
           # The key is read from AWOOOI_DEPLOY_SSH_KEY_PATH when set, otherwise
           # from the target path itself. Exits the whole script with status 1
           # when the source key is not readable. Side effect: leaves the shell
           # umask at 077 for the rest of the script.
           prepare_deploy_key() {
             local ssh_dir="${HOME}/.ssh"
             local target_key="${ssh_dir}/deploy_key"
             mkdir -p "${ssh_dir}"
             umask 077
             local source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${target_key}}"
             [ -r "${source_key}" ] || {
               echo "❌ deploy ssh key file missing: ${source_key}" >&2
               exit 1
             }
             # Copy only when the key lives somewhere other than the target path.
             if [ "${source_key}" != "${target_key}" ]; then
               cp "${source_key}" "${target_key}"
             fi
             chmod 600 "${target_key}"
           }
           # Base64-encode every secret on the runner. The resulting *_B64 shell
           # variables are expanded into the SSH heredoc further down.
           # TG_CHAT_ID_B64 and SRE_GROUP_CHAT_ID_B64 are both derived from
           # SRE_GROUP_CHAT_ID.
           TG_BOT_TOKEN_B64="$(secret_b64_env TELEGRAM_BOT_TOKEN)"
           TG_CHAT_ID_B64="$(secret_b64_env SRE_GROUP_CHAT_ID)"
           NVIDIA_API_KEY_B64="$(secret_b64_env NVIDIA_API_KEY)"
           GEMINI_API_KEY_B64="$(secret_b64_env GEMINI_API_KEY)"
           LANGFUSE_PUBLIC_KEY_B64="$(secret_b64_env LANGFUSE_PUBLIC_KEY)"
           LANGFUSE_SECRET_KEY_B64="$(secret_b64_env LANGFUSE_SECRET_KEY)"
           TG_USER_WHITELIST_B64="$(secret_b64_env OPENCLAW_TG_USER_WHITELIST)"
           SENTRY_AUTH_TOKEN_B64="$(secret_b64_env SENTRY_AUTH_TOKEN)"
           GITEA_WEBHOOK_SECRET_B64="$(secret_b64_env AWOOOI_GITEA_WEBHOOK_SECRET)"
           ARGOCD_API_TOKEN_B64="$(secret_b64_env ARGOCD_API_TOKEN)"
           DATABASE_URL_B64="$(secret_b64_env DATABASE_URL)"
           MIGRATION_DATABASE_URL_B64="$(secret_b64_env MIGRATION_DATABASE_URL)"
           REDIS_URL_B64="$(secret_b64_env REDIS_URL)"
           JWT_SECRET_B64="$(secret_b64_env JWT_SECRET)"
           JWT_ALGORITHM_B64="$(secret_b64_env JWT_ALGORITHM)"
           WEBHOOK_HMAC_SECRET_B64="$(secret_b64_env WEBHOOK_HMAC_SECRET)"
           AWOOOP_OPERATOR_API_KEY_B64="$(secret_b64_env AWOOOP_OPERATOR_API_KEY)"
           SENTRY_DSN_B64="$(secret_b64_env SENTRY_DSN)"
           CLAUDE_API_KEY_B64="$(secret_b64_env CLAUDE_API_KEY)"
           GITEA_API_TOKEN_B64="$(secret_b64_env AWOOOI_GITEA_API_TOKEN)"
           NEMOTRON_BOT_TOKEN_B64="$(secret_b64_env NEMOTRON_BOT_TOKEN)"
           OPENCLAW_BOT_TOKEN_B64="$(secret_b64_env OPENCLAW_BOT_TOKEN)"
           SMTP_HOST_B64="$(secret_b64_env SMTP_HOST)"
           SRE_GROUP_CHAT_ID_B64="$(secret_b64_env SRE_GROUP_CHAT_ID)"
           # S1/S2: one canonical deploy_key name, ssh-keyscan for host keys, and
           # host-key verification enforced on the SSH connection.
           prepare_deploy_key
           # 2026-05-13 Codex: keyscan must include ED25519 explicitly. Some
           # OpenSSH builds otherwise record only RSA/ECDSA, then strict deploy
           # SSH fails with "No ED25519 host key is known" after image push.
           # 2026-06-13 Codex: keep deploy-time host keys in a dedicated file.
           # The runner user's global known_hosts is shared by cold-start and
           # backup checks for 120/188; overwriting it here caused strict SSH
           # recovery gates to flap after every CD run.
           DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
           ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null
           # An empty scan result means no host key was obtained: stop here.
           test -s "${DEPLOY_KNOWN_HOSTS}" || { echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"; exit 1; }
           SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -o ConnectTimeout=10"
           ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" << SECRETS
           set -e
           K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
           KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=\${K8S_API_SERVER}"
           # Inject the Telegram secrets (ADR-035 hard rule).
           # 2026-06-12 Codex: OPENCLAW_TG_CHAT_ID is kept only for legacy-field
           # compatibility; its value must equal SRE_GROUP_CHAT_ID so production
           # alerts cannot be diverted to another group.
           # Fail closed when either encoded value is empty: unlike every other
           # secret in this script this patch had no guard, so an unset Gitea
           # secret would be patched in as an empty string and blank the live
           # credentials while the patch itself reports success.
           if [ -z "${TG_BOT_TOKEN_B64}" ] || [ -z "${TG_CHAT_ID_B64}" ]; then
             echo "❌ Telegram Secrets missing (TELEGRAM_BOT_TOKEN / SRE_GROUP_CHAT_ID) — ADR-035"
             exit 1
           fi
           \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
             {"op":"add","path":"/data/OPENCLAW_TG_BOT_TOKEN","value":"${TG_BOT_TOKEN_B64}"},
             {"op":"add","path":"/data/OPENCLAW_TG_CHAT_ID","value":"${TG_CHAT_ID_B64}"}
           ]' || { echo "❌ Telegram Secrets patch 失敗 — ADR-035 鐵律"; exit 1; }
           # 2026-03-31 ogt: inject the AI API keys (fixes NVIDIA/Gemini mock_fallback).
           # 2026-04-01 Claude Code: base64 -w 0 keeps long keys from wrapping and
           # breaking the JSON.
           # NOTE(review): the encoded values are produced on the runner by
           # secret_b64_env; the base64 -w 0 remark looks like it predates that.
           # Every block below patches only when its encoded value is non-empty,
           # and a failed patch is reported as a warning without aborting.
           # NVIDIA NIM (free tier)
           if [ -n "${NVIDIA_API_KEY_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/NVIDIA_API_KEY","value":"${NVIDIA_API_KEY_B64}"}
             ]' && echo "✅ NVIDIA_API_KEY 已注入" || echo "⚠️ NVIDIA_API_KEY patch 失敗"
           else
             echo "⚠️ NVIDIA_API_KEY 未設定，跳過"
           fi
           # Gemini (fallback)
           if [ -n "${GEMINI_API_KEY_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/GEMINI_API_KEY","value":"${GEMINI_API_KEY_B64}"}
             ]' && echo "✅ GEMINI_API_KEY 已注入" || echo "⚠️ GEMINI_API_KEY patch 失敗"
           else
             echo "⚠️ GEMINI_API_KEY 未設定，跳過"
           fi
           # 2026-04-01 Claude Code: Langfuse LLMOps keys (added to CD injection;
           # previously they were only configured by hand). Both keys must be
           # present for the pair to be patched.
           if [ -n "${LANGFUSE_PUBLIC_KEY_B64}" ] && [ -n "${LANGFUSE_SECRET_KEY_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/LANGFUSE_PUBLIC_KEY","value":"${LANGFUSE_PUBLIC_KEY_B64}"},
               {"op":"add","path":"/data/LANGFUSE_SECRET_KEY","value":"${LANGFUSE_SECRET_KEY_B64}"}
             ]' && echo "✅ LANGFUSE keys 已注入" || echo "⚠️ LANGFUSE keys patch 失敗"
           else
             echo "⚠️ LANGFUSE_PUBLIC_KEY/SECRET_KEY 未設定，跳過 (現有 K8s secret 值維持不變)"
           fi
           # 2026-04-02 Claude Code: Telegram whitelist (user IDs authorized to approve)
           if [ -n "${TG_USER_WHITELIST_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/OPENCLAW_TG_USER_WHITELIST","value":"${TG_USER_WHITELIST_B64}"}
             ]' && echo "✅ TG_USER_WHITELIST 已注入" || echo "⚠️ TG_USER_WHITELIST patch 失敗"
           fi
           # Phase O-4.1 2026-04-02: Sentry Auth Token (Wave A.1 ADR-037)
           if [ -n "${SENTRY_AUTH_TOKEN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/SENTRY_AUTH_TOKEN","value":"${SENTRY_AUTH_TOKEN_B64}"}
             ]' && echo "✅ SENTRY_AUTH_TOKEN 已注入" || echo "⚠️ SENTRY_AUTH_TOKEN patch 失敗"
           else
             echo "⚠️ SENTRY_AUTH_TOKEN 未設定，Sentry Comment API 將跳過"
           fi
           # ADR-059 2026-04-05 Claude Code: Gitea Webhook Secret
           if [ -n "${GITEA_WEBHOOK_SECRET_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/GITEA_WEBHOOK_SECRET","value":"${GITEA_WEBHOOK_SECRET_B64}"}
             ]' && echo "✅ GITEA_WEBHOOK_SECRET 已注入" || echo "⚠️ GITEA_WEBHOOK_SECRET patch 失敗"
           else
             echo "⚠️ GITEA_WEBHOOK_SECRET 未設定，Gitea Webhook 簽章驗證將在 prod 失效"
           fi
           # MCP Phase 3: ArgoCD API Token (2026-04-11 Claude Sonnet 4.6)
           if [ -n "${ARGOCD_API_TOKEN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/ARGOCD_API_TOKEN","value":"${ARGOCD_API_TOKEN_B64}"}
             ]' && echo "✅ ARGOCD_API_TOKEN 已注入" || echo "⚠️ ARGOCD_API_TOKEN patch 失敗"
           else
             echo "⚠️ ARGOCD_API_TOKEN 未設定，ArgoCD MCP 將使用空 token"
           fi
           # ============================================================================
           # ADR-090-B 2026-04-18 ogt + Claude Opus 4.7: promote L3-only keys to L2 (13 keys)
           # ============================================================================
           # Goal: remove the disaster blind spot of keys that live only in K8s
           # etcd; the Gitea Secret becomes the official source of truth.
           # Note: every block keeps the same structure as the ones above
           # (if guard + base64 -w 0 + json patch).
           # DATABASE_URL — PG application connection string (rotated 2026-04-18)
           if [ -n "${DATABASE_URL_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/DATABASE_URL","value":"${DATABASE_URL_B64}"}
             ]' && echo "✅ DATABASE_URL 已注入" || echo "⚠️ DATABASE_URL patch 失敗"
           else
             echo "⚠️ DATABASE_URL 未設定，awoooi-api 將無法連 PG"
           fi
           # MIGRATION_DATABASE_URL — restricted awoooi_migrator account used by
           # CI migrations (ADR-090-B)
           if [ -n "${MIGRATION_DATABASE_URL_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/MIGRATION_DATABASE_URL","value":"${MIGRATION_DATABASE_URL_B64}"}
             ]' && echo "✅ MIGRATION_DATABASE_URL 已注入" || echo "⚠️ MIGRATION_DATABASE_URL patch 失敗"
           fi
           # REDIS_URL — Redis connection (6380 on 188)
           if [ -n "${REDIS_URL_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/REDIS_URL","value":"${REDIS_URL_B64}"}
             ]' && echo "✅ REDIS_URL 已注入" || echo "⚠️ REDIS_URL patch 失敗"
           else
             echo "⚠️ REDIS_URL 未設定"
           fi
           # JWT_SECRET / JWT_ALGORITHM — API authentication
           if [ -n "${JWT_SECRET_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/JWT_SECRET","value":"${JWT_SECRET_B64}"}
             ]' && echo "✅ JWT_SECRET 已注入" || echo "⚠️ JWT_SECRET patch 失敗"
           fi
           if [ -n "${JWT_ALGORITHM_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/JWT_ALGORITHM","value":"${JWT_ALGORITHM_B64}"}
             ]' && echo "✅ JWT_ALGORITHM 已注入" || echo "⚠️ JWT_ALGORITHM patch 失敗"
           fi
           # WEBHOOK_HMAC_SECRET — HMAC signature for the Alertmanager webhook
           if [ -n "${WEBHOOK_HMAC_SECRET_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/WEBHOOK_HMAC_SECRET","value":"${WEBHOOK_HMAC_SECRET_B64}"}
             ]' && echo "✅ WEBHOOK_HMAC_SECRET 已注入" || echo "⚠️ WEBHOOK_HMAC_SECRET patch 失敗"
           fi
           # AWOOOP_OPERATOR_API_KEY — AwoooP Operator mutation endpoints
           if [ -n "${AWOOOP_OPERATOR_API_KEY_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/AWOOOP_OPERATOR_API_KEY","value":"${AWOOOP_OPERATOR_API_KEY_B64}"}
             ]' && echo "✅ AWOOOP_OPERATOR_API_KEY 已注入" || echo "⚠️ AWOOOP_OPERATOR_API_KEY patch 失敗"
           fi
           # SENTRY_DSN — Sentry error tracking (not the auth token)
           if [ -n "${SENTRY_DSN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/SENTRY_DSN","value":"${SENTRY_DSN_B64}"}
             ]' && echo "✅ SENTRY_DSN 已注入" || echo "⚠️ SENTRY_DSN patch 失敗"
           fi
           # CLAUDE_API_KEY — Claude fallback LLM
           if [ -n "${CLAUDE_API_KEY_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/CLAUDE_API_KEY","value":"${CLAUDE_API_KEY_B64}"}
             ]' && echo "✅ CLAUDE_API_KEY 已注入" || echo "⚠️ CLAUDE_API_KEY patch 失敗"
           fi
           # GITEA_API_TOKEN — Gitea API token (mapped from AWOOOI_GITEA_API_TOKEN)
           if [ -n "${GITEA_API_TOKEN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/GITEA_API_TOKEN","value":"${GITEA_API_TOKEN_B64}"}
             ]' && echo "✅ GITEA_API_TOKEN 已注入" || echo "⚠️ GITEA_API_TOKEN patch 失敗"
           fi
           # NEMOTRON_BOT_TOKEN / OPENCLAW_BOT_TOKEN — multi-bot architecture
           if [ -n "${NEMOTRON_BOT_TOKEN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/NEMOTRON_BOT_TOKEN","value":"${NEMOTRON_BOT_TOKEN_B64}"}
             ]' && echo "✅ NEMOTRON_BOT_TOKEN 已注入" || echo "⚠️ NEMOTRON_BOT_TOKEN patch 失敗"
           fi
           if [ -n "${OPENCLAW_BOT_TOKEN_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/OPENCLAW_BOT_TOKEN","value":"${OPENCLAW_BOT_TOKEN_B64}"}
             ]' && echo "✅ OPENCLAW_BOT_TOKEN 已注入" || echo "⚠️ OPENCLAW_BOT_TOKEN patch 失敗"
           fi
           # SMTP_HOST / SRE_GROUP_CHAT_ID
           if [ -n "${SMTP_HOST_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/SMTP_HOST","value":"${SMTP_HOST_B64}"}
             ]' && echo "✅ SMTP_HOST 已注入" || echo "⚠️ SMTP_HOST patch 失敗"
           fi
           if [ -n "${SRE_GROUP_CHAT_ID_B64}" ]; then
             \$KUBECTL patch secret awoooi-secrets -n awoooi-prod --type='json' -p='[
               {"op":"add","path":"/data/SRE_GROUP_CHAT_ID","value":"${SRE_GROUP_CHAT_ID_B64}"}
             ]' && echo "✅ SRE_GROUP_CHAT_ID 已注入" || echo "⚠️ SRE_GROUP_CHAT_ID patch 失敗"
           fi
           # 2026-04-06 Claude Code: Sprint 3 T2 — known_hosts Secret (Security Fix A1)
           # Replaces the old approach that disabled host-key verification, so the
           # SSH repair path uses known host fingerprints.
           # asyncssh reads /etc/ssh-mcp/known_hosts and requires a non-empty
           # OpenSSH known_hosts file. Keep hosts unhashed so both asyncssh and
           # CLI diagnostics can trust the same secret.
           # 2026-05-02 ogt + Claude Sonnet 4.6: add a completeness check for the 4 hosts.
           # Root cause: a partial scan (e.g. 110 times out while the others
           #       succeed) passes a [-s file] test, and the following patch pushes
           #       an incomplete known_hosts → asyncssh rejects every SSH session.
           # Fix: after the scan, verify with grep that all 4 hosts are present;
           #       if any is missing, abort without overwriting the existing secret
           #       so the production SSH auto-repair path is not paralysed.
           # The scan itself is allowed to fail; the per-host check below decides.
           ssh-keyscan 192.168.0.110 192.168.0.120 192.168.0.121 192.168.0.188 > /tmp/known_hosts_repair 2>/tmp/known_hosts_scan_err || true
           EXPECTED_HOSTS=4
           PRESENT=0
           # Count the hosts that have at least one key line in the scan output.
           for ip in 192.168.0.110 192.168.0.120 192.168.0.121 192.168.0.188; do
             if grep -qE "^\${ip}[[:space:]]" /tmp/known_hosts_repair 2>/dev/null; then
               PRESENT=\$((PRESENT + 1))
             else
               echo "⚠️ ssh-keyscan 缺主機 \${ip}"
             fi
           done
           if [ "\$PRESENT" -eq "\$EXPECTED_HOSTS" ]; then
             # Both updates are best effort: a failure is logged as non-fatal.
             \$KUBECTL create secret generic awoooi-repair-known-hosts \
               -n awoooi-prod \
               --from-file=known_hosts=/tmp/known_hosts_repair \
               --dry-run=client -o yaml | \$KUBECTL apply -f - \
               && echo "✅ awoooi-repair-known-hosts Secret 已建立/更新" \
               || echo "⚠️ awoooi-repair-known-hosts Secret 建立失敗 (非致命)"
             KNOWN_HOSTS_B64=\$(base64 -w 0 /tmp/known_hosts_repair)
             \$KUBECTL patch secret ssh-mcp-key -n awoooi-prod --type=merge \
               -p="{\"data\":{\"known_hosts\":\"\${KNOWN_HOSTS_B64}\"}}" \
               && echo "✅ ssh-mcp-key known_hosts 已更新（4 台主機完整）" \
               || echo "⚠️ ssh-mcp-key known_hosts 更新失敗 (非致命)"
             rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
           else
             # Incomplete scan: keep the existing secrets and show the first
             # 10 lines of the scan errors for diagnosis.
             echo "❌ ssh-keyscan 只抓到 \${PRESENT}/\${EXPECTED_HOSTS} 台主機，跳過 patch（保留現有 secret）"
             cat /tmp/known_hosts_scan_err 2>/dev/null | head -10
             rm -f /tmp/known_hosts_repair /tmp/known_hosts_scan_err
           fi
           echo "✅ 所有 Secrets 注入完成"
           SECRETS
       # 2026-04-11 Claude Sonnet 4.6 (Sprint B-3 ADR-069):
       # Deploy 改為 ArgoCD GitOps 模式：更新 kustomization.yaml → git push [skip ci] → ArgoCD sync
       # 舊做法 (kubectl set image) 與 ArgoCD selfHeal 衝突 — ArgoCD 會 revert 任何直接 kubectl 操作
       # 新做法流程：
       #   1. 更新 kustomization.yaml image tag（用 kustomize edit set image）
       #   2. Apply ConfigMap/ServiceRegistry（不含 Deployment，由 ArgoCD 管）
       #   3. git commit [skip ci] + push → 觸發 ArgoCD automated sync
       #   4. 等待 ArgoCD sync + rollout 完成
       #   5. Health Check
       - name: Deploy to K8s (ArgoCD GitOps)
         env:
           CD_PUSH_TOKEN: ${{ secrets.CD_PUSH_TOKEN }}
         run: |
           # Stage the deploy SSH private key at ~/.ssh/deploy_key (mode 600).
           # Source: $AWOOOI_DEPLOY_SSH_KEY_PATH when set, otherwise the key is
           # expected to already sit at ~/.ssh/deploy_key. Exits 1 when the key
           # is not readable. The umask 077 set here stays in effect for the
           # rest of the calling script.
           prepare_deploy_key() {
             local ssh_dir="${HOME}/.ssh"
             local staged_key="${ssh_dir}/deploy_key"
             mkdir -p "${ssh_dir}"
             umask 077
             local key_origin="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${staged_key}}"
             [ -r "${key_origin}" ] || {
               echo "❌ deploy ssh key file missing: ${key_origin}" >&2
               exit 1
             }
             # Copy only when the key comes from somewhere else.
             [ "${key_origin}" = "${staged_key}" ] || cp "${key_origin}" "${staged_key}"
             chmod 600 "${staged_key}"
           }
           mkdir -p ~/.ssh
           prepare_deploy_key
           # 2026-05-13 Codex: mirror Inject K8s Secrets host-key handling so the
           # deploy job never reaches SSH with a known_hosts file missing ED25519.
           # 2026-06-13 Codex: use the deploy-only known_hosts file so this
           # stage cannot wipe cold-start/backup host trust for 120/188.
           DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
           # The scan runs inside the condition: as a bare command, a non-zero
           # ssh-keyscan exit would abort an errexit shell before the diagnostic
           # below is printed. A failed scan and an empty result file are both
           # fatal, so the step stays fail-closed.
           if ! ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null \
             || ! test -s "${DEPLOY_KNOWN_HOSTS}"; then
             echo "❌ K8S host keyscan failed: ${K8S_SSH_HOST}"
             exit 1
           fi
           # Non-interactive SSH pinned to the deploy key and the freshly scanned host keys.
           SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -o ConnectTimeout=10"
           # Exported because the embedded Python scripts read IMAGE_TAG from the environment.
           export IMAGE_TAG="${{ github.sha }}"
           HARBOR=192.168.0.110:5000
           # ─── Step 1: apply ConfigMap + ServiceRegistry (ArgoCD manages the Deployment; ConfigMaps are still applied directly) ───
           # Stream one manifest to `kubectl apply -f -` on the K8s host over SSH,
           # then print the given confirmation line.
           apply_manifest_over_ssh() {
             local manifest_path="$1"
             local done_message="$2"
             cat "${manifest_path}" | \
               ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
               "KUBECTL='sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${{ env.K8S_API_SERVER }}'; \$KUBECTL apply -f -"
             echo "${done_message}"
           }
           apply_manifest_over_ssh k8s/awoooi-prod/04-configmap.yaml "✅ ConfigMap 已更新"
           apply_manifest_over_ssh k8s/awoooi-prod/15-service-registry-configmap.yaml "✅ Service Registry ConfigMap 已更新"
           # ─── Step 2: update the image tags in kustomization.yaml ───
           # The host runner is not guaranteed root access, so kustomize is
           # installed under the user's home directory.
           export PATH="${HOME}/.local/bin:${PATH}"
           if ! command -v kustomize &>/dev/null; then
             mkdir -p "${HOME}/.local/bin"
             # -f makes an HTTP error fail the download instead of piping an
             # error page into tar; -S keeps the error message visible.
             curl -fsSL https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv5.3.0/kustomize_v5.3.0_linux_amd64.tar.gz \
               | tar xz -C "${HOME}/.local/bin"
             chmod +x "${HOME}/.local/bin/kustomize"
           fi
           cd k8s/awoooi-prod
           # Point the api and web placeholder images at this build's Harbor tag.
           # The registry host must be written in full and the continuation line
           # must stay indented inside the run block.
           kustomize edit set image \
             192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER=${HARBOR}/awoooi/api:${IMAGE_TAG}
           kustomize edit set image \
             192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER=${HARBOR}/awoooi/web:${IMAGE_TAG}
           cd ../..
           # 2026-06-29 Codex: API deploy readback uses AWOOOI_BUILD_COMMIT_SHA
           # to compare runtime image/source truth. Keep it in the same deploy
           # marker commit as kustomization.yaml so the production Workbench does
           # not depend on stale committed snapshots.
           python3 - <<'PY'
           # Rewrite the quoted value of two env entries in the API deployment
           # manifest so that they carry the image tag of this build.
           import os
           import re
           from pathlib import Path
           MANIFEST = Path("k8s/awoooi-prod/06-deployment-api.yaml")
           NEW_TAG = os.environ["IMAGE_TAG"]
           def replace_env_value(source: str, env_name: str) -> str:
               """Return source with the first `value: "..."` of env_name set to NEW_TAG.

               Comment lines between the `- name:` line and the `value:` line are
               tolerated. Exits with a message when the env entry is not found.
               """
               env_block = re.compile(
                   rf'(\n\s+- name: {re.escape(env_name)}\n'
                   r'(?:\s+# [^\n]*\n)*'
                   r'\s+value: ")[^"]*(")'
               )
               def swap_value(found):
                   # Built by concatenation so the tag is never read as a regex template.
                   return found.group(1) + NEW_TAG + found.group(2)
               updated, hits = env_block.subn(swap_value, source, count=1)
               if hits == 0:
                   raise SystemExit(f"{env_name} env block not found")
               return updated
           manifest_text = MANIFEST.read_text(encoding="utf-8")
           for env_name in ("AWOOOI_BUILD_COMMIT_SHA", "AWOOOI_DESIRED_API_IMAGE_TAG"):
               manifest_text = replace_env_value(manifest_text, env_name)
           MANIFEST.write_text(manifest_text, encoding="utf-8")
           PY
           # ─── Step 3: git commit [skip ci] + push → triggers the ArgoCD sync ───
           git config user.email "cd@awoooi.internal"
           git config user.name "AWOOOI CD"
           git add k8s/awoooi-prod/kustomization.yaml k8s/awoooi-prod/06-deployment-api.yaml
           # Stays empty when nothing is pushed; the remote wait then accepts any synced revision.
           DEPLOY_REVISION=""
           if git diff --cached --quiet; then
             echo "⚡ kustomization.yaml 無變化，跳過 push"
           else
             git commit -m "chore(cd): deploy ${IMAGE_TAG::7} [skip ci]"
             # Push with a token (avoids having to grant the SSH key push permission).
             git remote remove gitea 2>/dev/null || true
             git remote add gitea "http://wooo:${CD_PUSH_TOKEN}@192.168.0.110:3001/wooo/awoooi.git"
             # Rebase first to avoid a non-fast-forward (other commits may have
             # landed during CI).
             # 2026-04-17 ogt: -X theirs — on a kustomization.yaml conflict keep
             # the image tag of this deploy.
             PUSH_STATUS=0
             {
               git fetch gitea main \
                 && git rebase -X theirs gitea/main \
                 && DEPLOY_REVISION=$(git rev-parse HEAD) \
                 && git push gitea main
             } || PUSH_STATUS=$?
             # The remote URL embeds CD_PUSH_TOKEN; drop it whether or not the
             # push worked so the token does not stay in .git/config on the runner.
             git remote remove gitea 2>/dev/null || true
             if [ "$PUSH_STATUS" -ne 0 ]; then
               echo "❌ deploy marker fetch/rebase/push failed (exit ${PUSH_STATUS})" >&2
               exit "$PUSH_STATUS"
             fi
             echo "✅ kustomization.yaml 已 push，等待 ArgoCD sync 到 ${DEPLOY_REVISION:0:8}..."
           fi
           # ─── Step 4: wait for ArgoCD sync + rollout ───
           # The remote script's combined stdout/stderr is tee'd into ROLLOUT_LOG
           # so the AWOOOI_ROLLOUT_* evidence lines can be read back afterwards.
           ROLLOUT_LOG="$(mktemp)"
           # errexit is switched off so a failing remote script does not abort
           # this step before its exit status is read from PIPESTATUS.
           set +e
           ssh $SSH_OPTS "wooo@${{ env.K8S_SSH_HOST }}" \
             "EXPECTED_REVISION='${DEPLOY_REVISION}' bash -s" 2>&1 << 'ARGOCD_WAIT' | tee "$ROLLOUT_LOG"
           # From here to the ARGOCD_WAIT terminator the text is the script fed to
           # the remote `bash -s`; EXPECTED_REVISION arrives through its environment.
           set -e
           K8S_API_SERVER="${{ env.K8S_API_SERVER }}"
           KUBECTL="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${K8S_API_SERVER}"
           # One line per observed risk, appended by record_rollout_risk.
           RISK_FILE="$(mktemp)"
           # Counters used to cap how many repeated risks are recorded.
           UNKNOWN_STATUS_COUNT=0
           HEALTH_FAILURE_COUNT=0
           # Append one risk line ($1) to $RISK_FILE and mirror it to stderr.
           record_rollout_risk() {
             local risk_note="${1}"
             printf '%s\n' "${risk_note}" >> "${RISK_FILE}"
             echo "⚠️ Rollout risk observed: ${risk_note}" >&2
           }
           # EXIT handler: print the machine-readable rollout evidence lines and
           # delete $RISK_FILE. Prints AWOOOI_ROLLOUT_RISK=0 when no risk was
           # recorded, otherwise AWOOOI_ROLLOUT_RISK=1 plus one
           # AWOOOI_ROLLOUT_SUMMARY line (risk text cut to 700 characters).
           emit_rollout_evidence() {
             if [ ! -s "$RISK_FILE" ]; then
               echo "AWOOOI_ROLLOUT_RISK=0"
               rm -f "$RISK_FILE"
               return
             fi
             local failed_queries
             local risk_digest
             failed_queries=$(grep -c '^argocd_.*_query_failed=' "$RISK_FILE" 2>/dev/null || true)
             # tr maps character for character, so every newline becomes ';'
             # and the trailing space of the second set is never used.
             risk_digest=$(tr '\n' '; ' < "$RISK_FILE" | sed 's/[[:cntrl:]]//g' | cut -c1-700)
             echo "AWOOOI_ROLLOUT_RISK=1"
             echo "AWOOOI_ROLLOUT_SUMMARY=unknown_status_count=${UNKNOWN_STATUS_COUNT}; health_failure_count=${HEALTH_FAILURE_COUNT}; kubectl_failure_count=${failed_queries}; ${risk_digest}"
             rm -f "$RISK_FILE"
           }
           trap emit_rollout_evidence EXIT
           # Read one field of the awoooi-prod ArgoCD Application via jsonpath.
           #   $1  jsonpath expression
           #   $2  short label used in the risk record
           # Prints the value on success. When kubectl fails it prints the literal
           # "Unknown" and returns 0; a query failure is recorded as a risk only
           # while fewer than three such failures are already on record.
           app_field() {
             local field_path="$1" field_label="$2"
             local reply rc prior_failures
             # errexit is toggled around the query so a kubectl failure is
             # inspected here instead of ending the shell.
             set +e
             reply=$($KUBECTL get application awoooi-prod -n argocd -o jsonpath="$field_path" 2>&1)
             rc=$?
             set -e
             if [ "$rc" -eq 0 ]; then
               printf '%s' "$reply"
               return
             fi
             prior_failures=$(grep -c '^argocd_.*_query_failed=' "$RISK_FILE" 2>/dev/null || true)
             if [ "$prior_failures" -lt 3 ]; then
               record_rollout_risk "argocd_${field_label}_query_failed=$(echo "$reply" | head -c 180)"
             fi
             printf 'Unknown'
             return 0
           }
           # Probe the public API health URL once ($1 = phase label for the risk
           # record). Any result other than HTTP 200 increments
           # HEALTH_FAILURE_COUNT; only the first three failures are recorded.
           probe_public_health() {
             local stage="$1"
             local code
             local rc=0
             code=$(curl -sS -w "%{http_code}" -o /dev/null --connect-timeout 3 --max-time 8 "${{ env.API_HEALTH_URL }}" 2>/dev/null) || rc=$?
             # A transport failure is reported by curl exit status, not by HTTP code.
             [ "$rc" -eq 0 ] || code="curl_error_${rc}"
             if [ "$code" = "200" ]; then
               return 0
             fi
             HEALTH_FAILURE_COUNT=$((HEALTH_FAILURE_COUNT + 1))
             if [ "$HEALTH_FAILURE_COUNT" -le 3 ]; then
               record_rollout_risk "public_health_${stage}_http=${code}"
             fi
           }
           # Print a one-line digest of the Application resources that are not
           # Synced or not Healthy: at most five distinct entries joined with ';'.
           # When the kubectl query fails, prints resource_query_failed=<first 180
           # bytes of its output> and returns 0.
           collect_argocd_resource_evidence() {
             local go_tpl raw rc
             go_tpl='{{range .status.resources}}{{if ne .status "Synced"}}{{.kind}}/{{.name}}{{if .namespace}} ns={{.namespace}}{{end}} sync={{.status}}{{if .health.status}} health={{.health.status}}{{end}}{{"\n"}}{{end}}{{if .health.status}}{{if ne .health.status "Healthy"}}{{.kind}}/{{.name}}{{if .namespace}} ns={{.namespace}}{{end}} sync={{.status}} health={{.health.status}}{{if .health.message}} msg={{.health.message}}{{end}}{{"\n"}}{{end}}{{end}}{{end}}'
             set +e
             raw=$($KUBECTL get application awoooi-prod -n argocd -o "go-template=${go_tpl}" 2>&1)
             rc=$?
             set -e
             if [ "$rc" -eq 0 ]; then
               # Drop blank and duplicate lines, keep five, join them, strip
               # control characters and trailing separators.
               echo "$raw" \
                 | awk 'NF && !seen[$0]++ {print}' \
                 | head -5 \
                 | tr '\n' ';' \
                 | sed 's/[[:cntrl:]]//g; s/;*$//'
               return
             fi
             local clipped
             clipped=$(printf '%s' "$raw" | head -c 180)
             echo "resource_query_failed=${clipped}"
             return 0
           }
           # Fail closed unless the ArgoCD Application tracks `main` and carries no
           # kustomize image override. Exits 1 (after recording a rollout risk) on
           # any violation, and also when the Application could not be read.
           validate_argocd_source_contract() {
             local target_revision
             local image_override
             target_revision=$(app_field '{.spec.source.targetRevision}' source_target_revision)
             image_override=$(app_field '{.spec.source.kustomize.images}' source_kustomize_images)
             # app_field prints "Unknown" when kubectl itself failed. Report that
             # as an unreadable contract rather than as a wrong targetRevision or
             # as an image override being present.
             if [ "$target_revision" = "Unknown" ] || [ "$image_override" = "Unknown" ]; then
               local unreadable_snippet
               unreadable_snippet=$(printf '%s' "$image_override" | head -c 180)
               record_rollout_risk "argocd_source_contract_unreadable targetRevision=$target_revision images=${unreadable_snippet}"
               echo "❌ ArgoCD source contract could not be read (kubectl query failed); refusing to continue" >&2
               exit 1
             fi
             if [ "$target_revision" != "main" ]; then
               record_rollout_risk "argocd_source_target_revision_not_main targetRevision=$target_revision"
               echo "❌ ArgoCD source targetRevision must be main, got: $target_revision" >&2
               exit 1
             fi
             if [ -n "$image_override" ]; then
               local image_override_snippet
               image_override_snippet=$(printf '%s' "$image_override" | head -c 180)
               record_rollout_risk "argocd_source_image_override_present images=${image_override_snippet}"
               echo "❌ ArgoCD source kustomize.images override must be empty; image truth belongs in k8s/awoooi-prod/kustomization.yaml" >&2
               exit 1
             fi
           }
           # Wait for the ArgoCD Application to sync to the target revision:
           # up to 36 polls with a 5s sleep between them.
           # 2026-05-24 Codex: top-level Application health can stay Degraded
           # without per-resource health detail. Treat that as rollout evidence,
           # then let kubectl rollout status and API health decide pass/fail.
           echo "⏳ 等待 ArgoCD sync..."
           validate_argocd_source_contract
           # Ask ArgoCD for a hard refresh; a failure here is ignored.
           $KUBECTL annotate application awoooi-prod -n argocd \
             argocd.argoproj.io/refresh=hard --overwrite >/dev/null 2>&1 || true
           MAX_SYNC_POLLS=36
           poll=0
           while true; do
             poll=$((poll + 1))
             SYNC=$(app_field '{.status.sync.status}' sync)
             HEALTH=$(app_field '{.status.health.status}' health)
             REVISION=$(app_field '{.status.sync.revision}' revision)
             SHORT_REVISION=$(echo "$REVISION" | cut -c1-8)
             SHORT_EXPECTED=$(echo "$EXPECTED_REVISION" | cut -c1-8)
             echo "  ArgoCD: sync=$SYNC health=$HEALTH revision=$SHORT_REVISION expected=${SHORT_EXPECTED:-any}"
             probe_public_health "argocd_wait"
             # Any field that reads back as "Unknown" counts once per poll; only
             # the first three polls with an unknown field are recorded as risk.
             case "Unknown" in
               "$SYNC"|"$HEALTH"|"$REVISION")
                 UNKNOWN_STATUS_COUNT=$((UNKNOWN_STATUS_COUNT + 1))
                 if [ "$UNKNOWN_STATUS_COUNT" -le 3 ]; then
                   record_rollout_risk "argocd_status_unknown sync=$SYNC health=$HEALTH revision=$SHORT_REVISION expected=${SHORT_EXPECTED:-any}"
                 fi
                 ;;
             esac
             # Done once synced to the expected revision (or to any revision when
             # none is expected). Non-healthy status is evidence, not a failure.
             if [ "$SYNC" = "Synced" ] && { [ -z "$EXPECTED_REVISION" ] || [ "$REVISION" = "$EXPECTED_REVISION" ]; }; then
               if [ "$HEALTH" != "Healthy" ]; then
                 RESOURCE_EVIDENCE=$(collect_argocd_resource_evidence)
                 record_rollout_risk "argocd_health_not_healthy health=$HEALTH revision=$SHORT_REVISION resources=${RESOURCE_EVIDENCE:-none_visible}"
               fi
               echo "✅ ArgoCD Synced to target revision (health=$HEALTH)"
               break
             fi
             if [ "$poll" -ge "$MAX_SYNC_POLLS" ]; then
               echo "❌ ArgoCD 未在期限內同步到目標 revision"
               exit 1
             fi
             sleep 5
           done
           # Confirm that each deployment finished rolling out (120s timeout each).
           for workload in api web worker; do
             $KUBECTL rollout status "deployment/awoooi-${workload}" -n awoooi-prod --timeout=120s
           done
           echo "✅ 部署完成"
           # Final health gate: up to three probes of the public API health URL,
           # 10s apart. The remote script exits 1 when none of them returns 200.
           HEALTH_PASS=0
           for attempt in 1 2 3; do
             CURL_STATUS=0
             HTTP_CODE=$(curl -sS -w "%{http_code}" -o /dev/null --connect-timeout 10 --max-time 20 "${{ env.API_HEALTH_URL }}" 2>/dev/null) || CURL_STATUS=$?
             [ "$CURL_STATUS" -eq 0 ] || HTTP_CODE="curl_error_${CURL_STATUS}"
             if [ "$HTTP_CODE" != "200" ]; then
               echo "⏳ 嘗試 #$attempt: HTTP $HTTP_CODE，等待 10s..."
               sleep 10
               continue
             fi
             echo "✅ API 健康檢查通過"
             HEALTH_PASS=1
             break
           done
           if [ "$HEALTH_PASS" = "0" ]; then
             record_rollout_risk "public_health_final_failed"
             echo "❌ API 健康檢查失敗"
             exit 1
           fi
           ARGOCD_WAIT
           # Must be the first command after the pipeline: PIPESTATUS[0] is the
           # exit status of ssh, not of tee.
           ROLLOUT_EXIT=${PIPESTATUS[0]}
           set -e
           ROLLOUT_SUMMARY=""
           ROLLOUT_RISK="0"
           # Pick the risk flag and the last summary line out of the captured log.
           if grep -q '^AWOOOI_ROLLOUT_RISK=1$' "$ROLLOUT_LOG"; then
             ROLLOUT_RISK="1"
             ROLLOUT_SUMMARY=$(grep '^AWOOOI_ROLLOUT_SUMMARY=' "$ROLLOUT_LOG" | tail -1 | sed 's/^AWOOOI_ROLLOUT_SUMMARY=//' | cut -c1-700)
           fi
           # Expose both values to later steps when the runner provides GITHUB_ENV.
           [ -z "${GITHUB_ENV:-}" ] || printf 'AWOOI_ROLLOUT_RISK=%s\nAWOOI_ROLLOUT_SUMMARY=%s\n' "${ROLLOUT_RISK}" "${ROLLOUT_SUMMARY}" >> "$GITHUB_ENV"
           rm -f "$ROLLOUT_LOG"
           DEPLOY_READBACK_EXIT=0
           python3 - <<'PY' || DEPLOY_READBACK_EXIT=$?
           # Poll the production delivery-closure workbench until its summary
           # reports both the runtime build commit and the GitOps desired API image
           # tag of this build. Exits 0 on a match and 1 when every attempt failed.
           import json
           import os
           import sys
           import time
           import urllib.request
           WORKBENCH_URL = "https://awoooi.wooo.work/api/v1/agents/delivery-closure-workbench"
           expected_short = os.environ["IMAGE_TAG"].strip().lower()[:10]
           attempts = int(os.environ.get("DEPLOY_READBACK_ATTEMPTS", "36"))
           sleep_seconds = int(os.environ.get("DEPLOY_READBACK_SLEEP_SECONDS", "10"))
           def summary_text(summary, key):
               # Missing, null and other falsy values all read back as "".
               return str(summary.get(key) or "")
           last_error = ""
           for attempt in range(1, attempts + 1):
               try:
                   with urllib.request.urlopen(WORKBENCH_URL, timeout=20) as response:
                       payload = json.load(response)
               except Exception as exc:
                   # Only the exception type is reported.
                   last_error = "fetch_failed=" + type(exc).__name__
               else:
                   summary = payload.get("summary") if isinstance(payload, dict) else None
                   if not isinstance(summary, dict):
                       summary = {}
                   runtime_short = summary_text(
                       summary, "production_deploy_runtime_build_commit_short_sha"
                   )
                   desired_short = summary_text(
                       summary, "production_deploy_desired_main_api_image_tag_short_sha"
                   )
                   desired_status = summary_text(
                       summary,
                       "production_deploy_desired_main_api_image_tag_readback_status",
                   )
                   # Reported for context only; it is not part of the pass condition.
                   matches_main = (
                       summary.get("production_deploy_image_tag_matches_main") is True
                   )
                   observed = (runtime_short, desired_short, desired_status)
                   if observed == (expected_short, expected_short, "ok"):
                       print(
                           "✅ Production deploy readback matches this build and GitOps "
                           f"desired image tag ({expected_short}) on attempt "
                           f"{attempt}/{attempts};matches_main={matches_main}"
                       )
                       sys.exit(0)
                   last_error = ";".join(
                       f"{label}={value}"
                       for label, value in (
                           ("expected", expected_short),
                           ("runtime", runtime_short),
                           ("desired", desired_short),
                           ("desired_status", desired_status),
                           ("matches_main", matches_main),
                       )
                   )
               print(
                   f"production_deploy_readback_attempt={attempt}/{attempts};{last_error}",
                   file=sys.stderr,
               )
               if attempt < attempts:
                   time.sleep(sleep_seconds)
           print("production_deploy_readback_mismatch=" + last_error, file=sys.stderr)
           sys.exit(1)
           PY
           # A failed readback fails the step with the exit status of the Python script.
           if [ "$DEPLOY_READBACK_EXIT" -ne 0 ]; then
             exit "$DEPLOY_READBACK_EXIT"
           fi
           # The readback matched, so a non-zero rollout wait is downgraded to a
           # rollout risk and appended to (or becomes) the summary.
           if [ "$ROLLOUT_EXIT" -ne 0 ]; then
             READBACK_NOTE="rollout_exit=${ROLLOUT_EXIT}; production_deploy_readback_matched=true"
             if [ "$ROLLOUT_RISK" != "1" ]; then
               ROLLOUT_RISK="1"
               ROLLOUT_SUMMARY="${READBACK_NOTE}"
             else
               ROLLOUT_SUMMARY="${ROLLOUT_SUMMARY}; ${READBACK_NOTE}"
             fi
             ROLLOUT_SUMMARY=$(printf '%s' "$ROLLOUT_SUMMARY" | cut -c1-700)
             if [ -n "${GITHUB_ENV:-}" ]; then
               printf 'AWOOI_ROLLOUT_RISK=%s\nAWOOI_ROLLOUT_SUMMARY=%s\n' "${ROLLOUT_RISK}" "${ROLLOUT_SUMMARY}" >> "$GITHUB_ENV"
             fi
             echo "⚠️ ArgoCD/rollout wait exited ${ROLLOUT_EXIT}, but production deploy readback matched; treating as rollout risk, not deploy failure."
           fi
           # Send the risk summary through the CI/CD notifier; a notifier failure
           # is logged and otherwise ignored.
           if [ "$ROLLOUT_RISK" = "1" ]; then
             ACTOR="${GITHUB_ACTOR:-${{ github.actor }}}"
             if (
               export AWOOI_CICD_STATUS=pending
               export AWOOI_CICD_STAGE=rollout-risk
               export AWOOI_CICD_JOB_NAME="AWOOOI 部署完成但仍有風險證據"
               export AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}"
               export AWOOI_CICD_TRIGGERED_BY="${ACTOR}"
               export AWOOI_CICD_SUMMARY="${ROLLOUT_SUMMARY}"
               scripts/ci/notify-awoooi-cicd.sh
             ); then
               echo "✅ CI/CD rollout risk notification mirrored through AWOOI API"
             else
               echo "⚠️ CI/CD rollout risk notification failed (non-fatal)"
             fi
           fi
           exit 0
       - name: Notify Build Deploy Success
         run: |
           # Report build-and-deploy success through the CI/CD notifier script.
           # A notifier failure is logged but never fails the step.
           DURATION=$(( $(date +%s) - ${{ steps.commit.outputs.start_time }} ))
           ACTOR="${{ github.actor }}"
           if (
             export AWOOI_CICD_STATUS=success
             export AWOOI_CICD_STAGE=build-and-deploy
             export AWOOI_CICD_JOB_NAME="AWOOOI 建置部署完成"
             export AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}"
             export AWOOI_CICD_TRIGGERED_BY="${ACTOR}"
             export AWOOI_CICD_DURATION_SECONDS="${DURATION}"
             export AWOOI_CICD_SUMMARY="Image build/push + ArgoCD rollout + API health passed"
             scripts/ci/notify-awoooi-cicd.sh
           ); then
             echo "✅ CI/CD build-deploy success notification mirrored through AWOOI API"
           else
             echo "⚠️ CI/CD build-deploy success notification failed (non-fatal)"
           fi
       # 2026-04-09 Claude Sonnet 4.6: Sprint 5.2 — sync ops scripts to 188 (ollama user)
       # 188 deploy key is rotated and must not be read by this disabled step.
       # Scripts: docker-health-monitor.sh + pg-backup.sh + notify-awoooi-ops.sh
       # Sensing-layer and backup notifications go through the AWOOI API/AwoooP
       # first; direct Telegram delivery is kept only as an API-offline fallback.
       - name: Sync Ops Scripts to 188
         # 2026-05-13 Codex T14e/P0:
         # Disabled until the 188 ops sync path is moved to a file-secret or
         # Ansible-controlled channel. Gitea Actions logs step env values, and
         # multiline SSH secrets must not be exposed through CD logs.
         continue-on-error: true
         if: ${{ false }}
         run: echo "188 ops script sync disabled pending secure key rotation path"
       - name: Notify Pipeline Failure
         if: failure()
         env:
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           # The commit subject is handed over as an environment variable, not
           # templated into the script text, so quotes or command substitutions
           # in a commit message are never evaluated by the shell.
           FAILED_COMMIT_SUBJECT: ${{ steps.commit.outputs.message }}
         run: |
           # Report the failed build-and-deploy job through the CI/CD notifier.
           # The summary prefers the rollout summary exported by the deploy step
           # and falls back to the commit subject. The direct Telegram message
           # that used to be composed here was never sent, so it is not built.
           COMMIT_MSG="${FAILED_COMMIT_SUBJECT:-}"
           ACTOR="${{ github.actor }}"
           FAILURE_SUMMARY="${AWOOI_ROLLOUT_SUMMARY:-${COMMIT_MSG}}"
           if AWOOI_CICD_STATUS=failed \
             AWOOI_CICD_STAGE=build-and-deploy \
             AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_TRIGGERED_BY="${ACTOR}" \
             AWOOI_CICD_SUMMARY="${FAILURE_SUMMARY}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD build failure notification mirrored through AWOOI API"
           else
             echo "AWOOI API notify failed; direct Telegram fallback disabled to preserve AwoooP receipt chain"
           fi
   post-deploy-checks:
     # 2026-06-28 Codex: post-deploy checks belong to real deploy runs; skip
     # marker/no-op commits already accounted for by the previous deploy run.
     # 2026-07-01 Codex: `[metadata-only]` commits do not roll a new image, so
     # post-deploy smokes would only retest the previous production artifact.
     if: ${{ github.event_name != 'push' || (!contains(github.event.head_commit.message, '[skip ci]') && !contains(github.event.head_commit.message, 'cancel-stale-cd') && !contains(github.event.head_commit.message, '[metadata-only]')) }}
     needs: [build-and-deploy]
     timeout-minutes: 30
     # 2026-04-30 Codex: keep post-deploy on the host runner too. Playwright
     # install-deps can also kill the act-managed job container with RWLayer=nil.
     runs-on: awoooi-non110-host
     steps:
       - name: Bootstrap Host Runner Tools
         # 2026-05-05 Codex: post-deploy also uses checkout and curl-based
         # notifications, so it needs the same runner bootstrap as earlier jobs.
         run: |
           # Packages are installed only where apk exists; on any other runner
           # the step succeeds without doing anything.
           command -v apk >/dev/null 2>&1 || exit 0
           apk add --no-cache nodejs npm git curl bash coreutils python3 openssh-client docker-cli docker-cli-buildx
       - uses: actions/checkout@v4
       - name: Wait for Host Web Build Pressure
         # 2026-06-28 Codex: post-deploy is browser-heavy; fail closed on host
         # pressure until runner load is isolated from production.
         run: |
           bash scripts/ci/wait-host-web-build-pressure.sh
       - name: Get Commit Info
         id: commit
         run: |
           # Publishes three step outputs: short_sha (first 7 characters of
           # GITHUB_SHA), message (commit subject, at most 50 bytes) and
           # start_time (epoch seconds when this step ran).
           # The subject is truncated on a UTF-8 character boundary: a plain
           # byte cut can split a multi-byte character and leave invalid UTF-8
           # in the output. Undecodable bytes are dropped.
           COMMIT_SUBJECT="$(git log -1 --pretty=%s | python3 -c 'import sys; sys.stdout.buffer.write(sys.stdin.buffer.read(50).decode("utf-8", "ignore").encode("utf-8"))')"
           {
             echo "short_sha=${GITHUB_SHA::7}"
             echo "message=${COMMIT_SUBJECT}"
             echo "start_time=$(date +%s)"
           } >> "$GITHUB_OUTPUT"
       - name: Notify Post Deploy Checks Start
         run: |
           # Announce the start of the post-deploy checks through the CI/CD
           # notifier script. A notifier failure is logged but not fatal.
           ACTOR="${{ github.actor }}"
           if (
             export AWOOI_CICD_STATUS=running
             export AWOOI_CICD_STAGE=post-deploy-checks
             export AWOOI_CICD_JOB_NAME="AWOOOI 部署後驗證開始"
             export AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}"
             export AWOOI_CICD_TRIGGERED_BY="${ACTOR}"
             export AWOOI_CICD_SUMMARY="Alert Chain / Source Link / Monitoring / Smoke gates started"
             scripts/ci/notify-awoooi-cicd.sh
           ); then
             echo "✅ CI/CD post-deploy start notification mirrored through AWOOI API"
           else
             echo "⚠️ CI/CD post-deploy start notification failed (non-fatal)"
           fi
       # Phase O-4.5 2026-04-02: Alert Chain Smoke Test (Wave A.6 + B.2 ADR-037)
       # 驗證告警鏈路 E2E: API Health + Webhook + OTEL + Event Exporter
       # 2026-04-05 Claude Code cache優化: 使用 /opt/api-venv (已有 requests)，移除 Setup Python Tools step
       # 2026-06-28 Codex: commander controlled automation keeps the canary
       # evidence and notification signal, but no longer blocks CD completion.
       - name: Alert Chain Smoke Test
         id: alert_chain_smoke
         run: |
           # Stage the deploy SSH private key at ~/.ssh/deploy_key (mode 600).
           # Source: $AWOOOI_DEPLOY_SSH_KEY_PATH when set, otherwise the key is
           # expected to already sit at ~/.ssh/deploy_key. Exits 1 when the key
           # is not readable. The umask 077 set here stays in effect for the
           # rest of the calling script.
           prepare_deploy_key() {
             local ssh_dir="${HOME}/.ssh"
             local staged_key="${ssh_dir}/deploy_key"
             mkdir -p "${ssh_dir}"
             umask 077
             local key_origin="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${staged_key}}"
             [ -r "${key_origin}" ] || {
               echo "❌ deploy ssh key file missing: ${key_origin}" >&2
               exit 1
             }
             # Copy only when the key comes from somewhere else.
             [ "${key_origin}" = "${staged_key}" ] || cp "${key_origin}" "${staged_key}"
             chmod 600 "${staged_key}"
           }
           # Print the pod phase, one per line, of every pod in the observability
           # namespace labelled app.kubernetes.io/name=$1, read over SSH from the
           # K8s host.
           collect_observability_statuses() {
             local app_name="$1"
             local remote_query="sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${K8S_API_SERVER} get pods -n observability -l app.kubernetes.io/name=${app_name} --no-headers -o custom-columns=STATUS:.status.phase"
             ssh $SSH_OPTS "wooo@${K8S_SSH_HOST}" "${remote_query}"
           }
           # Run collect_observability_statuses for component $1 with stderr folded
           # into stdout. Always prints whatever was captured; returns 0 when the
           # query succeeded and 1 when it failed.
           capture_observability_statuses() {
             local app_name="$1"
             local combined
             local rc=0
             combined="$(collect_observability_statuses "${app_name}" 2>&1)" || rc=1
             printf '%s' "${combined}"
             return "${rc}"
           }
           # 2026-05-19 Codex: the smoke test runs inside CI_IMAGE, but the
           # observability pod checks need the K3s host kubectl context. Capture
           # those read-only statuses on the host and pass them into the
           # container, instead of making the container own kube credentials.
           OBSERVABILITY_PREFLIGHT_ERROR=""
           OTEL_COLLECTOR_STATUSES=""
           OTEL_COLLECTOR_ERROR=""
           EVENT_EXPORTER_STATUSES=""
           EVENT_EXPORTER_ERROR=""
           prepare_deploy_key
           DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
           if ! { ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null && test -s "${DEPLOY_KNOWN_HOSTS}"; }; then
             # No trusted host key: both components report the same preflight error.
             OBSERVABILITY_PREFLIGHT_ERROR="K8s host keyscan failed"
             OTEL_COLLECTOR_ERROR="${OBSERVABILITY_PREFLIGHT_ERROR}"
             EVENT_EXPORTER_ERROR="${OBSERVABILITY_PREFLIGHT_ERROR}"
           else
             SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -o ConnectTimeout=10"
             # On a failed query the captured text is the error output: keep its
             # last line (first 200 bytes) as the error and clear the statuses.
             OTEL_COLLECTOR_STATUSES="$(capture_observability_statuses otel-collector)" || {
               OTEL_COLLECTOR_ERROR="$(printf '%s' "${OTEL_COLLECTOR_STATUSES}" | tail -1 | head -c 200)"
               OTEL_COLLECTOR_STATUSES=""
             }
             EVENT_EXPORTER_STATUSES="$(capture_observability_statuses event-exporter)" || {
               EVENT_EXPORTER_ERROR="$(printf '%s' "${EVENT_EXPORTER_STATUSES}" | tail -1 | head -c 200)"
               EVENT_EXPORTER_STATUSES=""
             }
           fi
           # Identifiers of this run's source-link canary, published as step outputs.
           SOURCE_LINK_RUN_REF="gitea-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}"
           SOURCE_LINK_CANARY_WORK_ITEM_ID="source-evidence:sentry:upstream_canary:awoooi-source-link-canary-${SOURCE_LINK_RUN_REF}"
           SOURCE_LINK_CANARY_EVENT_ID="sentry:source_correlation_linked:awoooi-source-link-canary-${SOURCE_LINK_RUN_REF}"
           echo "source_link_canary_work_item_id=${SOURCE_LINK_CANARY_WORK_ITEM_ID}" >> "$GITHUB_OUTPUT"
           echo "source_link_canary_event_id=${SOURCE_LINK_CANARY_EVENT_ID}" >> "$GITHUB_OUTPUT"
           # Read the operator API key from the production secret over SSH.
           # SSH_OPTS is only set when the host keyscan succeeded; without it
           # ssh would run without the deploy key and the pinned known_hosts
           # file, so the readback is skipped. A failing ssh must not abort
           # this step under errexit: an empty key takes the same non-blocking
           # alert_chain_status=fail path below.
           AWOOOP_OPERATOR_API_KEY=""
           if [ -n "${SSH_OPTS:-}" ]; then
             AWOOOP_OPERATOR_API_KEY="$(
               ssh $SSH_OPTS "wooo@${K8S_SSH_HOST}" \
                 "sudo kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml --server=${K8S_API_SERVER} get secret awoooi-secrets -n awoooi-prod -o jsonpath='{.data.AWOOOP_OPERATOR_API_KEY}' | base64 -d"
             )" || {
               echo "⚠️ SSH readback of AWOOOP_OPERATOR_API_KEY failed"
               AWOOOP_OPERATOR_API_KEY=""
             }
           else
             echo "⚠️ K8s host keyscan failed earlier; skipping AWOOOP_OPERATOR_API_KEY readback"
           fi
           if [ -z "${AWOOOP_OPERATOR_API_KEY}" ]; then
             echo "❌ AWOOOP_OPERATOR_API_KEY missing from production secret; source-link canary cannot run"
             echo "alert_chain_status=fail" >> "$GITHUB_OUTPUT"
             exit 0
           fi
           export AWOOOP_OPERATOR_API_KEY
           # 2026-05-05 Codex: use the keepalived VIP instead of a fixed node.
           # Host runner launches the CI image explicitly to avoid act RWLayer=nil.
           SMOKE_CONTAINER="awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-alert-smoke"
           # Executed by bash -lc inside the container; pipefail makes a failing
           # smoke test fail the pipeline even though tee comes last.
           SMOKE_COMMAND='set -o pipefail; source /opt/api-venv/bin/activate && python3 scripts/alert_chain_smoke_test.py --api-url ${{ env.ALERT_CHAIN_API_URL }} --source-link-canary-target-incident-id INC-20260505-25E744 --run-ref "${SOURCE_LINK_RUN_REF}" --json | tee /tmp/alert_chain_result.json'
           DOCKER_ARGS=(
             --rm
             --name "${SMOKE_CONTAINER}"
             --cpus "1.0"
             --memory "1g"
             -v "$PWD:/workspace"
             -v awoooi-api-venv-cache:/opt/api-venv
             -w /workspace
             -e AWOOOI_OTEL_COLLECTOR_STATUSES="${OTEL_COLLECTOR_STATUSES}"
             -e AWOOOI_OTEL_COLLECTOR_ERROR="${OTEL_COLLECTOR_ERROR}"
             -e AWOOOI_EVENT_EXPORTER_STATUSES="${EVENT_EXPORTER_STATUSES}"
             -e AWOOOI_EVENT_EXPORTER_ERROR="${EVENT_EXPORTER_ERROR}"
             # Passed by name only, so the key value never appears on the command line.
             -e AWOOOP_OPERATOR_API_KEY
             -e AWOOOP_OPERATOR_ID="gitea-cd-post-deploy"
             -e SOURCE_LINK_RUN_REF="${SOURCE_LINK_RUN_REF}"
           )
           # A failed smoke test is recorded in the step output but does not fail the step.
           if ! docker run "${DOCKER_ARGS[@]}" "${{ env.CI_IMAGE }}" bash -lc "${SMOKE_COMMAND}"; then
             echo "alert_chain_status=fail" >> $GITHUB_OUTPUT
             echo "⚠️ Alert Chain smoke failed; continuing under commander controlled automation"
             exit 0
           fi
           echo "alert_chain_status=pass" >> $GITHUB_OUTPUT
       # Phase O-5 Wave C.2 2026-04-02 ogt: monitoring coverage verification
       # (generate_monitoring.py --check).
       # 2026-06-28 Codex: coverage remains measured and notified, but no longer
       # turns a deployed runtime into a blocked terminal CD state by default.
       - name: Monitoring Coverage Check
         id: monitoring_coverage
         run: |
           # Run the coverage check inside the CI image. The result is recorded
           # as the `coverage_status` step output; a failing check only prints a
           # warning and the step itself still ends successfully.
           coverage_status=fail
           docker run --rm \
             --name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-coverage" \
             --cpus "1.0" \
             --memory "1g" \
             -v "$PWD:/workspace" \
             -v awoooi-api-venv-cache:/opt/api-venv \
             -w /workspace \
             "${{ env.CI_IMAGE }}" \
             bash -lc 'source /opt/api-venv/bin/activate && python3 scripts/generate_monitoring.py --check' \
             && coverage_status=pass
           echo "coverage_status=${coverage_status}" >> $GITHUB_OUTPUT
           if [ "${coverage_status}" != "pass" ]; then
             echo "⚠️ Monitoring coverage check failed; continuing under commander controlled automation"
           fi
       - name: AwoooP Source Correlation Applied-Link Smoke
         id: source_correlation_apply_smoke
         run: |
           # Canary identifiers come from the alert_chain_smoke step outputs and
           # are forwarded into the container by name (`-e VAR` without a value).
           export SOURCE_LINK_CANARY_WORK_ITEM_ID="${{ steps.alert_chain_smoke.outputs.source_link_canary_work_item_id }}"
           export SOURCE_LINK_CANARY_EVENT_ID="${{ steps.alert_chain_smoke.outputs.source_link_canary_event_id }}"
           # The result is recorded as the `source_correlation_apply_status`
           # step output; a failing smoke only prints a warning and the step
           # itself still ends successfully.
           apply_status=fail
           docker run --rm \
             --name "awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-source-link-smoke" \
             --cpus "0.5" \
             --memory "512m" \
             -v "$PWD:/workspace" \
             -v awoooi-api-venv-cache:/opt/api-venv \
             -w /workspace \
             -e SOURCE_LINK_CANARY_WORK_ITEM_ID \
             -e SOURCE_LINK_CANARY_EVENT_ID \
             "${{ env.CI_IMAGE }}" \
             bash -lc 'set -o pipefail; source /opt/api-venv/bin/activate && python3 scripts/awooop_source_correlation_apply_smoke.py \
               --api-url ${{ env.ALERT_CHAIN_API_URL }} \
               --target-incident-id INC-20260505-25E744 \
               --work-item-id "${SOURCE_LINK_CANARY_WORK_ITEM_ID}" \
               --expected-source-event-provider-event-id "${SOURCE_LINK_CANARY_EVENT_ID}" \
               --allow-existing-apply \
               --refresh-if-stale-days 6 \
               --refresh-work-item-id "${SOURCE_LINK_CANARY_WORK_ITEM_ID}" \
               --verify-refresh-candidate \
               --reviewer-id gitea_cd_source_link_canary \
               --operator-note "CD dedicated source-link canary; append-only status-chain proof" \
               | tee /tmp/source_correlation_apply_smoke.json' \
             && apply_status=pass
           echo "source_correlation_apply_status=${apply_status}" >> $GITHUB_OUTPUT
           if [ "${apply_status}" != "pass" ]; then
             echo "⚠️ Source correlation applied-link smoke failed; continuing under commander controlled automation"
           fi
       # [Chief architect] Playwright E2E smoke test step v1.0.0, added 2026-04-01 (Taipei time).
       # continue-on-error: true — a smoke failure does not block the deployment,
       # but the result is reflected in the TG notification.
       - name: E2E Smoke Test
         id: smoke
         continue-on-error: true
         run: |
           # Write the in-container script to the host; it is bind-mounted
           # read-only into the CI image further down. The heredoc delimiter is
           # quoted, so nothing inside it is expanded by the host shell.
           cat > /tmp/awoooi-smoke.sh <<'CI_SCRIPT'
           set -euo pipefail
           # Chief-architect review I4 + 2026-04-05 Claude Code cache optimization:
           # playwright.config.ts imports @playwright/test, so the pnpm node_modules
           # must be installed first. The pnpm store is persisted at /opt/pnpm-store;
           # when the pnpm-lock.yaml hash is unchanged, install with --prefer-offline.
           SOURCE_WORKDIR=/source
           SMOKE_WORKDIR=/tmp/awoooi-smoke-workspace
           cleanup_smoke_workspace_artifacts() {
             # Best-effort cleanup: the redirect must stay an indented continuation
             # of the rm command, otherwise it terminates the YAML block scalar.
             rm -rf "$SMOKE_WORKDIR" \
               /tmp/pnpm-install.log \
               /tmp/playwright-install-deps.log \
               2>/dev/null || true
           }
           trap cleanup_smoke_workspace_artifacts EXIT
           rm -rf "$SMOKE_WORKDIR"
           mkdir -p "$SMOKE_WORKDIR"
           # /source is mounted read-only, so work on a private copy that leaves
           # out VCS data, installed modules and earlier test reports.
           if command -v tar >/dev/null 2>&1; then
             tar \
               --exclude='./.git' \
               --exclude='./node_modules' \
               --exclude='./apps/web/node_modules' \
               --exclude='./apps/web/test-results' \
               --exclude='./apps/web/playwright-report' \
               --exclude='./packages/*/node_modules' \
               -cf - -C "$SOURCE_WORKDIR" . | tar -xf - -C "$SMOKE_WORKDIR"
           else
             cp -a "$SOURCE_WORKDIR/." "$SMOKE_WORKDIR/"
             rm -rf "$SMOKE_WORKDIR/.git" \
               "$SMOKE_WORKDIR/node_modules" \
               "$SMOKE_WORKDIR/apps/web/node_modules" \
               "$SMOKE_WORKDIR/apps/web/test-results" \
               "$SMOKE_WORKDIR/apps/web/playwright-report" \
               2>/dev/null || true
           fi
           cd "$SMOKE_WORKDIR"
           PNPM_STORE=/opt/pnpm-store
           PNPM_HASH_FILE=/opt/pnpm-store/.lock_hash
           CURRENT_PNPM_HASH=$(md5sum pnpm-lock.yaml | awk '{print $1}')
           corepack enable 2>/dev/null || npm install -g pnpm@9 -q
           mkdir -p "$PNPM_STORE"
           pnpm config set store-dir $PNPM_STORE
           if [ "$(cat $PNPM_HASH_FILE 2>/dev/null)" != "$CURRENT_PNPM_HASH" ]; then
             echo "📦 pnpm lock 已變更，重裝 node_modules..."
             pnpm install --frozen-lockfile 2>&1 | tee /tmp/pnpm-install.log | tail -20
             echo "$CURRENT_PNPM_HASH" > $PNPM_HASH_FILE
           else
             echo "⚡ 使用快取 pnpm store (lock 未變更)，prefer-offline..."
             pnpm install --frozen-lockfile --prefer-offline 2>&1 | tee /tmp/pnpm-install.log | tail -20
           fi
           # Fail early if @playwright/test cannot be resolved from apps/web.
           pnpm --dir apps/web exec node -e "require.resolve('@playwright/test')"
           cd apps/web
           # Playwright Chromium is persisted at /opt/playwright-browsers, guarded
           # by a version hash file.
           export PLAYWRIGHT_BROWSERS_PATH=/opt/playwright-browsers
           PLAYWRIGHT_VER=$(node -e "console.log(require('./package.json').devDependencies['@playwright/test'] || '')" 2>/dev/null || echo "unknown")
           PLAYWRIGHT_HASH_FILE=/opt/playwright-browsers/.version_hash
           BROWSER_PATH=$(node -e "const { chromium } = require('@playwright/test'); console.log(chromium.executablePath())")
           if [ "$(cat $PLAYWRIGHT_HASH_FILE 2>/dev/null)" != "$PLAYWRIGHT_VER" ] || [ ! -x "$BROWSER_PATH" ]; then
             echo "📦 Playwright browser cache missing/stale ($PLAYWRIGHT_VER): $BROWSER_PATH"
             pnpm exec playwright install chromium --with-deps 2>&1 | tail -20
             BROWSER_PATH=$(node -e "const { chromium } = require('@playwright/test'); console.log(chromium.executablePath())")
             test -x "$BROWSER_PATH" || { echo "❌ Playwright browser executable missing after install: $BROWSER_PATH"; exit 1; }
             echo "$PLAYWRIGHT_VER" > $PLAYWRIGHT_HASH_FILE
           else
             echo "⚡ 使用快取 Playwright Chromium ($PLAYWRIGHT_VER): $BROWSER_PATH"
           fi
           # Even on a browser cache hit, confirm the OS shared libraries exist;
           # otherwise the smoke run only exercises a Chromium launch failure
           # (e.g. libnspr4.so missing).
           if ! ldconfig -p 2>/dev/null | grep -q 'libnspr4'; then
             echo "📦 Playwright system deps missing，補安裝 Chromium deps..."
             pnpm exec playwright install-deps chromium > /tmp/playwright-install-deps.log 2>&1 || {
               tail -40 /tmp/playwright-install-deps.log
               exit 1
             }
             tail -20 /tmp/playwright-install-deps.log
           fi
           # Run the smoke test against the deployed production environment. A
           # test failure is recorded as an output value, not as a script failure.
           SMOKE_STATUS=pass
           pnpm exec playwright test tests/e2e/smoke.spec.ts --reporter=line || SMOKE_STATUS=fail
           echo "smoke_status=${SMOKE_STATUS}" >> $GITHUB_OUTPUT
           CI_SCRIPT
           # Scratch file mounted into the container as its GITHUB_OUTPUT; made
           # world-writable so the container user can append to it.
           SMOKE_OUTPUT="$PWD/.awoooi-smoke-output"
           SMOKE_CONTAINER="awoooi-cd-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}-e2e-smoke"
           rm -f "$SMOKE_OUTPUT"
           touch "$SMOKE_OUTPUT"
           chmod 666 "$SMOKE_OUTPUT"
           # Single definition of the smoke container. Any arguments are used as a
           # command prefix in front of `docker run` (e.g. `timeout -k 20s 300s`);
           # with no arguments the container runs unbounded.
           run_smoke_container() {
             "$@" docker run --rm \
               --name "$SMOKE_CONTAINER" \
               --cpus "1.5" \
               --memory "2g" \
               -v "$PWD:/source:ro" \
               -v "$SMOKE_OUTPUT:/github-output" \
               -v /tmp/awoooi-smoke.sh:/tmp/awoooi-smoke.sh:ro \
               -v awoooi-pnpm-store:/opt/pnpm-store \
               -v awoooi-playwright-browsers:/opt/playwright-browsers \
               -w /tmp \
               -e GITHUB_OUTPUT=/github-output \
               -e CI=true \
               -e PLAYWRIGHT_BASE_URL=https://awoooi.wooo.work \
               "${{ env.CI_IMAGE }}" \
               bash /tmp/awoooi-smoke.sh
           }
           SMOKE_DOCKER_STATUS=0
           # 2026-06-01 Codex: post-deploy smoke can pass, then hang in
           # runner cleanup and incorrectly mark the deploy failed. Bound only
           # the smoke container; preserve pass evidence if it was written.
           if command -v timeout >/dev/null 2>&1; then
             # 2026-06-14 Codex: act-runner host may provide BusyBox timeout,
             # which rejects GNU-only --kill-after. The short -k form works
             # with BusyBox and GNU timeout.
             run_smoke_container timeout -k 20s 300s || SMOKE_DOCKER_STATUS=$?
           else
             run_smoke_container || SMOKE_DOCKER_STATUS=$?
           fi
           if [ "$SMOKE_DOCKER_STATUS" != "0" ]; then
             # timeout signals the docker CLI, not the container, so a hung
             # container may still be running. Best-effort removal; this is a
             # harmless no-op when --rm already removed it.
             docker rm -f "$SMOKE_CONTAINER" >/dev/null 2>&1 || true
           fi
           if [ "$SMOKE_DOCKER_STATUS" != "0" ] && ! grep -q '^smoke_status=pass$' "$SMOKE_OUTPUT"; then
             echo "smoke_status=fail" > "$SMOKE_OUTPUT"
             # Publish the failure before exiting; otherwise smoke_status is never
             # written to the step outputs on this path.
             cat "$SMOKE_OUTPUT" >> "$GITHUB_OUTPUT"
             echo "E2E smoke container failed before pass evidence: ${SMOKE_DOCKER_STATUS}"
             exit "$SMOKE_DOCKER_STATUS"
           fi
           if [ "$SMOKE_DOCKER_STATUS" != "0" ]; then
             echo "E2E smoke pass evidence was written; treating container exit ${SMOKE_DOCKER_STATUS} as cleanup timeout"
           fi
           cat "$SMOKE_OUTPUT" >> "$GITHUB_OUTPUT"
         env:
           CI: "true"
           # Test the deployed production environment directly; do not start a
           # local dev server.
           PLAYWRIGHT_BASE_URL: "https://awoooi.wooo.work"
       # Sends the post-deploy success notification through
       # scripts/ci/notify-awoooi-cicd.sh, summarising the results of the
       # preceding smoke/coverage steps. A notify failure is only logged.
       - name: Notify Health Check Success
         env:
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           # Any step output other than 'pass' (including an unset output) is
           # rendered as a warning marker.
           SMOKE_RESULT: ${{ steps.smoke.outputs.smoke_status == 'pass' && '✅' || '⚠️' }}
           ALERT_CHAIN_RESULT: ${{ steps.alert_chain_smoke.outputs.alert_chain_status == 'pass' && '✅' || '⚠️' }}
           MONITORING_RESULT: ${{ steps.monitoring_coverage.outputs.coverage_status == 'pass' && '✅' || '⚠️' }}
           SOURCE_LINK_RESULT: ${{ steps.source_correlation_apply_smoke.outputs.source_correlation_apply_status == 'pass' && '✅' || '⚠️' }}
           # Step outputs are handed over as environment variables instead of
           # being spliced into the script text, so their content can never be
           # interpreted as shell syntax.
           DEPLOY_START_TIME: ${{ steps.commit.outputs.start_time }}
         run: |
           END_TIME=$(date +%s)
           # Fall back to a zero duration when the start timestamp is missing or
           # not a plain integer, so shell arithmetic cannot abort this step
           # after an otherwise successful deployment.
           case "${DEPLOY_START_TIME:-}" in
             ''|*[!0-9]*) DEPLOY_START_TIME="${END_TIME}" ;;
           esac
           DURATION=$((END_TIME - DEPLOY_START_TIME))
           # The notification is delivered only through the AWOOI API helper;
           # the variables below are its inputs for this invocation.
           if AWOOI_CICD_STATUS=success \
             AWOOI_CICD_STAGE=post-deploy \
             AWOOI_CICD_JOB_NAME="AWOOOI 部署完成" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_DURATION_SECONDS="${DURATION}" \
             AWOOI_CICD_SUMMARY="API=✅; Web=✅; AlertChain=${ALERT_CHAIN_RESULT}; SourceLink=${SOURCE_LINK_RESULT}; Monitoring=${MONITORING_RESULT}; Smoke=${SMOKE_RESULT}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD success notification mirrored through AWOOI API"
           else
             echo "AWOOI API notify failed; direct Telegram fallback disabled to preserve AwoooP receipt chain"
           fi
       # Runs only when an earlier step of this job failed; reports the failure
       # through scripts/ci/notify-awoooi-cicd.sh. A notify failure is only logged.
       - name: Notify Pipeline Failure
         if: failure()
         env:
           TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
           # Commit message and actor are handed over as environment variables
           # instead of being spliced into the script text, so quotes or `$(...)`
           # in a commit message can never be interpreted as shell syntax.
           CD_COMMIT_MESSAGE: ${{ steps.commit.outputs.message }}
           CD_ACTOR: ${{ github.actor }}
         run: |
           if AWOOI_CICD_STATUS=failed \
             AWOOI_CICD_STAGE=post-deploy-checks \
             AWOOI_CICD_JOB_NAME="AWOOOI 部署失敗" \
             AWOOI_CICD_COMMIT_SHA="${GITHUB_SHA}" \
             AWOOI_CICD_TRIGGERED_BY="${CD_ACTOR}" \
             AWOOI_CICD_SUMMARY="${CD_COMMIT_MESSAGE}" \
             scripts/ci/notify-awoooi-cicd.sh; then
             echo "✅ CI/CD post-deploy failure notification mirrored through AWOOI API"
           else
             echo "AWOOI API notify failed; direct Telegram fallback disabled to preserve AwoooP receipt chain"
           fi
       # Runs regardless of the outcome of the preceding steps and invokes the
       # host-runner workspace cleanup script with the CI image passed in as
       # HOST_RUNNER_CLEANUP_IMAGE.
       - name: Clean Post-Deploy Workspace Artifacts
         if: ${{ always() }}
         run: |
           bash scripts/ci/cleanup-host-runner-workspace.sh
         env:
           HOST_RUNNER_CLEANUP_IMAGE: ${{ env.CI_IMAGE }}

 # (Web-view footer text removed here: file statistics and Raw/Blame/History
 # links from the Gitea page; not part of the workflow definition.)