fix(ci): remove ubuntu-latest jobs (HARD RULE compliance)

刪除 external-sentinel 和 telegram-connectivity jobs
- 禁止 ubuntu-latest (GitHub Billing 限制)
- 只保留 self-hosted runner jobs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-03-25 02:13:55 +08:00
parent ad00eda73b
commit 5f3271174f

View File

@@ -1,11 +1,11 @@
# =============================================================================
# AWOOOI - Self-hosted Runner 健康檢查 Workflow
# =============================================================================
# 移植自 WOOO AIOps (OPS.157)
# 設計原則:
# - 「外部哨兵」: 使用 GitHub 託管 runner 監控 self-hosted runner
# - 即使 110 主機當機，external-sentinel Job 仍能執行並發送告警
# - 每 10 分鐘檢查一次 Runner 狀態
# - 只使用 self-hosted runner (禁止 ubuntu-latest)
#
# 🔴 HARD RULE: 禁止 ubuntu-latest (GitHub Billing 限制)
# =============================================================================
name: Runner Health Check
@@ -27,167 +27,6 @@ env:
RUNNER_HOST: 192.168.0.110
jobs:
# ===========================================
# 外部哨兵 - 已停用 (2026-03-25)
# 原因: GitHub Billing 限制，ubuntu-latest 無法使用
# ===========================================
# External sentinel job.
# Queries the GitHub REST API for this repository's self-hosted runners,
# publishes the counts as step outputs, and sends a Telegram alert (then fails
# the job) when at least one runner is not "online".
# Step outputs of `api_check`:
#   all_healthy     - "true" | "false" | "unknown" (API could not be queried)
#   api_error       - "permission_denied" | "request_failed" (only on API errors)
#   total_runners / online_count / offline_count / offline_runners
external-sentinel:
  name: "External Sentinel (GitHub-hosted)"
  if: false  # 🔴 Disabled - GitHub Billing restriction
  runs-on: ubuntu-latest
  timeout-minutes: 5
  steps:
    - name: "Check Self-Hosted Runners via API"
      id: api_check
      env:
        # NOTE(review): the list-runners endpoint appears to require repository
        # administration access, which the default GITHUB_TOKEN may not have -
        # confirm. The 403 branch below covers that case.
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        echo "🔍 外部哨兵檢查 Self-Hosted Runners 狀態..."
        echo "時間: $(date '+%Y-%m-%d %H:%M:%S %Z')"
        echo ""

        # Fetch the runner list. `set +e` lets us inspect the exit code
        # ourselves instead of aborting the step on an API error.
        set +e
        RUNNERS=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runners" --jq '.runners // []' 2>&1)
        API_EXIT_CODE=$?
        set -e

        # Classify API failures first. The 403 match is only attempted when
        # the call actually failed, so a runner id/name containing "403"
        # can no longer be mistaken for a permission error.
        if [ "$API_EXIT_CODE" -ne 0 ]; then
          if echo "$RUNNERS" | grep -q "HTTP 403"; then
            echo "⚠️ GitHub API 權限不足 (403 Forbidden)"
            echo "api_error=permission_denied" >> "$GITHUB_OUTPUT"
          else
            echo "⚠️ GitHub API 請求失敗 (exit code: $API_EXIT_CODE)"
            echo "$RUNNERS"
            echo "api_error=request_failed" >> "$GITHUB_OUTPUT"
          fi
          # Health is unknown, not "true": we could not verify anything.
          echo "all_healthy=unknown" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # The API answered but no runner is registered.
        if [ -z "$RUNNERS" ] || [ "$RUNNERS" = "[]" ]; then
          echo "⚠️ 未找到任何 Self-Hosted Runner"
          echo "offline_count=0" >> "$GITHUB_OUTPUT"
          echo "online_count=0" >> "$GITHUB_OUTPUT"
          echo "total_runners=0" >> "$GITHUB_OUTPUT"
          echo "all_healthy=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # Derive the counters from the runner array.
        TOTAL=$(echo "$RUNNERS" | jq 'length')
        OFFLINE_RUNNERS=$(echo "$RUNNERS" | jq -r '[.[] | select(.status != "online") | .name] | join(",")')
        OFFLINE_COUNT=$(echo "$RUNNERS" | jq '[.[] | select(.status != "online")] | length')
        ONLINE_COUNT=$(echo "$RUNNERS" | jq '[.[] | select(.status == "online")] | length')

        echo "📊 Runner 統計:"
        echo " - 總數: $TOTAL"
        echo " - 線上: $ONLINE_COUNT"
        echo " - 離線: $OFFLINE_COUNT"
        echo ""

        # Per-runner detail for the job log.
        echo "📋 Runner 詳情:"
        echo "$RUNNERS" | jq -r '.[] | " - \(.name): \(.status) (busy: \(.busy))"'

        # Publish results for the following steps.
        echo "offline_count=$OFFLINE_COUNT" >> "$GITHUB_OUTPUT"
        echo "offline_runners=${OFFLINE_RUNNERS:-none}" >> "$GITHUB_OUTPUT"
        echo "total_runners=$TOTAL" >> "$GITHUB_OUTPUT"
        echo "online_count=$ONLINE_COUNT" >> "$GITHUB_OUTPUT"
        if [ "$OFFLINE_COUNT" -eq 0 ]; then
          echo "all_healthy=true" >> "$GITHUB_OUTPUT"
        else
          echo "all_healthy=false" >> "$GITHUB_OUTPUT"
        fi

    - name: "Send Critical Alert if Runners Offline"
      if: steps.api_check.outputs.all_healthy == 'false'
      env:
        TELEGRAM_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        TELEGRAM_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        # Step outputs are passed through env rather than expanded inline in
        # the script, so their content is never parsed as shell code.
        OFFLINE_COUNT: ${{ steps.api_check.outputs.offline_count }}
        OFFLINE_RUNNERS: ${{ steps.api_check.outputs.offline_runners }}
        TOTAL_RUNNERS: ${{ steps.api_check.outputs.total_runners }}
      run: |
        echo "🚨 發現離線 Runner發送緊急告警..."

        # RUNNER_HOST comes from the workflow-level env when defined; the
        # literal fallback keeps the message unchanged otherwise.
        HOST="${RUNNER_HOST:-192.168.0.110}"

        MESSAGE=$(cat <<MSGEOF
        🚨 [AWOOOI] Runner 離線告警
        📍 主機: ${HOST}
        🔴 受影響: ${OFFLINE_RUNNERS}
        📊 離線: ${OFFLINE_COUNT}/${TOTAL_RUNNERS}
        ⚠️ 影響: CI/CD 部署已停擺
        🔧 修復:
        ssh wooo@${HOST} 'cd actions-runner-awoooi && ./run.sh'
        🔗 ${RUN_URL}
        MSGEOF
        )

        if [ -n "$TELEGRAM_TOKEN" ] && [ -n "$TELEGRAM_CHAT_ID" ]; then
          # Build the body with jq so newlines and quotes in the message are
          # JSON-escaped; raw newlines inside a JSON string are invalid.
          PAYLOAD=$(jq -n \
            --arg chat_id "$TELEGRAM_CHAT_ID" \
            --arg text "$MESSAGE" \
            '{chat_id: $chat_id, text: $text, disable_web_page_preview: true}')
          curl -sf --max-time 10 -X POST "https://api.telegram.org/bot${TELEGRAM_TOKEN}/sendMessage" \
            -H "Content-Type: application/json" \
            -d "$PAYLOAD" \
            && echo "✅ Telegram 告警發送成功" || echo "⚠️ Telegram 告警發送失敗"
        else
          echo "⚠️ 未設定 Telegram 憑證，略過告警發送"
        fi

        # Fail the job so the run itself is marked red when runners are offline.
        exit 1

    - name: "Report All Healthy"
      if: steps.api_check.outputs.all_healthy == 'true'
      env:
        ONLINE_COUNT: ${{ steps.api_check.outputs.online_count }}
        TOTAL_RUNNERS: ${{ steps.api_check.outputs.total_runners }}
      run: |
        echo "✅ 外部哨兵確認: 所有 Runner 運行正常"
        echo " - 線上: ${ONLINE_COUNT} / ${TOTAL_RUNNERS}"
# ===========================================
# Telegram 連通性監控 - 已停用 (2026-03-25)
# 原因: GitHub Billing 限制，ubuntu-latest 無法使用
# ===========================================
# Telegram connectivity job.
# Step 1 calls the Telegram Bot API `getMe` method with the configured bot
# token; step 2 requests the OpenClaw `/api/v1/health/telegram` endpoint.
# Each step records its finding as a step output and never fails the job.
telegram-connectivity:
  name: "Telegram Connectivity Check"
  if: false  # 🔴 Disabled - GitHub Billing restriction
  runs-on: ubuntu-latest
  timeout-minutes: 3
  steps:
    - name: "Check Telegram API Connectivity"
      id: telegram_api
      env:
        TELEGRAM_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
      run: |
        echo "🔍 檢查 Telegram API 連通性..."

        # Any curl failure (timeout, HTTP error) collapses to the literal
        # marker "FAILED", which cannot match the success pattern below.
        getme_url="https://api.telegram.org/bot${TELEGRAM_TOKEN}/getMe"
        reply=$(curl -sf --max-time 10 "$getme_url" 2>&1 || echo "FAILED")

        if ! grep -q '"ok":true' <<<"$reply"; then
          echo "❌ Telegram API 無法連接"
          echo "telegram_api=unhealthy" >> "$GITHUB_OUTPUT"
        else
          bot_username=$(jq -r '.result.username // "unknown"' <<<"$reply")
          echo "✅ Telegram Bot 有效: @$bot_username"
          echo "telegram_api=healthy" >> "$GITHUB_OUTPUT"
        fi

    - name: "Test OpenClaw Telegram Relay"
      id: openclaw_relay
      run: |
        echo "🔍 測試 OpenClaw Telegram 轉發能力..."

        # Same "FAILED" marker convention as the previous step.
        health_reply=$(curl -sf --max-time 15 \
          "${{ env.OPENCLAW_URL }}/api/v1/health/telegram" 2>&1 || echo "FAILED")

        if ! grep -q '"telegram"' <<<"$health_reply"; then
          echo "openclaw_telegram=unreachable" >> "$GITHUB_OUTPUT"
          echo "❌ 無法連接 OpenClaw"
        else
          relay_state=$(jq -r '.telegram // "unknown"' <<<"$health_reply")
          echo "openclaw_telegram=$relay_state" >> "$GITHUB_OUTPUT"
          echo "OpenClaw Telegram 狀態: $relay_state"
        fi
# ===========================================
# 內部健康檢查 - 在 Self-hosted Runner 執行
# ===========================================