fix(ci): remove ubuntu-latest jobs (HARD RULE compliance)
刪除 external-sentinel 和 telegram-connectivity jobs - 禁止 ubuntu-latest (GitHub Billing 限制) - 只保留 self-hosted runner jobs Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
167
.github/workflows/runner-healthcheck.yml
vendored
167
.github/workflows/runner-healthcheck.yml
vendored
@@ -1,11 +1,11 @@
|
||||
# =============================================================================
|
||||
# AWOOOI - Self-hosted Runner 健康檢查 Workflow
|
||||
# =============================================================================
|
||||
# 移植自 WOOO AIOps (OPS.157)
|
||||
# 設計原則:
|
||||
# - 「外部哨兵」: 使用 GitHub 託管 runner 監控 self-hosted runner
|
||||
# - 即使 110 主機當機,external-sentinel Job 仍能執行並發送告警
|
||||
# - 每 10 分鐘檢查一次 Runner 狀態
|
||||
# - 只使用 self-hosted runner (禁止 ubuntu-latest)
|
||||
#
|
||||
# 🔴 HARD RULE: 禁止 ubuntu-latest (GitHub Billing 限制)
|
||||
# =============================================================================
|
||||
|
||||
name: Runner Health Check
|
||||
@@ -27,167 +27,6 @@ env:
|
||||
RUNNER_HOST: 192.168.0.110
|
||||
|
||||
jobs:
|
||||
# ===========================================
|
||||
# 外部哨兵 - 已停用 (2026-03-25)
|
||||
# 原因: GitHub Billing 限制,ubuntu-latest 無法使用
|
||||
# ===========================================
|
||||
external-sentinel:
  # External sentinel job: queries the GitHub API for this repository's
  # self-hosted runners, sends a Telegram alert when any runner is not
  # "online", and fails the job in that case.
  name: "External Sentinel (GitHub-hosted)"
  if: false  # Disabled: GitHub billing limits make ubuntu-latest unavailable
  runs-on: ubuntu-latest
  timeout-minutes: 5

  steps:
    # Outputs written by this step:
    #   all_healthy     - "true" | "false" | "unknown"
    #   api_error       - set only when all_healthy is "unknown"
    #   total_runners, online_count, offline_count, offline_runners
    - name: "Check Self-Hosted Runners via API"
      id: api_check
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Passed through the environment so the expression is never expanded
        # directly into the shell script text.
        REPO: ${{ github.repository }}
      run: |
        echo "🔍 外部哨兵檢查 Self-Hosted Runners 狀態..."
        echo "時間: $(date '+%Y-%m-%d %H:%M:%S %Z')"
        echo ""

        # Fetch the runner list. Running the call as an `if` condition keeps
        # a failing call from aborting the script while still exposing its
        # exit code.
        if RUNNERS=$(gh api "repos/${REPO}/actions/runners" --jq '.runners // []' 2>&1); then
          API_EXIT_CODE=0
        else
          API_EXIT_CODE=$?
        fi

        # A failed call means the runner state is unknown, never healthy.
        # "403" is only searched for in the output of a FAILED call, so a
        # runner whose name or id contains "403" cannot trigger this branch.
        if [ "$API_EXIT_CODE" -ne 0 ]; then
          if echo "$RUNNERS" | grep -q "403"; then
            echo "⚠️ GitHub API 權限不足 (403 Forbidden)"
            echo "api_error=permission_denied" >> "$GITHUB_OUTPUT"
          else
            echo "⚠️ GitHub API 呼叫失敗 (exit code: ${API_EXIT_CODE})"
            echo "api_error=api_failure" >> "$GITHUB_OUTPUT"
          fi
          echo "all_healthy=unknown" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # No runners registered: nothing can be offline.
        if [ -z "$RUNNERS" ] || [ "$RUNNERS" = "[]" ]; then
          echo "⚠️ 未找到任何 Self-Hosted Runner"
          echo "offline_count=0" >> "$GITHUB_OUTPUT"
          echo "online_count=0" >> "$GITHUB_OUTPUT"
          echo "total_runners=0" >> "$GITHUB_OUTPUT"
          echo "all_healthy=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # Guard against a successful call that did not return a JSON array,
        # which would otherwise make the jq queries below fail mid-script.
        if ! echo "$RUNNERS" | jq -e 'type == "array"' > /dev/null 2>&1; then
          echo "⚠️ GitHub API 回應格式異常"
          echo "all_healthy=unknown" >> "$GITHUB_OUTPUT"
          echo "api_error=invalid_response" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        # Derive the runner statistics.
        TOTAL=$(echo "$RUNNERS" | jq 'length')
        OFFLINE_RUNNERS=$(echo "$RUNNERS" | jq -r '[.[] | select(.status != "online") | .name] | join(",")')
        OFFLINE_COUNT=$(echo "$RUNNERS" | jq '[.[] | select(.status != "online")] | length')
        ONLINE_COUNT=$(echo "$RUNNERS" | jq '[.[] | select(.status == "online")] | length')

        echo "📊 Runner 統計:"
        echo " - 總數: $TOTAL"
        echo " - 線上: $ONLINE_COUNT"
        echo " - 離線: $OFFLINE_COUNT"
        echo ""

        # List every runner with its status.
        echo "📋 Runner 詳情:"
        echo "$RUNNERS" | jq -r '.[] | " - \(.name): \(.status) (busy: \(.busy))"'

        # Publish the step outputs.
        {
          echo "offline_count=$OFFLINE_COUNT"
          echo "offline_runners=${OFFLINE_RUNNERS:-none}"
          echo "total_runners=$TOTAL"
          echo "online_count=$ONLINE_COUNT"
        } >> "$GITHUB_OUTPUT"

        if [ "$OFFLINE_COUNT" -eq 0 ]; then
          echo "all_healthy=true" >> "$GITHUB_OUTPUT"
        else
          echo "all_healthy=false" >> "$GITHUB_OUTPUT"
        fi

    - name: "Send Critical Alert if Runners Offline"
      if: steps.api_check.outputs.all_healthy == 'false'
      env:
        TELEGRAM_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        TELEGRAM_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        # Step outputs contain runner names; hand them over as environment
        # variables instead of expanding them into the script text.
        OFFLINE_COUNT: ${{ steps.api_check.outputs.offline_count }}
        OFFLINE_RUNNERS: ${{ steps.api_check.outputs.offline_runners }}
        TOTAL_RUNNERS: ${{ steps.api_check.outputs.total_runners }}
      run: |
        echo "🚨 發現離線 Runner,發送緊急告警..."

        MESSAGE=$(cat <<MSGEOF
        🚨 [AWOOOI] Runner 離線告警

        📍 主機: 192.168.0.110
        🔴 受影響: ${OFFLINE_RUNNERS}
        📊 離線: ${OFFLINE_COUNT}/${TOTAL_RUNNERS}

        ⚠️ 影響: CI/CD 部署已停擺

        🔧 修復:
        ssh wooo@192.168.0.110 'cd actions-runner-awoooi && ./run.sh'

        🔗 ${RUN_URL}
        MSGEOF
        )

        if [ -n "$TELEGRAM_TOKEN" ] && [ -n "$TELEGRAM_CHAT_ID" ]; then
          # Let jq build the JSON body so the newlines and quotes inside the
          # message are escaped correctly.
          PAYLOAD=$(jq -n \
            --arg chat_id "$TELEGRAM_CHAT_ID" \
            --arg text "$MESSAGE" \
            '{chat_id: $chat_id, text: $text, disable_web_page_preview: true}')

          curl -sf -X POST "https://api.telegram.org/bot${TELEGRAM_TOKEN}/sendMessage" \
            -H "Content-Type: application/json" \
            -d "$PAYLOAD" \
            && echo "✅ Telegram 告警發送成功" || echo "⚠️ Telegram 告警發送失敗"
        else
          echo "⚠️ 未設定 Telegram 憑證,略過告警發送"
        fi

        # Fail the job so the offline state is visible in the run result.
        exit 1

    - name: "Report All Healthy"
      if: steps.api_check.outputs.all_healthy == 'true'
      env:
        ONLINE_COUNT: ${{ steps.api_check.outputs.online_count }}
        TOTAL_RUNNERS: ${{ steps.api_check.outputs.total_runners }}
      run: |
        echo "✅ 外部哨兵確認: 所有 Runner 運行正常"
        echo " - 線上: ${ONLINE_COUNT} / ${TOTAL_RUNNERS}"
|
||||
|
||||
# ===========================================
|
||||
# Telegram 連通性監控 - 已停用 (2026-03-25)
|
||||
# 原因: GitHub Billing 限制,ubuntu-latest 無法使用
|
||||
# ===========================================
|
||||
telegram-connectivity:
  # Telegram connectivity job: verifies the bot token against the Telegram
  # API and queries the OpenClaw Telegram health endpoint. Results are
  # published as step outputs only; neither step fails the job on an
  # unhealthy result.
  name: "Telegram Connectivity Check"
  if: false  # Disabled: GitHub billing limits make ubuntu-latest unavailable
  runs-on: ubuntu-latest
  timeout-minutes: 3

  steps:
    # Output: telegram_api = "healthy" | "unhealthy"
    - name: "Check Telegram API Connectivity"
      id: telegram_api
      env:
        TELEGRAM_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
      run: |
        echo "🔍 檢查 Telegram API 連通性..."

        # Without a token the request can only fail; report that cause
        # explicitly instead of as a connectivity problem.
        if [ -z "$TELEGRAM_TOKEN" ]; then
          echo "⚠️ 未設定 OPENCLAW_TG_BOT_TOKEN,無法檢查 Telegram API"
          echo "telegram_api=unhealthy" >> "$GITHUB_OUTPUT"
          exit 0
        fi

        RESPONSE=$(curl -sf --max-time 10 \
          "https://api.telegram.org/bot${TELEGRAM_TOKEN}/getMe" 2>&1 || echo "FAILED")

        # Parse the reply as JSON rather than matching a raw substring; any
        # non-JSON reply (including the "FAILED" marker) takes the else branch.
        if echo "$RESPONSE" | jq -e '.ok == true' > /dev/null 2>&1; then
          BOT_NAME=$(echo "$RESPONSE" | jq -r '.result.username // "unknown"')
          echo "✅ Telegram Bot 有效: @$BOT_NAME"
          echo "telegram_api=healthy" >> "$GITHUB_OUTPUT"
        else
          echo "❌ Telegram API 無法連接"
          echo "telegram_api=unhealthy" >> "$GITHUB_OUTPUT"
        fi

    # Output: openclaw_telegram = <value of .telegram> | "unknown" | "unreachable"
    - name: "Test OpenClaw Telegram Relay"
      id: openclaw_relay
      run: |
        echo "🔍 測試 OpenClaw Telegram 轉發能力..."

        # OPENCLAW_URL is read from the process environment: values in the
        # workflow `env` context are exported to the step's shell, so the
        # expression no longer has to be expanded into the script text.
        RESPONSE=$(curl -sf --max-time 15 \
          "${OPENCLAW_URL}/api/v1/health/telegram" 2>&1 || echo "FAILED")

        # Require a JSON object carrying a "telegram" key; -c keeps a
        # non-scalar value on a single line so the output file stays valid.
        if echo "$RESPONSE" | jq -e 'type == "object" and has("telegram")' > /dev/null 2>&1; then
          TELE_STATUS=$(echo "$RESPONSE" | jq -rc '.telegram // "unknown"')
          echo "openclaw_telegram=$TELE_STATUS" >> "$GITHUB_OUTPUT"
          echo "OpenClaw Telegram 狀態: $TELE_STATUS"
        else
          echo "openclaw_telegram=unreachable" >> "$GITHUB_OUTPUT"
          echo "❌ 無法連接 OpenClaw"
        fi
|
||||
|
||||
# ===========================================
|
||||
# 內部健康檢查 - 在 Self-hosted Runner 執行
|
||||
# ===========================================
|
||||
|
||||
Reference in New Issue
Block a user