All checks were successful
CD Pipeline / deploy (push) Successful in 1m14s
- Added AiderHealExecutor for SSH remote execution of aider-chat - Added CODE_FIX action_type to AutoHealService - Added code_exception trigger to Elephant Alpha engine (Traceback log scanning) - Added 014 playbook migration script
307 lines
12 KiB
Python
307 lines
12 KiB
Python
"""
|
||
services/aider_heal_executor.py
ADR-014: Autonomous Code Heal Pipeline

Runs Aider over SSH on the 110 host to automatically fix code problems in the
momo-pro repo; the fix is then git-pushed directly, which triggers the Gitea
CD Pipeline deployment.

Safety guardrails:
  L1 - File whitelist (only .py files under services/ routes/ database/)
  L2 - Diff limit (>50 lines -> rejected, not pushed)
  L3 - At most 5 CODE_FIX runs per hour
  L4 - Health check failure -> automatic git revert + push
  L5 - Telegram notification for every fix result (success / failure / rollback)
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import time
|
||
import subprocess
|
||
import threading
|
||
import requests
|
||
from datetime import datetime, timedelta
|
||
from typing import Optional
|
||
from services.logger_manager import SystemLogger
|
||
|
||
# Module-level logger, created through SystemLogger under the name
# "AiderHealExecutor"; used by the functions in this module.
logger = SystemLogger("AiderHealExecutor").get_logger()
|
||
|
||
# ── Configuration ─────────────────────────────────────────────────────────────
# SSH target on which git and Aider are executed.
HEAL_SSH_HOST = "192.168.0.110"
HEAL_SSH_USER = "wooo"
HEAL_SSH_KEY = os.getenv("DEPLOY_SSH_KEY_PATH", "/root/.ssh/id_deploy")

# Repository checkout on the SSH host and the git remote that is pushed to.
REPO_PATH_110 = os.getenv("AIDER_REPO_PATH", "/home/wooo/ewoooc")
GITEA_REMOTE = "origin"
# Trailing slashes are stripped so that a base URL such as "https://host/"
# does not produce "https://host//health".
HEALTH_CHECK_URL = (
    os.getenv("MOMO_BASE_URL", "https://mo.wooo.work").rstrip("/") + "/health"
)

# Model credentials / selection handed to Aider.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
AIDER_MODEL = os.getenv("AIDER_MODEL", "gemini/gemini-2.0-flash")

# Guardrail thresholds (overridable through the environment).
MAX_DIFF_LINES = int(os.getenv("AIDER_MAX_DIFF_LINES", "50"))
MAX_HOURLY_FIX = int(os.getenv("AIDER_MAX_HOURLY_FIX", "5"))

# Telegram notifications are skipped when either value is empty.
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN", "")
TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "")

# Paths Aider is allowed to modify (regex).
# The pattern ends with \Z rather than $: "$" also matches just before a
# trailing newline, which would let "services/x.py\n" through the whitelist.
ALLOWED_FILE_PATTERN = re.compile(
    r'^(services|routes|database)/[a-zA-Z0-9_]+\.py\Z'
)

# ── Rate counter (thread-safe) ────────────────────────────────────────────────
# _lock guards _fix_history, the list of timestamps of recent CODE_FIX runs.
_lock = threading.Lock()
_fix_history: list[datetime] = []
|
||
|
||
|
||
def _check_rate_limit() -> bool:
    """Reserve one CODE_FIX slot in the rolling one-hour window.

    Timestamps older than one hour are dropped from ``_fix_history``. If fewer
    than ``MAX_HOURLY_FIX`` entries remain, the current time is recorded and
    the call is allowed.

    Returns:
        True when the limit has not been reached (a slot was recorded for this
        call); False when the hourly limit is already used up.
    """
    # Function-scope import: only this function needs ``timezone``.
    from datetime import timezone

    global _fix_history

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC time
    # and drop tzinfo so the stored values stay naive UTC, i.e. comparable with
    # anything already held in _fix_history.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    cutoff = now - timedelta(hours=1)
    with _lock:
        _fix_history = [t for t in _fix_history if t > cutoff]
        if len(_fix_history) >= MAX_HOURLY_FIX:
            return False
        _fix_history.append(now)
        return True
|
||
|
||
|
||
def _notify_telegram(msg: str):
    """Send a Telegram notification on a best-effort basis.

    Does nothing when the bot token or chat id is not configured. The HTTP
    request is synchronous (bounded by a 5 second timeout) and this function
    never raises: delivery problems are logged and otherwise ignored.

    Args:
        msg: Message text; it is sent with ``parse_mode=HTML``.
    """
    if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID:
        return
    try:
        resp = requests.post(
            f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage",
            json={"chat_id": TELEGRAM_CHAT_ID, "text": msg, "parse_mode": "HTML"},
            timeout=5,
        )
    except Exception as exc:
        # Best effort: a failed notification must never abort the caller.
        # Only the exception type is logged because the exception text can
        # include the request URL, which contains the bot token.
        logger.warning(
            "[AiderHeal] Telegram notification failed: %s", type(exc).__name__
        )
        return
    if resp.status_code != 200:
        # requests does not raise on HTTP error statuses, so a rejected
        # message would otherwise go unnoticed.
        logger.warning(
            "[AiderHeal] Telegram notification rejected: HTTP %s",
            resp.status_code,
        )
|
||
|
||
|
||
def _ssh_run(cmd: str, timeout: int = 60) -> tuple[int, str, str]:
    """Run a command on the heal host over SSH.

    Args:
        cmd: Command line handed to ssh as the remote command.
        timeout: Seconds to wait for the ssh process before giving up.

    Returns:
        ``(returncode, stdout, stderr)`` with both streams stripped. When ssh
        times out or cannot be started the return code is ``-1``, stdout is
        empty and stderr carries the reason.
    """
    ssh_argv = [
        "ssh",
        "-i", HEAL_SSH_KEY,
        # NOTE(review): host key verification is switched off here, so the
        # remote host identity is not checked — confirm this is acceptable
        # for the network this runs on.
        "-o", "StrictHostKeyChecking=no",
        "-o", "ConnectTimeout=10",
        f"{HEAL_SSH_USER}@{HEAL_SSH_HOST}",
        cmd,
    ]
    try:
        result = subprocess.run(
            ssh_argv,
            capture_output=True,
            text=True,
            # Undecodable bytes in the remote output are replaced instead of
            # raising UnicodeDecodeError; otherwise the handler below would
            # report -1 and hide the command's real exit status.
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return -1, "", f"SSH timeout after {timeout}s"
    except Exception as e:
        # Callers only inspect the returned tuple, so failures to launch ssh
        # are reported through it rather than raised.
        return -1, "", str(e)
    return result.returncode, result.stdout.strip(), result.stderr.strip()
|
||
|
||
|
||
def _health_check(retries: int = 6, interval: int = 10) -> bool:
    """Poll the health endpoint until it answers HTTP 200.

    Args:
        retries: Maximum number of GET attempts.
        interval: Seconds to sleep between attempts (no sleep after the last).

    Returns:
        True as soon as one attempt returns status 200; False when every
        attempt fails or ``retries`` is not positive.
    """
    for attempt in range(1, retries + 1):
        try:
            r = requests.get(HEALTH_CHECK_URL, timeout=10)
        except Exception as exc:
            # Deliberately broad: an unexpected error here must count as a
            # failed attempt so the caller still reaches its rollback path.
            logger.info(
                "[AiderHeal] health check attempt %d/%d failed: %s",
                attempt, retries, exc,
            )
        else:
            if r.status_code == 200:
                return True
            logger.info(
                "[AiderHeal] health check attempt %d/%d returned HTTP %s",
                attempt, retries, r.status_code,
            )
        if attempt < retries:
            time.sleep(interval)
    return False
|
||
|
||
|
||
def execute_code_fix(
|
||
error_type: str,
|
||
error_message: str,
|
||
target_file: str,
|
||
context: dict | None = None,
|
||
) -> dict:
|
||
"""
|
||
主要入口:針對指定檔案執行 Aider 自動修復並推版。
|
||
|
||
Args:
|
||
error_type: 錯誤類型(如 'ImportError', 'RuntimeError')
|
||
error_message: 完整錯誤訊息(來自容器 log)
|
||
target_file: 相對於 repo root 的檔案路徑(如 'services/pchome_crawler.py')
|
||
context: 額外上下文字典(可選)
|
||
|
||
Returns:
|
||
{
|
||
'success': bool,
|
||
'action': 'CODE_FIX',
|
||
'message': str,
|
||
'commit_sha': str | None,
|
||
'reverted': bool,
|
||
}
|
||
"""
|
||
ts = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
|
||
ctx = context or {}
|
||
|
||
# L1:檔案白名單
|
||
if not ALLOWED_FILE_PATTERN.match(target_file):
|
||
reason = f"[AiderHeal] 檔案不在白名單: {target_file}"
|
||
logger.warning(reason)
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": reason, "commit_sha": None, "reverted": False}
|
||
|
||
# L3:速率限制
|
||
if not _check_rate_limit():
|
||
reason = f"[AiderHeal] 每小時上限 {MAX_HOURLY_FIX} 次,跳過"
|
||
logger.warning(reason)
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": reason, "commit_sha": None, "reverted": False}
|
||
|
||
_notify_telegram(
|
||
f"🔧 <b>AiderHeal 啟動</b>\n"
|
||
f"├ 錯誤類型: <code>{error_type}</code>\n"
|
||
f"├ 目標檔案: <code>{target_file}</code>\n"
|
||
f"└ 時間: {ts}"
|
||
)
|
||
logger.info("[AiderHeal] 開始修復: %s → %s", error_type, target_file)
|
||
|
||
# ── Step 1:準備 repo(在 110 上)──────────────────────────────────────────
|
||
setup_cmds = (
|
||
f"cd {REPO_PATH_110} && "
|
||
f"git fetch {GITEA_REMOTE} main 2>&1 && "
|
||
f"git reset --hard {GITEA_REMOTE}/main 2>&1 && "
|
||
f"git stash 2>&1 || true"
|
||
)
|
||
rc, out, err = _ssh_run(setup_cmds, timeout=30)
|
||
if rc != 0:
|
||
msg = f"[AiderHeal] git 準備失敗: {err or out}"
|
||
logger.error(msg)
|
||
_notify_telegram(f"❌ AiderHeal 失敗(git 準備)\n<code>{msg}</code>")
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": None, "reverted": False}
|
||
|
||
# ── Step 2:組裝 Aider 指令 ────────────────────────────────────────────────
|
||
# 截斷 error_message,避免 shell 注入問題
|
||
safe_error = error_message[:500].replace('"', "'").replace('`', "'").replace('$', '')
|
||
instruction = (
|
||
f"Fix the following {error_type} in this file. "
|
||
f"Only fix what is necessary, do not refactor or add features. "
|
||
f"Error: {safe_error}"
|
||
)
|
||
|
||
aider_cmd = (
|
||
f"cd {REPO_PATH_110} && "
|
||
f"GEMINI_API_KEY={GEMINI_API_KEY} "
|
||
f"aider --model {AIDER_MODEL} "
|
||
f"--yes-always --no-git "
|
||
f'--message "{instruction}" '
|
||
f"{target_file} 2>&1"
|
||
)
|
||
logger.info("[AiderHeal] 執行 aider on 110...")
|
||
rc, aider_out, aider_err = _ssh_run(aider_cmd, timeout=180)
|
||
logger.info("[AiderHeal] aider 輸出: %s", (aider_out or aider_err)[:300])
|
||
|
||
# ── Step 3:diff 行數檢查(L2 護欄)───────────────────────────────────────
|
||
diff_cmd = f"cd {REPO_PATH_110} && git diff --unified=0 | wc -l"
|
||
rc2, diff_lines_str, _ = _ssh_run(diff_cmd)
|
||
diff_lines = int(diff_lines_str.strip()) if diff_lines_str.strip().isdigit() else 999
|
||
|
||
if diff_lines == 0:
|
||
msg = f"[AiderHeal] Aider 未產生任何修改(diff=0行),可能已自動解決或模型失效"
|
||
logger.warning(msg)
|
||
_notify_telegram(f"⚠️ AiderHeal:無修改產生\n<code>{target_file}</code>")
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": None, "reverted": False}
|
||
|
||
if diff_lines > MAX_DIFF_LINES:
|
||
# 改動太大,丟棄並升級告警
|
||
_ssh_run(f"cd {REPO_PATH_110} && git checkout -- .", timeout=10)
|
||
msg = (f"[AiderHeal] diff 超出限制 {diff_lines}>{MAX_DIFF_LINES} 行,"
|
||
f"已丟棄,需人工介入")
|
||
logger.warning(msg)
|
||
_notify_telegram(
|
||
f"⚠️ <b>AiderHeal:diff 過大,需人工審核</b>\n"
|
||
f"├ 檔案: <code>{target_file}</code>\n"
|
||
f"├ diff: {diff_lines} 行(上限 {MAX_DIFF_LINES})\n"
|
||
f"└ 錯誤: <code>{error_type}</code>"
|
||
)
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": None, "reverted": False}
|
||
|
||
# ── Step 4:git commit + push ──────────────────────────────────────────────
|
||
fix_msg = (
|
||
f"fix(autoheal): [{error_type}] auto-fix {target_file}\n\n"
|
||
f"Triggered by AiderHealExecutor (ADR-014)\n"
|
||
f"Error: {safe_error[:200]}"
|
||
)
|
||
commit_cmd = (
|
||
f"cd {REPO_PATH_110} && "
|
||
f'git add {target_file} && '
|
||
f'git commit -m "{fix_msg}" 2>&1 && '
|
||
f"git push {GITEA_REMOTE} main 2>&1"
|
||
)
|
||
rc3, commit_out, commit_err = _ssh_run(commit_cmd, timeout=30)
|
||
|
||
# 取得 commit SHA
|
||
sha_cmd = f"cd {REPO_PATH_110} && git rev-parse --short HEAD"
|
||
_, commit_sha, _ = _ssh_run(sha_cmd)
|
||
commit_sha = commit_sha.strip() or "unknown"
|
||
|
||
if rc3 != 0:
|
||
msg = f"[AiderHeal] git push 失敗: {commit_err or commit_out}"
|
||
logger.error(msg)
|
||
_notify_telegram(f"❌ AiderHeal git push 失敗\n<code>{msg}</code>")
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": None, "reverted": False}
|
||
|
||
logger.info("[AiderHeal] push 成功,commit=%s,等待健康檢查...", commit_sha)
|
||
_notify_telegram(
|
||
f"🚀 <b>AiderHeal push 完成</b>\n"
|
||
f"├ commit: <code>{commit_sha}</code>\n"
|
||
f"├ 檔案: <code>{target_file}</code>\n"
|
||
f"└ 等待健康檢查..."
|
||
)
|
||
|
||
# ── Step 5:健康檢查(L4 護欄)────────────────────────────────────────────
|
||
time.sleep(20) # 等 CD 部署啟動
|
||
healthy = _health_check(retries=6, interval=10)
|
||
|
||
if healthy:
|
||
msg = f"[AiderHeal] 修復成功並部署完成: {target_file} ({commit_sha})"
|
||
logger.info(msg)
|
||
_notify_telegram(
|
||
f"✅ <b>AiderHeal 修復完成</b>\n"
|
||
f"├ 錯誤: <code>{error_type}</code>\n"
|
||
f"├ 檔案: <code>{target_file}</code>\n"
|
||
f"├ commit: <code>{commit_sha}</code>\n"
|
||
f"└ diff: {diff_lines} 行"
|
||
)
|
||
return {"success": True, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": commit_sha, "reverted": False}
|
||
|
||
# ── Step 6:健康檢查失敗 → 自動 revert(L4 護欄)─────────────────────────
|
||
logger.error("[AiderHeal] 健康檢查失敗,執行自動 revert...")
|
||
revert_cmd = (
|
||
f"cd {REPO_PATH_110} && "
|
||
f"git revert --no-edit {commit_sha} 2>&1 && "
|
||
f"git push {GITEA_REMOTE} main 2>&1"
|
||
)
|
||
rc4, rev_out, rev_err = _ssh_run(revert_cmd, timeout=30)
|
||
if rc4 == 0:
|
||
_, revert_sha, _ = _ssh_run(sha_cmd)
|
||
revert_sha = revert_sha.strip()
|
||
msg = f"[AiderHeal] 健康檢查失敗,已自動 revert: {commit_sha} → {revert_sha}"
|
||
logger.warning(msg)
|
||
_notify_telegram(
|
||
f"🔄 <b>AiderHeal 自動回滾</b>\n"
|
||
f"├ 原 commit: <code>{commit_sha}</code>\n"
|
||
f"├ 回滾 commit: <code>{revert_sha}</code>\n"
|
||
f"└ 需人工排查: <code>{error_type}</code> in <code>{target_file}</code>"
|
||
)
|
||
else:
|
||
msg = f"[AiderHeal] revert 失敗!需立即人工介入: {rev_err}"
|
||
logger.critical(msg)
|
||
_notify_telegram(f"🚨 <b>AiderHeal revert 失敗!請立即人工介入</b>\n<code>{msg}</code>")
|
||
|
||
return {"success": False, "action": "CODE_FIX",
|
||
"message": msg, "commit_sha": commit_sha, "reverted": rc4 == 0}
|