feat(api): POST /playbooks/ 建立端點 + seed-repair-playbooks.py (Task 14)
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 57s
CD Pipeline / Deploy Prometheus Alert Rules (push) Has been skipped

- playbooks.py: 新增 POST / 端點供直接建立 Playbook (seed/管理用)
- seed-repair-playbooks.py: 5個 Host Repair Playbooks (ssh_command)
  sentry/harbor/gitea/alertmanager (110) + openclaw (188)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-05 11:53:49 +08:00
parent e7a0727ab0
commit 76f7330c9d
2 changed files with 175 additions and 0 deletions

View File

@@ -0,0 +1,152 @@
"""
scripts/ops/seed-repair-playbooks.py
建立 Sprint 3 Host Repair Playbooks
2026-04-05 Claude Code: Sprint 3 Host Auto-Repair
用法: python3 scripts/ops/seed-repair-playbooks.py
需要: AWOOOI API 可訪問 (http://192.168.0.121:32334)
"""
import json
import urllib.request
import urllib.error
# Base URL of the AWOOOI API that the playbooks are posted to.
API_BASE = "http://192.168.0.121:32334"


def _build_repair_playbook(component, host, label, port, *, title=None,
                           compose_target=None, extra_keywords=(),
                           extra_tags=()):
    """Assemble one single-step ``ssh_command`` host-repair playbook.

    Args:
        component: Lower-case service name (used for the playbook name,
            affected service, label pattern, command and first tag).
        host: Host suffix; the layer becomes ``docker-<host>``.
        label: Capitalised service name; the alert name is ``<label>Down``.
        port: Port number (as a string) added to the match keywords.
        title: Name shown in the playbook description; defaults to ``label``.
        compose_target: Name shown in the step description; defaults to
            ``label``.
        extra_keywords: Additional keywords appended after the standard three.
        extra_tags: Additional tags appended after the standard three.

    Returns:
        The playbook definition as a plain dict.
    """
    layer = f"docker-{host}"
    alert = f"{label}Down"
    shown_title = label if title is None else title
    target = label if compose_target is None else compose_target
    return {
        "name": f"{component}-down-repair",
        "description": f"{shown_title} ({host}) 離線自動修復",
        "symptom_pattern": {
            "alert_names": [alert],
            "affected_services": [component],
            "keywords": [alert, component, port, *extra_keywords],
            "label_patterns": {"layer": layer, "component": component},
        },
        "repair_steps": [
            {
                "step_number": 1,
                "action_type": "ssh_command",
                # ssh_command uses the "layer/component" form; per the
                # original note it is parsed by
                # auto_repair_service._execute_step.
                "command": f"{layer}/{component}",
                "description": f"SSH 到 {host}docker compose up -d {target}",
                "risk_level": "LOW",
            }
        ],
        "tags": [component, layer, "auto-repair", *extra_tags],
    }


# Host repair playbooks: four services on host 110 and one on host 188.
PLAYBOOKS = [
    _build_repair_playbook("sentry", "110", "Sentry", "9000"),
    _build_repair_playbook(
        "harbor", "110", "Harbor", "5000",
        title="Harbor Registry",
        extra_keywords=("ImagePullBackOff",),
        extra_tags=("registry",),
    ),
    _build_repair_playbook("gitea", "110", "Gitea", "3001"),
    _build_repair_playbook(
        "alertmanager", "110", "Alertmanager", "9093",
        compose_target="monitoring (含 Alertmanager)",
        extra_tags=("critical-infra",),
    ),
    _build_repair_playbook("openclaw", "188", "OpenClaw", "8088"),
]
def create_playbook(playbook_data: dict, api_base: "str | None" = None) -> bool:
    """Create one playbook by POSTing it to ``<api_base>/api/v1/playbooks/``.

    Args:
        playbook_data: JSON-serialisable playbook definition. Its ``name``
            entry is used in the progress messages.
        api_base: Base URL of the API. When omitted, the module-level
            ``API_BASE`` is used, so existing callers are unaffected.

    Returns:
        True when the server accepted the request, or reported that the
        playbook already exists (HTTP 409 or an "already exists" body).
        False on any other HTTP error or on a transport failure.
    """
    name = playbook_data.get("name", "<unnamed>")
    base = API_BASE if api_base is None else api_base
    url = base.rstrip("/") + "/api/v1/playbooks/"
    req = urllib.request.Request(
        url,
        data=json.dumps(playbook_data).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    # Keep the try body to the network exchange only, so that a problem while
    # interpreting a successful response is not mistaken for a failed request.
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object: read it, then release it.
        try:
            body = e.read().decode("utf-8", errors="replace")
        finally:
            e.close()
        if "already exists" in body or e.code == 409:
            print(f" -- Already exists: {name}")
            return True
        print(f" ER Failed: {name} -- HTTP {e.code}: {body[:100]}")
        return False
    except Exception as e:
        # Script-level boundary: report the failure and let the caller move
        # on to the next playbook.
        print(f" ER Error: {name} -- {e}")
        return False

    # The request succeeded; the id is informational only, so an empty,
    # non-JSON or non-object body must not turn the result into a failure.
    playbook_id = "?"
    try:
        result = json.loads(raw)
    except ValueError:
        result = None
    if isinstance(result, dict):
        playbook_id = result.get("playbook_id", "?")
    print(f" OK Created: {name} (id: {playbook_id})")
    return True
def main() -> int:
    """Seed every entry of ``PLAYBOOKS`` and report the outcome.

    Returns:
        0 when every playbook was created (or already existed), 1 otherwise,
        so that shells and CI jobs can detect a partial seed.
    """
    print("=== 建立 Host Repair Playbooks ===")
    # create_playbook returns True on success; count the successes in order.
    success = sum(1 for pb in PLAYBOOKS if create_playbook(pb))
    print(f"\n結果: {success}/{len(PLAYBOOKS)} playbooks 建立成功")
    return 0 if success == len(PLAYBOOKS) else 1


if __name__ == "__main__":
    raise SystemExit(main())