feat(backup): 全面自動化備份 + AWOOOI DB + GFS 延長保留

首席架構師備份審計 — 全部自動化完成:

- backup-awoooi.sh:新增 AWOOOI PostgreSQL 備份腳本
  - awoooi_prod (KB/事故/AutoRepair/Drift) + k3s_datastore
  - 從 110 SSH 到 188 執行 pg_dump,整合進 restic
  - 首次執行:680K,9s,snapshot 8750748f 

- backup-all.sh v2.0:整合第 4 個服務 AWOOOI DB

- GFS 保留策略延長:
  - 每日 7→30 份(覆蓋最近 30 天)
  - 每週 4→12 份(覆蓋最近 3 個月)
  - 每月 6→24 份(覆蓋最近 2 年)

- BACKUP-STATUS.md:更新為全自動化狀態總覽

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-05 01:11:31 +08:00
parent 84cfdb6195
commit 3136fc5ea0
6 changed files with 265 additions and 206 deletions

80
scripts/backup/backup-all.sh Executable file
View File

@@ -0,0 +1,80 @@
#!/bin/bash
# =============================================================================
# WOOO AIOps - master script that backs up every service
# Version: 2.0.0
# Created: 2026-03-12
# 2026-04-05 Claude Code: added AWOOOI DB (v1→v2) — chief-architect backup audit
# =============================================================================
set -euo pipefail
# Load shared helper functions from common.sh in this script's directory.
# main() below calls log_info / log_success / log_error / notify_clawbot, none
# of which are defined in this file — presumably they come from common.sh.
source "$(dirname "$0")/common.sh"
# -----------------------------------------------------------------------------
# Main entry point.
#
# Runs every per-service backup script under /backup/scripts in order, keeps
# going after a failure so one broken service cannot block the others, then
# logs a summary and sends a single notification.
#
# Globals:   none
# Arguments: none (any arguments passed in are ignored)
# Returns:   the number of services whose backup failed (0 = all succeeded)
# -----------------------------------------------------------------------------
main() {
  # Declared separately from the command substitutions so that a failing
  # `date` is not masked by the exit status of `local`.
  local start_time end_time duration
  local failed=0
  local i

  # Parallel arrays: display label and script file name for each service.
  local -a labels=("Gitea" "MOMO Pro" "Harbor" "AWOOOI DB")
  local -a scripts=(
    "backup-gitea.sh"
    "backup-momo.sh"
    "backup-harbor.sh"
    "backup-awoooi.sh"   # awoooi_prod + k3s_datastore; added 2026-04-05
  )
  local total=${#labels[@]}

  start_time=$(date +%s)

  log_info "╔══════════════════════════════════════════════════════════════╗"
  log_info "║ WOOO AIOps - 全服務備份開始 (v2.0) ║"
  log_info "╚══════════════════════════════════════════════════════════════╝"

  for i in "${!labels[@]}"; do
    log_info ">>> [$((i + 1))/${total}] 備份 ${labels[i]}..."
    if "/backup/scripts/${scripts[i]}"; then
      log_success " ${labels[i]} 備份成功"
    else
      log_error " ${labels[i]} 備份失敗"
      # Do NOT use ((failed++)) here: it evaluates to the old value, so when
      # failed is 0 it returns exit status 1 and `set -e` aborts the script
      # on the very first failure, skipping the remaining services.
      failed=$((failed + 1))
    fi
  done

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  log_info "╔══════════════════════════════════════════════════════════════╗"
  if ((failed == 0)); then
    log_success "║ 全服務備份完成 (${duration}s) - 全部成功 (${total}/${total}) ║"
    notify_clawbot "success" "all" "全服務備份完成 (${total}/${total} 成功)" "${duration}"
  else
    log_error "║ 全服務備份完成 (${duration}s) - ${failed} 個失敗 ($((total - failed))/${total}) ║"
    notify_clawbot "warning" "all" "全服務備份完成 ($((total - failed))/${total} 成功)" "${duration}"
  fi
  log_info "╚══════════════════════════════════════════════════════════════╝"

  return "${failed}"
}
# Run main, forwarding the script's command-line arguments.
main "$@"

View File

@@ -1,39 +0,0 @@
#!/bin/bash
# =============================================================================
# AWOOOI PostgreSQL daily backup script
# 2026-04-05 Claude Code: an inventory found the awoooi DBs had no backup;
#   created following the backup-momo-db.sh conventions
# Deploy location: /home/ollama/scripts/backup-awoooi-db.sh (on 192.168.0.188)
# cron: 0 2 * * * /home/ollama/scripts/backup-awoooi-db.sh >> /home/ollama/logs/backup-awoooi.log 2>&1
# =============================================================================
# pipefail is essential here: without it the status of `pg_dump | gzip` is
# gzip's alone, so a failed pg_dump is reported as a successful backup and the
# "skipped" fallbacks below can never trigger.
set -eo pipefail

BACKUP_DIR="/home/ollama/backups/awoooi"
DATE=$(date +%Y-%m-%d_%H-%M)
RETENTION_DAYS=30

mkdir -p "$BACKUP_DIR"
echo "[$(date)] 開始 AWOOOI DB 備份..."

# awoooi_prod — primary production database (knowledge base, incidents,
# AutoRepair decisions, ...). A failure here is fatal.
prod_dump="$BACKUP_DIR/awoooi_prod_${DATE}.sql.gz"
if ! sudo -u postgres pg_dump awoooi_prod | gzip > "$prod_dump"; then
  # Remove the truncated archive so it cannot be mistaken for a good backup.
  rm -f -- "$prod_dump"
  echo "[$(date)] ❌ awoooi_prod 備份失敗"
  exit 1
fi
echo "[$(date)] ✅ awoooi_prod 備份完成"

# awoooi_dev — development database (optional; may not exist).
dev_dump="$BACKUP_DIR/awoooi_dev_${DATE}.sql.gz"
if ! sudo -u postgres pg_dump awoooi_dev 2>/dev/null | gzip > "$dev_dump"; then
  rm -f -- "$dev_dump"
  echo "[$(date)] ⚠️ awoooi_dev 備份跳過(可能不存在)"
fi

# k3s_datastore — K3s Kine database (optional).
k3s_dump="$BACKUP_DIR/k3s_datastore_${DATE}.sql.gz"
if ! sudo -u postgres pg_dump k3s_datastore 2>/dev/null | gzip > "$k3s_dump"; then
  rm -f -- "$k3s_dump"
  echo "[$(date)] ⚠️ k3s_datastore 備份跳過"
fi

# Prune old backups (keep RETENTION_DAYS days).
find "$BACKUP_DIR" -name "*.sql.gz" -mtime +"$RETENTION_DAYS" -delete
echo "[$(date)] 清理超過 ${RETENTION_DAYS} 天的舊備份完成"

# Show the most recent backups. This listing is informational only, so it
# must never fail the script now that pipefail is on.
echo "[$(date)] 目前備份清單:"
ls -lh "$BACKUP_DIR"/*.sql.gz 2>/dev/null | tail -10 || true

echo "[$(date)] AWOOOI DB 備份完成!"

123
scripts/backup/backup-awoooi.sh Executable file
View File

@@ -0,0 +1,123 @@
#!/bin/bash
# =============================================================================
# WOOO AIOps - AWOOOI database backup script
# 2026-04-05 Claude Code: chief-architect backup audit — awoooi_prod/dev/k3s_datastore
# Deploy location: /backup/scripts/backup-awoooi.sh (on 192.168.0.110)
# Integrated into backup-all.sh (step 4/4)
# =============================================================================
set -euo pipefail
# Load shared helper functions
source "$(dirname "$0")/common.sh"
# Configuration
SERVICE="awoooi"
# Host that pg_dump is executed on over SSH.
AWOOOI_HOST="192.168.0.188"
AWOOOI_DB_USER="awoooi"
# NOTE(review): database credential is hardcoded in a tracked file and is later
# interpolated into the remote command line. Consider a ~/.pgpass file on the
# DB host or an environment-provided secret instead.
AWOOOI_DB_PASS="awoooi_prod_2026"
# Connection target as seen from AWOOOI_HOST (pg_dump runs there, not locally).
AWOOOI_DB_HOST="localhost"
AWOOOI_DB_PORT="5432"
# NOTE(review): K3S_DB_USER is not referenced by main() in this script; all
# three dumps connect as AWOOOI_DB_USER. Confirm which role is intended.
K3S_DB_USER="postgres"
# BACKUP_BASE is not defined in this file — presumably set by common.sh.
LOCAL_REPO="${BACKUP_BASE}/awoooi"
# Scratch directory for the plain-text dumps; $$ is this shell's PID.
# NOTE(review): the path under /tmp is predictable — mktemp -d would be safer.
DUMP_DIR="/tmp/awoooi-backup-$$"
# Retention policy override (original comment: "longer than the other services").
# NOTE(review): the commit message for this change describes 30/12/24 retention;
# confirm whether 14/8/12 here is still meant to be the longer policy.
KEEP_DAILY=14 # 14 daily snapshots
KEEP_WEEKLY=8 # 8 weekly snapshots
KEEP_MONTHLY=12 # 12 monthly snapshots
#######################################
# Dump one PostgreSQL database on the AWOOOI host over SSH into a local file.
# Only stdout is written to the file; stderr is left for the caller to route,
# so diagnostics can never end up inside the SQL dump.
# Globals:   AWOOOI_HOST, AWOOOI_DB_USER, AWOOOI_DB_PASS, AWOOOI_DB_HOST,
#            AWOOOI_DB_PORT (read)
# Arguments: $1 - database name, $2 - local output file
# Returns:   0 if the dump succeeded and is non-empty; 1 otherwise (the
#            partial or empty output file is removed)
#######################################
_awoooi_dump_db() {
  local db_name=$1
  local out_file=$2
  local remote_cmd

  # NOTE(review): the password is part of the remote command line and may be
  # visible in the process list on the DB host; ~/.pgpass would avoid that.
  remote_cmd="PGPASSWORD='${AWOOOI_DB_PASS}' pg_dump"
  remote_cmd+=" -U ${AWOOOI_DB_USER} -h ${AWOOOI_DB_HOST} -p ${AWOOOI_DB_PORT}"
  remote_cmd+=" ${db_name}"

  if ssh "ollama@${AWOOOI_HOST}" "${remote_cmd}" > "${out_file}" \
    && [[ -s "${out_file}" ]]; then
    return 0
  fi
  # Never leave a truncated or empty dump behind to be snapshotted.
  rm -f -- "${out_file}"
  return 1
}

# Print the human-readable size of a file (first column of `du -h`).
_awoooi_dump_size() {
  du -h -- "$1" | cut -f1
}

#######################################
# Log a fatal error, remove the scratch dumps, notify, and exit 1.
# Arguments: $1 - log message, $2 - notification message
#######################################
_awoooi_abort() {
  log_error "$1"
  # Clean up first so DB dumps are never left in /tmp, even if notifying fails.
  rm -rf -- "${DUMP_DIR:?}"
  # The run has already failed; a notification error must not change that.
  notify_clawbot "failed" "${SERVICE}" "$2" || true
  exit 1
}

#######################################
# Back up the AWOOOI databases: dump them over SSH, store the dumps in the
# local Restic repository, apply the retention policy, and optionally sync
# the repository to Backblaze B2.
# Globals:   SERVICE, DUMP_DIR, LOCAL_REPO, RESTIC_PASSWORD_FILE, B2_BUCKET,
#            KEEP_DAILY, KEEP_WEEKLY, KEEP_MONTHLY (read)
# Arguments: none
# Returns:   0 on success; exits 1 if the core dump or any Restic/B2 step fails
#######################################
main() {
  # Declared separately from the command substitutions so a failing command
  # is not masked by the exit status of `local`.
  local start_time end_time duration timestamp tags snapshot_id
  local prod_dump dev_dump k3s_dump
  local failed=0

  start_time=$(date +%s)
  log_info "========== 開始 AWOOOI 資料庫備份 =========="

  mkdir -p "${DUMP_DIR}"
  # The dumps contain production data; keep other local users out.
  chmod 700 "${DUMP_DIR}"

  timestamp=$(date "+%Y%m%d_%H%M%S")
  prod_dump="${DUMP_DIR}/awoooi_prod_${timestamp}.sql"
  dev_dump="${DUMP_DIR}/awoooi_dev_${timestamp}.sql"
  k3s_dump="${DUMP_DIR}/k3s_datastore_${timestamp}.sql"

  # Step 1: awoooi_prod — core database (KB / incidents / AutoRepair / Drift).
  # stderr is intentionally NOT redirected into the dump file.
  log_info "Dump awoooi_prod..."
  if _awoooi_dump_db "awoooi_prod" "${prod_dump}"; then
    log_success "awoooi_prod dump 完成 ($(_awoooi_dump_size "${prod_dump}"))"
  else
    log_error "awoooi_prod dump 失敗"
    # Not ((failed++)): that returns status 1 when failed is 0 and would make
    # `set -e` kill the script before the failure notification and cleanup.
    failed=$((failed + 1))
  fi

  # Step 2: awoooi_dev — optional; errors are silenced because the database
  # may not exist.
  log_info "Dump awoooi_dev..."
  if _awoooi_dump_db "awoooi_dev" "${dev_dump}" 2>/dev/null; then
    log_success "awoooi_dev dump 完成 ($(_awoooi_dump_size "${dev_dump}"))"
  else
    log_warn "awoooi_dev dump 跳過(可能不存在)"
  fi

  # Step 3: k3s_datastore — Kine backend; optional.
  # NOTE(review): dumped as AWOOOI_DB_USER; the config also defines
  # K3S_DB_USER, which is unused — confirm which role is intended.
  log_info "Dump k3s_datastore..."
  if _awoooi_dump_db "k3s_datastore" "${k3s_dump}" 2>/dev/null; then
    log_success "k3s_datastore dump 完成 ($(_awoooi_dump_size "${k3s_dump}"))"
  else
    log_warn "k3s_datastore dump 跳過"
  fi

  # Abort if the core database could not be dumped.
  if ((failed > 0)); then
    _awoooi_abort "核心 DB awoooi_prod 備份失敗,中止" "AWOOOI 核心 DB 備份失敗"
  fi

  # Step 4: Restic backup.
  log_info "建立 Restic 備份..."
  tags=$(build_tags "${SERVICE}")

  if [[ ! -d "${LOCAL_REPO}/data" ]]; then
    log_info "初始化 Restic 倉庫 ${LOCAL_REPO}..."
    restic -r "${LOCAL_REPO}" init --password-file "${RESTIC_PASSWORD_FILE}" 2>&1 \
      || _awoooi_abort "Restic 倉庫初始化失敗,中止" "AWOOOI Restic 倉庫初始化失敗"
  fi

  # ${tags} is deliberately unquoted: build_tags is expected to return several
  # separate command-line words.
  # shellcheck disable=SC2086
  restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    ${tags} 2>&1 \
    || _awoooi_abort "Restic 備份失敗,中止" "AWOOOI Restic 備份失敗"

  # The snapshot id is only used for logging, so a lookup failure is not fatal.
  snapshot_id=$(restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
    --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null \
    | grep -oP '"short_id":"\K[^"]+' | head -1) || snapshot_id="unknown"
  log_success "Restic 備份完成: ${snapshot_id}"

  # Step 5: GFS pruning with the extended retention policy.
  log_info "執行 GFS 清理 (daily=${KEEP_DAILY} weekly=${KEEP_WEEKLY} monthly=${KEEP_MONTHLY})..."
  restic -r "${LOCAL_REPO}" forget --prune \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    --keep-daily "${KEEP_DAILY}" \
    --keep-weekly "${KEEP_WEEKLY}" \
    --keep-monthly "${KEEP_MONTHLY}" 2>&1 \
    || _awoooi_abort "GFS 清理失敗,中止" "AWOOOI GFS 清理失敗"
  log_success "GFS 清理完成"

  # Step 6: B2 sync, only when configured.
  if check_b2_config; then
    log_info "同步到 Backblaze B2..."
    rclone sync "${LOCAL_REPO}" "b2:${B2_BUCKET}/awoooi" --progress 2>&1 \
      || _awoooi_abort "B2 同步失敗,中止" "AWOOOI B2 同步失敗"
    log_success "B2 同步完成"
  fi

  rm -rf -- "${DUMP_DIR:?}"

  end_time=$(date +%s)
  duration=$((end_time - start_time))
  log_success "========== AWOOOI 備份完成 (${duration}s) =========="
  notify_clawbot "success" "${SERVICE}" "AWOOOI DB 備份完成 (awoooi_prod/dev + k3s)" "${duration}"
}
# Run main, forwarding the script's command-line arguments.
main "$@"

View File

@@ -1,42 +0,0 @@
#!/bin/bash
# =============================================================================
# Gitea SQLite daily backup script
# 2026-04-05 Claude Code: an inventory found the Gitea DB had no backup, and a
#   corruption incident happened the same day
# Deploy location: /home/wooo/scripts/backup-gitea-db.sh (on 192.168.0.110)
# cron (wooo@110): 0 1 * * * /home/wooo/scripts/backup-gitea-db.sh >> /home/wooo/logs/backup-gitea.log 2>&1
# Lesson learned: on 2026-04-05 the Gitea DB was corrupted and could only be
#   rescued with `sqlite3 .recover` — manual repair took 2+ hours
# =============================================================================
set -e

db_path="/home/wooo/gitea/gitea_data/gitea/gitea.db"
dest_dir="/home/wooo/backups/gitea"
stamp=$(date +%Y-%m-%d_%H-%M)
keep_days=30
snapshot="${dest_dir}/gitea_${stamp}.db"

# Print a message prefixed with the current date, as every log line here is.
log() {
  echo "[$(date)] $*"
}

mkdir -p "$dest_dir"
log "開始 Gitea DB 備份..."

# Guard: nothing to do when the database file is missing.
[ -f "$db_path" ] || {
  log "❌ Gitea DB 不存在: $db_path"
  exit 1
}

# SQLite online backup: Gitea does not need to be stopped.
sqlite3 "$db_path" ".backup '${snapshot}'"
log "✅ Gitea DB 備份完成"

# Compress the copy in place (produces ${snapshot}.gz).
gzip "$snapshot"
log "✅ 壓縮完成: gitea_${stamp}.db.gz"

# Drop archives older than the retention window.
find "$dest_dir" -name "gitea_*.db.gz" -mtime +"$keep_days" -delete
log "清理超過 ${keep_days} 天的舊備份完成"

# Show the five most recent archives.
log "目前備份清單:"
ls -lh "$dest_dir"/gitea_*.db.gz 2>/dev/null | tail -5

log "Gitea DB 備份完成!"