Files
awoooi/scripts/ops/backup-from-110.sh
Your Name b191f8e9fe
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m6s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
fix(telegram): close ops direct sender gaps
2026-07-02 19:32:36 +08:00

137 lines
5.7 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# backup-from-110.sh — 188 Host 層備份腳本(從 110 備份到 188)
# =============================================================================
# 部署位置: /home/ollama/bin/backup-from-110.sh (188 上)
# 執行者: ollama (188 的主要帳號)
# Cron: 0 1 * * * /home/ollama/bin/backup-from-110.sh
#
# 備份項目:
# 1. Harbor registry data(最高優先)
# 2. Gitea repos
# 3. bitan-pharmacy git bare repo(若存在)
#
# 前提:
# - 188 的 ollama 帳號已加入 110 wooo 帳號的 authorized_keys
# - BACKUP_ROOT(預設 /home/ollama/backup/110)下的 {harbor,gitea} 目錄已建立 (mkdir -p /home/ollama/backup/110/{harbor,gitea})
# - 188 磁碟空間足夠(建議 > 50GB 可用)
#
# 成功/失敗狀態:
# - 成功時寫入 Unix 時間戳到 ${BACKUP_ROOT}/last_success 與 ${TEXTFILE_DIR}/backup.prom
# - 失敗時透過 AWOOI Alertmanager webhook 進 TelegramGateway / AwoooP receipt path
#
# Sprint C ADR-069 (2026-04-11 Claude Sonnet 4.6 Asia/Taipei)
# =============================================================================
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Per-run bookkeeping.
DATE="$(date +%Y%m%d-%H%M%S)"  # captured once at start-up
ERRORS=0                       # incremented by each failed mandatory step
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Locations. BACKUP_ROOT and TEXTFILE_DIR may be overridden from the
# environment; everything else is derived from them.
: "${BACKUP_ROOT:=/home/ollama/backup/110}"
: "${TEXTFILE_DIR:=/home/ollama/node_exporter_textfiles}"
LOG="${BACKUP_ROOT}/backup.log"
LAST_SUCCESS_FILE="${BACKUP_ROOT}/last_success"
TEXTFILE_PROM="${TEXTFILE_DIR}/backup.prom"
#######################################
# Write one log line to stdout and append it to the log file.
# Globals:   DATE (read) - prefix for every line
#            LOG  (read) - path of the log file
# Arguments: message words; joined with single spaces
# Outputs:   "[<DATE>] <message>" on stdout and appended to $LOG
# Returns:   non-zero if the log directory cannot be created or the
#            line cannot be written
#######################################
log() {
  local log_dir="${LOG%/*}"
  # The first call happens before anything has created the directory that
  # holds $LOG; without this, tee fails on a fresh host and aborts the run.
  if [[ -n "$log_dir" && "$log_dir" != "$LOG" && ! -d "$log_dir" ]]; then
    mkdir -p -- "$log_dir"
  fi
  printf '[%s] %s\n' "$DATE" "$*" | tee -a "$LOG"
}
# Mark the beginning of this run in the log file and on stdout.
log "=== Starting backup from 110 ==="
#######################################
# Hand a backup status event to the notify-awoooi-ops.sh helper that sits
# next to this script. The event fields are passed as AWOOI_OPS_* environment
# variables; the helper's stdout is discarded.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - status word (also embedded in the summary)
#            $2 - free-form detail message
# Returns:   1 if the helper is missing or not executable, otherwise the
#            helper's own exit status
# NOTE(review): severity is always "info", even for failures — confirm intended.
#######################################
notify_awoooi_ops() {
  local state="$1"
  local detail="$2"
  local notifier="${SCRIPT_DIR}/notify-awoooi-ops.sh"

  if [[ ! -x "$notifier" ]]; then
    return 1
  fi

  AWOOI_OPS_SOURCE="backup-from-110" \
    AWOOI_OPS_COMPONENT="host-backup" \
    AWOOI_OPS_ALERTNAME="HostBackupFailed" \
    AWOOI_OPS_SEVERITY="info" \
    AWOOI_OPS_JOB_NAME="188 Host 層備份" \
    AWOOI_OPS_STATUS="$state" \
    AWOOI_OPS_SUMMARY="188 Host 層備份 ${state}" \
    AWOOI_OPS_DETAIL="$detail" \
    "$notifier" >/dev/null
}
#######################################
# Best-effort operator notification.
# Every notification must go through the AWOOI API, which sends it via
# TelegramGateway and mirrors it to AwoooP. When the API is unreachable only a
# local log line is written, so the DB/log receipt path and AI automation are
# never bypassed by a direct Telegram call.
# Arguments: $1 - status word
#            $2 - detail message
# Returns:   always 0
#######################################
notify_ops() {
  local state="$1"
  local detail="$2"

  notify_awoooi_ops "$state" "$detail" \
    || log "WARN: AWOOI API notification unavailable; direct Telegram fallback disabled; receipt remains local"
  return 0
}
# ── Harbor registry data / Gitea repos (docker volumes on 110) ───────────────
# 2026-04-17 ogt: volumes are read through the docker socket because
# /var/lib/docker/volumes/ is 710 root:root. wooo is a member of the docker
# group, so it can mount a volume via `docker run` but cannot read the
# filesystem path directly.

#######################################
# Stream one docker volume from 110 as a gzip'd tar and unpack it locally.
# The whole pipeline is grouped so that stderr from ssh / remote docker AND
# from the local tar are appended to the log (previously only the local tar's
# output was captured).
# Globals:   LOG (appended to)
# Arguments: $1 - docker volume name on 110
#            $2 - existing local directory to extract into
# Returns:   0 only if both the remote side and the local extraction succeed
#            (relies on the script-level `pipefail`)
#######################################
pull_docker_volume() {
  local volume="$1"
  local dest="$2"
  {
    ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 \
      wooo@192.168.0.110 \
      "docker run --rm -v ${volume}:/source alpine tar czf - -C /source ." \
      | tar xzf - -C "$dest"
  } >> "$LOG" 2>&1
}

# Harbor registry data (highest priority).
log "Backing up Harbor registry..."
mkdir -p "${BACKUP_ROOT}/harbor"
if pull_docker_volume "harbor_harbor-data" "${BACKUP_ROOT}/harbor/"; then
  log "✅ Harbor backup OK"
else
  log "❌ ERROR: Harbor backup failed"
  ERRORS=$((ERRORS + 1))
fi

# Gitea repos.
log "Backing up Gitea repos..."
mkdir -p "${BACKUP_ROOT}/gitea"
if pull_docker_volume "gitea_gitea-data" "${BACKUP_ROOT}/gitea/"; then
  log "✅ Gitea backup OK"
else
  log "❌ ERROR: Gitea backup failed"
  ERRORS=$((ERRORS + 1))
fi
# ── bitan-pharmacy git bare repo (optional) ──────────────────────────────────
# Probe for the repo first; the probe's stderr is discarded, so any probe
# failure (repo absent or ssh error) is reported as "not found" and skipped.
# A failure of the rsync itself is logged but deliberately does not increment
# ERRORS (non-fatal).
if ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
  wooo@192.168.0.110 "test -d /home/wooo/bitan-pharmacy.git" 2>/dev/null; then
  log "Backing up bitan-pharmacy.git..."
  # Destination is quoted so an overridden BACKUP_ROOT containing spaces or
  # glob characters is passed to rsync as a single argument.
  if rsync -avz \
    -e "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10" \
    wooo@192.168.0.110:/home/wooo/bitan-pharmacy.git/ \
    "${BACKUP_ROOT}/bitan-pharmacy.git/" >> "$LOG" 2>&1; then
    log "✅ bitan-pharmacy.git backup OK"
  else
    log "⚠️ bitan-pharmacy.git backup failed (non-fatal)"
  fi
else
  log "⚠️ bitan-pharmacy.git not found on 110, skipping"
fi
# ── Result handling ──────────────────────────────────────────────────────────
# On success: record the completion time in two places and exit 0.
# On failure: log, send a best-effort notification, and exit 1.
if [[ "$ERRORS" -eq 0 ]]; then
  TS=$(date +%s)
  # Plain-text timestamp (legacy format, kept for compatibility).
  printf '%s\n' "$TS" > "$LAST_SUCCESS_FILE"
  # Prometheus textfile format, read by the node_exporter textfile collector.
  # 2026-04-17 ogt: fixes HostBackupFailed — absent(backup_110_last_success_timestamp)
  # fired permanently because only the plain-text file was written and no
  # .prom metric was ever emitted.
  mkdir -p "$TEXTFILE_DIR"
  # Write to a sibling temp file and rename it into place so the collector
  # never scrapes a half-written file. The temp name does not end in .prom.
  PROM_TMP="${TEXTFILE_PROM}.$$"
  printf '%s\n' \
    '# HELP backup_110_last_success_timestamp Unix timestamp of last successful backup from 110' \
    '# TYPE backup_110_last_success_timestamp gauge' \
    "backup_110_last_success_timestamp $TS" > "$PROM_TMP"
  mv -f -- "$PROM_TMP" "$TEXTFILE_PROM"
  log "=== Backup completed successfully (ts=$TS) ==="
  exit 0
else
  log "=== Backup FAILED ($ERRORS errors) ==="
  notify_ops "failed" "🚨 backup-from-110.sh FAILED on 188 — ${ERRORS} error(s) at ${DATE}"
  exit 1
fi