Files
awoooi/scripts/backup/backup-awoooi-frequent.sh
Your Name d4c513a022
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 2m1s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
fix(backup): restore reboot freshness readback
2026-07-01 21:09:35 +08:00

256 lines
8.9 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# =============================================================================
# WOOO AIOps - AWOOOI high-frequency backup (every 6 hours)
# 2026-04-05 Claude Code: awoooi_prod is the core production DB; once a day is not enough
# Deploy location: /backup/scripts/backup-awoooi-frequent.sh (on 192.168.0.110)
# cron: 0 */6 * * * /backup/scripts/backup-awoooi-frequent.sh >> /backup/logs/cron.log 2>&1
# Notes: backs up awoooi_prod every 6 hours, so at most 6 hours of data can be lost.
#        The full backup (including Gitea/Harbor) is still run daily at 02:00 by backup-all.sh.
# =============================================================================
# Abort on unhandled errors, unset variables and failing pipeline stages.
set -euo pipefail
# Shared helpers living next to this script. Presumably provides log_info/log_success/
# log_error, notify_clawbot, cleanup_old_backups, BACKUP_BASE and RESTIC_PASSWORD_FILE,
# none of which are defined in this file -- TODO confirm against common.sh.
source "$(dirname "$0")/common.sh"
# Service label passed to notify_clawbot.
SERVICE="awoooi-frequent"
# Host that is reached over SSH (as user "ollama") to run psql / pg_dump.
AWOOOI_HOST="192.168.0.188"
# Connection defaults; load_database_config overwrites USER/PASS/HOST/PORT with the
# values parsed from the resolved database URL.
AWOOOI_DB_USER="awoooi"
AWOOOI_DB_PASS="${AWOOOI_DB_PASS:-}"
AWOOOI_DB_HOST="localhost"
AWOOOI_DB_PORT="5432"
# Restic repository that receives the dump.
LOCAL_REPO="${BACKUP_BASE}/awoooi"
# Per-run scratch directory for the SQL dump ($$ is this shell's PID).
DUMP_DIR="/tmp/awoooi-freq-backup-$$"
# Kubernetes nodes and secret coordinates used by resolve_database_url when no URL is
# supplied through the environment. Every value can be overridden from the environment.
AWOOOI_K8S_HOST="${AWOOOI_K8S_HOST:-192.168.0.120}"
AWOOOI_K8S_HOSTS="${AWOOOI_K8S_HOSTS:-${AWOOOI_K8S_HOST} 192.168.0.121 192.168.0.125}"
AWOOOI_K8S_SECRET_NAME="${AWOOOI_K8S_SECRET_NAME:-awoooi-secrets}"
AWOOOI_K8S_NAMESPACE="${AWOOOI_K8S_NAMESPACE:-awoooi-prod}"
# Secret keys are tried in this order; the first one that decodes to a non-empty value wins.
AWOOOI_K8S_DATABASE_URL_KEYS="${AWOOOI_K8S_DATABASE_URL_KEYS:-AWOOOI_BACKUP_DATABASE_URL BACKUP_DATABASE_URL DATABASE_URL}"
# Pending "re-enable FORCE ROW LEVEL SECURITY" work. Set by dump_database_with_rls_guard
# and consumed (then cleared) by restore_force_rls, which also runs as the EXIT trap.
FORCE_RLS_RESTORE_SQL=""
FORCE_RLS_RESTORE_DB=""
# Retention policy for the high-frequency backup.
# 2026-05-19 ogt + Codex: the retention policy is delegated entirely to common.sh.
# The default is latest-only (keep-last=1) so high-frequency DB snapshots do not pile up.
#######################################
# Print the database URL to use for the backup.
# Lookup order: $AWOOOI_DATABASE_URL, then $DATABASE_URL, then the Kubernetes
# secret, trying every host in AWOOOI_K8S_HOSTS and, per host, every key in
# AWOOOI_K8S_DATABASE_URL_KEYS.
# Globals:   AWOOOI_DATABASE_URL, DATABASE_URL, AWOOOI_K8S_HOSTS,
#            AWOOOI_K8S_DATABASE_URL_KEYS, AWOOOI_K8S_SECRET_NAME,
#            AWOOOI_K8S_NAMESPACE (all read)
# Outputs:   the URL followed by a newline on stdout
# Returns:   0 when a URL was found, 1 otherwise
#######################################
resolve_database_url() {
  local env_candidate
  for env_candidate in "${AWOOOI_DATABASE_URL:-}" "${DATABASE_URL:-}"; do
    if [ -n "${env_candidate}" ]; then
      printf '%s\n' "${env_candidate}"
      return 0
    fi
  done

  # 2026-07-01 ogt + Codex: prefer the dedicated backup DB URL and fall back to
  # the runtime DATABASE_URL only when it is absent. The secret value is never
  # written to the log.
  local node secret_key remote_cmd b64_value plain_value
  # Both lists are deliberately unquoted: they are space-separated.
  for node in ${AWOOOI_K8S_HOSTS}; do
    for secret_key in ${AWOOOI_K8S_DATABASE_URL_KEYS}; do
      # Try passwordless sudo first, then plain kubectl as the SSH user.
      remote_cmd="sudo -n kubectl get secret ${AWOOOI_K8S_SECRET_NAME} -n ${AWOOOI_K8S_NAMESPACE} -o jsonpath='{.data.${secret_key}}' 2>/dev/null || kubectl get secret ${AWOOOI_K8S_SECRET_NAME} -n ${AWOOOI_K8S_NAMESPACE} -o jsonpath='{.data.${secret_key}}'"
      # Best effort: an unreachable host or missing key yields an empty value.
      b64_value="$(ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=8 "wooo@${node}" \
        "${remote_cmd}" \
        2>/dev/null || true)"
      plain_value="$(printf '%s' "${b64_value}" | base64 -d 2>/dev/null || true)"
      [ -n "${plain_value}" ] || continue
      printf '%s\n' "${plain_value}"
      return 0
    done
  done

  return 1
}
#######################################
# Resolve the database URL and load its components into the connection globals.
# Globals:   AWOOOI_DB_USER, AWOOOI_DB_PASS, AWOOOI_DB_HOST, AWOOOI_DB_PORT
#            (written, only when the URL was parsed successfully)
# Outputs:   error messages through log_error
# Returns:   0 on success; 1 when no URL could be resolved or it cannot be parsed
#######################################
load_database_config() {
  local database_url assignments
  database_url="$(resolve_database_url || true)"
  if [ -z "${database_url}" ]; then
    log_error "無法解析 AWOOOI DATABASE_URL拒絕使用舊硬編密碼"
    return 1
  fi

  # The URL reaches Python on fd 3 (never on argv) because stdin carries the
  # program text. Python's stderr is discarded so nothing derived from the
  # credential-bearing URL can end up in the log. The exit status is checked
  # explicitly: a parse failure (for example a non-numeric port) must not be
  # mistaken for success with stale or empty credentials.
  if ! assignments="$(
    python3 - 3<<< "${database_url}" 2>/dev/null <<'PY'
import shlex
from urllib.parse import unquote, urlparse

with open(3) as source:
    url = source.read().strip()

parsed = urlparse(url)
values = {
    "AWOOOI_DB_USER": unquote(parsed.username or "awoooi"),
    "AWOOOI_DB_PASS": unquote(parsed.password or ""),
    "AWOOOI_DB_HOST": parsed.hostname or "localhost",
    "AWOOOI_DB_PORT": str(parsed.port or 5432),
}
for key, value in values.items():
    print(f"{key}={shlex.quote(value)}")
PY
  )"; then
    log_error "AWOOOI DATABASE_URL 解析失敗"
    return 1
  fi

  # Safe to eval: every value was emitted through shlex.quote and the keys are
  # the fixed names listed above.
  eval "${assignments}"
}
#######################################
# Quote a value so it survives one round of parsing by a remote shell.
# POSIX single-quote escaping is used instead of printf %q: %q emits
# Bash-specific $'...' quoting for values containing newlines or other control
# characters, which a plain POSIX sh on the remote side would not decode.
# Arguments: $1 - raw value (may be empty or contain quotes and newlines)
# Outputs:   the quoted value on stdout, without a trailing newline
#######################################
quote_remote() {
  local raw="$1"
  local sq="'"
  # Close the quote, emit an escaped quote, reopen:  '  ->  '\''
  printf "'%s'" "${raw//${sq}/${sq}\\${sq}${sq}}"
}
#######################################
# Escape one field for a pgpass line: backslashes are doubled first, then each
# colon gets a backslash in front of it.
# Arguments: $1 - raw field value
# Outputs:   the escaped field on stdout, without a trailing newline
#######################################
pgpass_escape() {
  # Backslashes must be handled first so the ones added for colons stay single.
  local doubled="${1//\\/\\\\}"
  printf '%s' "${doubled//:/\\:}"
}
#######################################
# Print one pgpass entry (host:port:database:user:password) for a database,
# with every field passed through pgpass_escape.
# Globals:   AWOOOI_DB_HOST, AWOOOI_DB_PORT, AWOOOI_DB_USER, AWOOOI_DB_PASS (read)
# Arguments: $1 - database name
# Outputs:   the entry followed by a newline on stdout
#######################################
pgpass_line() {
  local database="$1"
  local raw_field
  local -a escaped_fields=()

  for raw_field in \
    "${AWOOOI_DB_HOST}" \
    "${AWOOOI_DB_PORT}" \
    "${database}" \
    "${AWOOOI_DB_USER}" \
    "${AWOOOI_DB_PASS}"; do
    escaped_fields+=("$(pgpass_escape "${raw_field}")")
  done

  # Join the escaped fields with ':' (IFS is changed for this function only).
  local IFS=':'
  printf '%s\n' "${escaped_fields[*]}"
}
#######################################
# Print the psql command prefix used on the remote host for a database.
# Every interpolated value goes through quote_remote; the command never prompts
# for a password and stops at the first SQL error.
# Globals:   AWOOOI_DB_USER, AWOOOI_DB_HOST, AWOOOI_DB_PORT (read)
# Arguments: $1 - database name
# Outputs:   the command string on stdout, without a trailing newline
#######################################
remote_psql_command() {
  local database="$1"
  local q_user q_host q_port q_database

  q_user="$(quote_remote "${AWOOOI_DB_USER}")"
  q_host="$(quote_remote "${AWOOOI_DB_HOST}")"
  q_port="$(quote_remote "${AWOOOI_DB_PORT}")"
  q_database="$(quote_remote "${database}")"

  printf 'psql --no-password -U %s -h %s -p %s -d %s -v ON_ERROR_STOP=1' \
    "${q_user}" "${q_host}" "${q_port}" "${q_database}"
}
remote_pgpass_wrapper() {
local command="$1"
printf 'umask 077; pgpass=$(mktemp "${TMPDIR:-/tmp}/awoooi-pgpass.XXXXXX") || exit 1; cleanup() { rm -f "$pgpass"; }; trap cleanup EXIT HUP INT TERM; cat > "$pgpass"; PGOPTIONS="-c statement_timeout=0 -c max_parallel_workers_per_gather=0" PGPASSFILE="$pgpass" %s' "${command}"
}
#######################################
# Run a command on AWOOOI_HOST (SSH user "ollama") with a temporary pgpass file.
# The pgpass entry for the database is streamed over SSH stdin, so the password
# never appears on a command line.
# Globals:   AWOOOI_HOST (read)
# Arguments: $1 - database name (selects the pgpass entry)
#            $2 - remote command to execute
# Outputs:   whatever the remote command writes
# Returns:   the status of the pipeline (pipefail is enabled at script level)
#######################################
run_remote_pgpass_command() {
  local database="$1"
  local command="$2"
  local wrapped_cmd

  wrapped_cmd="$(remote_pgpass_wrapper "${command}")"
  pgpass_line "${database}" | ssh "ollama@${AWOOOI_HOST}" "${wrapped_cmd}"
}
#######################################
# Print the short id of the newest snapshot in the restic repository.
# The restic JSON listing is reduced in Python to the row with the greatest
# "time" value; "unknown" is printed when the listing is empty or anything in
# the pipeline fails.
# Globals:   LOCAL_REPO, RESTIC_PASSWORD_FILE (read)
# Outputs:   the short id (or "unknown") followed by a newline on stdout
#######################################
latest_restic_snapshot_id() {
  local pick_newest='import json,sys; rows=json.load(sys.stdin); row=max(rows,key=lambda r: r.get("time","")) if rows else {}; print(row.get("short_id","unknown"))'

  if ! restic -r "${LOCAL_REPO}" snapshots --latest 1 --json \
    --password-file "${RESTIC_PASSWORD_FILE}" 2>/dev/null \
    | python3 -c "${pick_newest}" 2>/dev/null; then
    echo "unknown"
  fi
}
#######################################
# Generate ALTER TABLE statements for every table that currently has
# relforcerowsecurity set and is owned by the connecting role.
# Covers ordinary and partitioned tables (relkind 'r' and 'p').
# Arguments: $1 - database name
#            $2 - text placed before "ROW LEVEL SECURITY" (e.g. "NO FORCE")
# Outputs:   one statement per line on stdout (psql -At), sorted
# Returns:   the status of the remote psql invocation
#######################################
collect_force_rls_sql() {
  local database="$1"
  local mode="$2"
  local catalog_query psql_cmd quoted_query

  # ${mode} is expanded locally; %I.%I is filled in by PostgreSQL's format().
  catalog_query="
select format('ALTER TABLE %I.%I ${mode} ROW LEVEL SECURITY;', n.nspname, c.relname)
from pg_class c
join pg_namespace n on n.oid = c.relnamespace
where c.relkind in ('r', 'p')
and c.relforcerowsecurity
and pg_get_userbyid(c.relowner) = current_user
order by 1;
"
  psql_cmd="$(remote_psql_command "${database}")"
  quoted_query="$(quote_remote "${catalog_query}")"

  run_remote_pgpass_command "${database}" "${psql_cmd} -At -c ${quoted_query}"
}
#######################################
# Execute SQL against a database on the remote host, discarding psql's stdout.
# Arguments: $1 - database name
#            $2 - SQL text; an empty string is a no-op
# Returns:   0 when there is nothing to run, otherwise the remote psql status
#######################################
apply_remote_sql() {
  local database="$1"
  local sql="$2"

  if [ -z "${sql}" ]; then
    return 0
  fi

  local psql_cmd quoted_sql
  psql_cmd="$(remote_psql_command "${database}")"
  quoted_sql="$(quote_remote "${sql}")"
  run_remote_pgpass_command "${database}" "${psql_cmd} -c ${quoted_sql} >/dev/null"
}
#######################################
# Re-apply the pending FORCE ROW LEVEL SECURITY statements, if any.
# Does nothing unless both FORCE_RLS_RESTORE_DB and FORCE_RLS_RESTORE_SQL are
# set. On success both globals are cleared; on failure they are left in place,
# so a later call (including the EXIT trap) tries again.
# Globals:   FORCE_RLS_RESTORE_DB, FORCE_RLS_RESTORE_SQL (read, cleared on success)
# Returns:   0 when nothing was pending or the restore succeeded, 1 on failure
#######################################
restore_force_rls() {
  [ -n "${FORCE_RLS_RESTORE_DB}" ] || return 0
  [ -n "${FORCE_RLS_RESTORE_SQL}" ] || return 0

  if ! apply_remote_sql "${FORCE_RLS_RESTORE_DB}" "${FORCE_RLS_RESTORE_SQL}"; then
    log_error "FORCE ROW LEVEL SECURITY 恢復失敗 (${FORCE_RLS_RESTORE_DB})"
    return 1
  fi

  log_info "FORCE ROW LEVEL SECURITY 已恢復 (${FORCE_RLS_RESTORE_DB})"
  FORCE_RLS_RESTORE_DB=""
  FORCE_RLS_RESTORE_SQL=""
}
# Safety net: whatever path the script exits through, try to put FORCE RLS back.
trap restore_force_rls EXIT
#######################################
# Dump a database with pg_dump, temporarily lifting FORCE ROW LEVEL SECURITY
# from the tables that have it, and put it back afterwards.
#
# Every step is checked explicitly. The caller invokes this function inside an
# `if`, which disables errexit for the whole call, so nothing here may rely on
# `set -e` to notice a failure.
#
# Globals:   FORCE_RLS_RESTORE_DB, FORCE_RLS_RESTORE_SQL (written),
#            AWOOOI_DB_USER, AWOOOI_DB_HOST, AWOOOI_DB_PORT (read)
# Arguments: $1 - database name
#            $2 - path of the dump file; "<path>.stderr" holds pg_dump's stderr
# Outputs:   on dump failure, the last 40 stderr lines (password=... redacted)
# Returns:   0 on success; the pg_dump status when the dump fails; non-zero when
#            the FORCE RLS tables cannot be listed, FORCE cannot be lifted, or
#            FORCE cannot be restored after the dump
#######################################
dump_database_with_rls_guard() {
  local database="$1"
  local output_file="$2"
  local stderr_file="${output_file}.stderr"
  local noforce_sql force_sql dump_cmd
  local dump_rc=0 restore_rc=0

  if ! noforce_sql="$(collect_force_rls_sql "${database}" "NO FORCE")"; then
    log_error "無法查詢 FORCE RLS 資料表 (${database})"
    return 1
  fi

  # Flip only the trailing clause of each statement. Anchoring keeps a table
  # whose identifier happens to contain "NO FORCE" from being rewritten.
  force_sql="$(printf '%s\n' "${noforce_sql}" \
    | sed 's/ NO FORCE ROW LEVEL SECURITY;$/ FORCE ROW LEVEL SECURITY;/')"

  if [ -n "${noforce_sql}" ]; then
    # Record the restore work BEFORE changing anything so the EXIT trap can
    # still put FORCE back if the script dies half-way.
    FORCE_RLS_RESTORE_DB="${database}"
    FORCE_RLS_RESTORE_SQL="${force_sql}"
    log_info "暫時解除 FORCE RLS 以完成完整 pg_dump (${database}, tables=$(printf '%s\n' "${noforce_sql}" | awk 'NF {count++} END {print count+0}'))"
    if ! apply_remote_sql "${database}" "${noforce_sql}"; then
      log_error "解除 FORCE RLS 失敗 (${database})"
      # Best effort here; on failure the globals stay set for the EXIT trap.
      restore_force_rls || true
      return 1
    fi
  fi

  dump_cmd="pg_dump --no-password -U $(quote_remote "${AWOOOI_DB_USER}") -h $(quote_remote "${AWOOOI_DB_HOST}") -p $(quote_remote "${AWOOOI_DB_PORT}") $(quote_remote "${database}")"
  run_remote_pgpass_command "${database}" "${dump_cmd}" \
    > "${output_file}" 2> "${stderr_file}" || dump_rc=$?

  # Restore FORCE whether or not the dump worked, and remember the outcome: a
  # dump that leaves FORCE RLS disabled must not be reported as a success.
  restore_force_rls || restore_rc=$?

  if [ "${dump_rc}" -ne 0 ]; then
    log_error "${database} dump 失敗pg_dump stderr 尾端如下(已避免輸出 credential"
    tail -40 "${stderr_file}" | sed -E 's/(password=)[^ ]+/\1REDACTED/g' || true
    return "${dump_rc}"
  fi

  rm -f "${stderr_file}"
  return "${restore_rc}"
}
#######################################
# Entry point: dump awoooi_prod, store the dump in the restic repository, apply
# the retention policy, and report progress.
# Globals:   DUMP_DIR, LOCAL_REPO, RESTIC_PASSWORD_FILE, SERVICE (read)
# Outputs:   progress and errors through the log_* helpers; a failure
#            notification through notify_clawbot
# Returns:   exits non-zero when the DB config, the dump or the restic backup
#            fails; the scratch directory is removed on each of those paths
#######################################
main() {
  local start_time end_time timestamp size status snapshot_id
  start_time="$(date +%s)"
  log_info "========== AWOOOI 高頻備份 ($(date '+%H:%M')) =========="

  mkdir -p "${DUMP_DIR}"
  # The dump holds production data and lives under /tmp: owner-only access.
  chmod 700 "${DUMP_DIR}"

  load_database_config || {
    notify_clawbot "failed" "${SERVICE}" "AWOOOI 高頻備份失敗DATABASE_URL 不可用"
    rm -rf "${DUMP_DIR}"
    exit 1
  }

  timestamp="$(date "+%Y%m%d_%H%M%S")"

  # Back up awoooi_prod only (the high-frequency core database).
  if dump_database_with_rls_guard "awoooi_prod" "${DUMP_DIR}/awoooi_prod_${timestamp}.sql"; then
    # The size is informational only; never fail the backup over it.
    size="$(du -h "${DUMP_DIR}/awoooi_prod_${timestamp}.sql" | cut -f1)" || size="unknown"
    log_success "awoooi_prod dump 完成 (${size})"
  else
    status=$?
    log_error "awoooi_prod dump 失敗"
    notify_clawbot "failed" "${SERVICE}" "AWOOOI 高頻備份失敗"
    rm -rf "${DUMP_DIR}"
    exit "${status}"
  fi

  # Restic backup (same repository as the daily job, different frequency).
  # The status is captured instead of letting errexit abort, so a failure still
  # removes the dump and sends a notification.
  status=0
  restic -r "${LOCAL_REPO}" backup "${DUMP_DIR}" \
    --password-file "${RESTIC_PASSWORD_FILE}" \
    --tag "service:awoooi" --tag "freq:6h" \
    --tag "timestamp:${timestamp}" 2>&1 || status=$?

  # The plain-text dump is no longer needed once restic has run; remove it
  # before any later step gets a chance to abort the script.
  rm -rf "${DUMP_DIR}"

  if [ "${status}" -ne 0 ]; then
    log_error "restic backup 失敗 (exit ${status})"
    notify_clawbot "failed" "${SERVICE}" "AWOOOI 高頻備份失敗 (restic backup)"
    exit "${status}"
  fi

  snapshot_id="$(latest_restic_snapshot_id)"
  log_success "快照: ${snapshot_id}"

  cleanup_old_backups "${LOCAL_REPO}"

  end_time="$(date +%s)"
  log_success "========== AWOOOI 高頻備份完成 ($((end_time-start_time))s) =========="
}
main "$@"