fix(cold-start): split stale evidence from live blockers
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Failing after 1m51s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped

This commit is contained in:
Your Name
2026-07-01 21:34:13 +08:00
parent 2cfb891e57
commit 10275a9d7b
5 changed files with 107 additions and 11 deletions

View File

@@ -770,7 +770,15 @@ printf "%s\n" "$momo_drive_source_probe"
awk '/MOMO_GDRIVE_TOKEN_STAT / {split($2,a,":"); split($3,b,"="); exit !(a[1] == b[2] && a[3] <= 600)}' <<<"$out" && ok "188 momo Google Drive token ownership matches scheduler userns" || warn "188 momo Google Drive token ownership/writeback not confirmed"
grep -Fq "MOMO_IMPORT_CONFIG 當日業績匯入|即時業績_當日" <<<"$out" && ok "188 momo Drive import config points to expected daily-sales intake" || fail "188 momo Drive import config drifted from expected daily-sales intake"
awk '/MOMO_LATEST_IMPORT_JOB / {split($2,a,"|"); exit !(a[1] ~ /^[0-9]+$/ && a[2] == "completed" && a[6] == a[7] && a[8] == 0)}' <<<"$out" && ok "188 momo latest daily import job completed cleanly" || warn "188 momo latest daily import job not confirmed clean"
awk '/MOMO_MONTHLY_SYNC / {split($2,a,"|"); exit !(a[1] > 0 && a[1] == a[2] && a[3] == a[5] && a[4] == a[6])}' <<<"$out" && ok "188 momo current-month snapshot and realtime tables match" || warn "188 momo current-month snapshot/realtime sync not confirmed"
# Derive momo_latest_import_clean from the MOMO_LATEST_IMPORT_JOB line in $out.
# The line's second whitespace-separated field is split on "|"; the awk program
# prints 1 when a[1] is all digits, a[2] is "completed", a[6] equals a[7] and
# a[8] is 0, and prints 0 otherwise. If no MOMO_LATEST_IMPORT_JOB line is present
# at all, the END rule prints 0 so the variable is never left empty.
# NOTE(review): the meaning of the individual "|" fields is not visible here —
# confirm against the probe that emits MOMO_LATEST_IMPORT_JOB.
momo_latest_import_clean=$(awk '
$1 == "MOMO_LATEST_IMPORT_JOB" {
seen=1
split($2,a,"|")
if (a[1] ~ /^[0-9]+$/ && a[2] == "completed" && a[6] == a[7] && a[8] == 0) print 1;
else print 0;
}
END { if (!seen) print 0 }
' <<<"$out")
momo_source_stale_only=$(awk '
$1 == "MOMO_DRIVE_INTAKE_COUNT" {intake=$2+0}
$1 == "MOMO_DRIVE_FAILED_COUNT" {failed=$2+0}
@@ -780,19 +788,42 @@ printf "%s\n" "$momo_drive_source_probe"
if (intake == 0 && failed == 0 && global ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/ && completed ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/ && global <= completed) print 1;
else print 0;
}' <<<"$out")
# Optionally upgrade momo_source_stale_only using the external source preflight.
# The preflight only runs when the local probe did not already conclude
# "stale only", the latest import job looked clean, and the preflight script is
# executable. Its stderr is discarded and a non-zero exit is tolerated
# (`|| true`), so a failing preflight simply yields no evidence.
momo_source_preflight_output=""
momo_source_preflight_summary=""
if [ "$momo_source_stale_only" != "1" ] && [ "$momo_latest_import_clean" = "1" ] && [ -x "$MOMO_SOURCE_PREFLIGHT_SCRIPT" ]; then
momo_source_preflight_output="$(
"$MOMO_SOURCE_PREFLIGHT_SCRIPT" \
--host ollama@192.168.0.188 \
--freshness-max-days 2 2>/dev/null || true
)"
# Keep only the last MOMO_DRIVE_TOKEN_SOURCE_PREFLIGHT line as the summary and
# echo it when one was produced.
momo_source_preflight_summary="$(awk '/^MOMO_DRIVE_TOKEN_SOURCE_PREFLIGHT / {line=$0} END {print line}' <<<"$momo_source_preflight_output")"
[ -n "$momo_source_preflight_summary" ] && echo "$momo_source_preflight_summary"
# Promote to "stale only" when the preflight reported
# MOMO_SOURCE_ABSENT_WITHOUT_NEWER_DRIVE with value 1; if the key appears more
# than once, the last occurrence decides.
if awk '$1 == "MOMO_SOURCE_ABSENT_WITHOUT_NEWER_DRIVE" {seen=1; ok=($2 == 1)} END {exit !(seen && ok)}' <<<"$momo_source_preflight_output"; then
momo_source_stale_only=1
fi
fi
# Three-way verdict on the MOMO_MONTHLY_SYNC line in $out (second field split on "|"):
#   1. ok   — a line was seen with a[1] > 0, a[1] == a[2], a[3] == a[5], a[4] == a[6].
#   2. ok   — both a[1] and a[2] are 0, but only when the latest import is clean
#             and the source is classified as stale-only (no newer source).
#   3. warn — anything else, including a missing MOMO_MONTHLY_SYNC line.
# NOTE(review): the fields presumably are snapshot count | monthly count |
# snapshot min/max dates | monthly min/max dates — confirm against the emitter.
if awk '/MOMO_MONTHLY_SYNC / {seen=1; split($2,a,"|"); ok=(a[1] > 0 && a[1] == a[2] && a[3] == a[5] && a[4] == a[6])} END {exit !(seen && ok)}' <<<"$out"; then
ok "188 momo current-month snapshot and realtime tables match"
elif [ "$momo_latest_import_clean" = "1" ] && [ "$momo_source_stale_only" = "1" ] && awk '/MOMO_MONTHLY_SYNC / {seen=1; split($2,a,"|"); ok=(a[1] == 0 && a[2] == 0)} END {exit !(seen && ok)}' <<<"$out"; then
ok "188 momo current-month sync not applicable; Drive has no newer source than last clean import"
else
warn "188 momo current-month snapshot/realtime sync not confirmed"
fi
if awk '/MOMO_DAILY_FRESHNESS / {split($2,a,"|"); exit !(a[1] ~ /^[0-9]+$/ && a[1] >= 0 && a[1] <= 2)}' <<<"$out"; then
ok "188 momo daily sales data fresh enough"
elif awk '/MOMO_DAILY_FRESHNESS / {split($2,a,"|"); exit !(a[1] ~ /^[0-9]+$/ && a[1] >= 3)}' <<<"$out"; then
if [ "$momo_source_stale_only" = "1" ]; then
warn "188 momo daily sales stale but Drive has no newer source candidate"
elif [ -x "$MOMO_SOURCE_PREFLIGHT_SCRIPT" ]; then
momo_source_preflight_summary="$(
"$MOMO_SOURCE_PREFLIGHT_SCRIPT" \
--host ollama@192.168.0.188 \
--freshness-max-days 2 2>/dev/null \
| awk '/^MOMO_DRIVE_TOKEN_SOURCE_PREFLIGHT / {line=$0} END {print line}' || true
)"
[ -n "$momo_source_preflight_summary" ] && echo "$momo_source_preflight_summary"
if [ -z "$momo_source_preflight_summary" ]; then
momo_source_preflight_output="$(
"$MOMO_SOURCE_PREFLIGHT_SCRIPT" \
--host ollama@192.168.0.188 \
--freshness-max-days 2 2>/dev/null || true
)"
momo_source_preflight_summary="$(awk '/^MOMO_DRIVE_TOKEN_SOURCE_PREFLIGHT / {line=$0} END {print line}' <<<"$momo_source_preflight_output")"
[ -n "$momo_source_preflight_summary" ] && echo "$momo_source_preflight_summary"
fi
if grep -q "BLOCKED=0" <<<"$momo_source_preflight_summary"; then
warn "188 momo daily sales stale but source preflight has no hard blocker"
elif awk '/MOMO_SOURCE_EMPTY_EVIDENCE_LINES / {exit !($2 > 0)}' <<<"$out"; then
@@ -844,7 +875,12 @@ fi
awk '/TEXTFILE_110 storage_health.prom age=/ {split($3,a,"="); exit !(a[2] < 300)}' <<<"$out" && ok "110 storage health exporter fresh" || warn "110 storage health exporter stale"
awk '/TEXTFILE_110 backup_health.prom age=/ {split($3,a,"="); exit !(a[2] < 900)}' <<<"$out" && ok "110 backup health exporter fresh" || warn "110 backup health exporter stale"
grep -q "STORAGE_HEALTH_110 root_readonly=0 current=0" <<<"$out" && ok "110 current boot storage health clean" || warn "110 storage health not clean"
grep -q "BACKUP_HEALTH_110 total=" <<<"$out" && awk '/BACKUP_HEALTH_110/ {split($3,a,"="); split($4,b,"="); split($5,c,"="); split($6,d,"="); split($7,e,"="); exit !((a[2]+b[2]+c[2]) == 0 && d[2] == 0 && e[2] == 0)}' <<<"$out" && ok "110 backup health has no stale expected jobs" || warn "110 latest aggregate/config backup had failed components; rerun backup-all after 120 recovers"
# Verdict on the BACKUP_HEALTH_110 line in $out. Requires a "total=" entry to be
# present, then sums the key=value numbers in fields 3, 4, 5 and 7; a zero sum
# is reported as ok. Field 6 is deliberately excluded from the pass/fail sum:
# when it is > 0 only an INFO line is printed.
# NOTE(review): field 6 is presumably failed_count of the last aggregate
# backup-all run (per the INFO text) — confirm against the exporter format.
if grep -q "BACKUP_HEALTH_110 total=" <<<"$out" && awk '/BACKUP_HEALTH_110/ {split($3,a,"="); split($4,b,"="); split($5,c,"="); split($7,e,"="); exit !((a[2]+b[2]+c[2]+e[2]) == 0)}' <<<"$out"; then
ok "110 backup health has no stale expected jobs or critical config gaps"
awk '/BACKUP_HEALTH_110/ {split($6,a,"="); exit !(a[2] > 0)}' <<<"$out" && echo "INFO 110 latest aggregate backup-all still records failed_count but current component freshness is clean"
else
warn "110 backup health has stale expected jobs or critical config gaps"
fi
awk '/BACKUP_HEALTH_110/ {split($9,a,"="); exit !(a[2] == 0)}' <<<"$out" && ok "110 backup integrity and restore drill fresh" || warn "110 backup integrity or restore drill stale"
else
warn "110 schedule check unavailable"

View File

@@ -342,6 +342,8 @@ if [[ "$drive_intake_count" -eq 0 \
&& [[ "$drive_global_latest_date" < "$job_completed_date" || "$drive_global_latest_date" == "$job_completed_date" ]]; then
source_absent_without_newer_drive=1
fi
printf 'MOMO_LATEST_IMPORT_CLEAN %s\n' "$latest_job_clean"
printf 'MOMO_SOURCE_ABSENT_WITHOUT_NEWER_DRIVE %s\n' "$source_absent_without_newer_drive"
monthly_sync="$(value_for DB_MONTHLY_SYNC)"
IFS='|' read -r sync_snapshot_count sync_monthly_count sync_dmin sync_dmax sync_mmin sync_mmax <<<"$monthly_sync"

View File

@@ -5,6 +5,9 @@ from pathlib import Path
# Directory three levels above the one containing this test file
# (parents[3] of the resolved path).
# NOTE(review): presumably the repository root — confirm against the repo layout.
ROOT = Path(__file__).resolve().parents[3]
# Shell scripts under scripts/reboot-recovery whose source text the tests inspect.
COLD_START_CHECK = ROOT / "scripts" / "reboot-recovery" / "full-stack-cold-start-check.sh"
MOMO_SOURCE_PREFLIGHT = (
ROOT / "scripts" / "reboot-recovery" / "momo-drive-token-source-recovery-preflight.sh"
)
INSTALL_COLD_START = (
ROOT / "scripts" / "reboot-recovery" / "install-cold-start-monitor-110.sh"
)
@@ -49,6 +52,33 @@ def test_full_stack_cold_start_check_bounds_ssh_probes() -> None:
assert "SSH_110_RECOVERY_PACKAGE_NEXT_ACTION verify_or_preinstall_local_recovery_package_from_console_before_harbor_repair_retry" in text
def test_cold_start_momo_current_month_handles_no_new_source_without_false_warn() -> None:
    """Check the cold-start script contains the no-new-source monthly-sync handling.

    This is a static text check: the script is read as UTF-8 and each expected
    fragment must appear somewhere in its source.
    """
    script_source = COLD_START_CHECK.read_text(encoding="utf-8")
    expected_fragments = (
        "momo_latest_import_clean",
        "momo_source_stale_only",
        "MOMO_SOURCE_ABSENT_WITHOUT_NEWER_DRIVE",
        "Drive has no newer source than last clean import",
        "a[1] == 0 && a[2] == 0",
    )
    for fragment in expected_fragments:
        assert fragment in script_source
def test_momo_source_preflight_emits_machine_readable_no_new_source_evidence() -> None:
    """Check the preflight script source contains the no-new-source evidence keys.

    This is a static text check on the script's UTF-8 source; the script is
    not executed.
    """
    preflight_source = MOMO_SOURCE_PREFLIGHT.read_text(encoding="utf-8")
    expected_fragments = (
        "MOMO_LATEST_IMPORT_CLEAN",
        "MOMO_SOURCE_ABSENT_WITHOUT_NEWER_DRIVE",
        "source_absent_without_newer_drive=1",
    )
    for fragment in expected_fragments:
        assert fragment in preflight_source
def test_cold_start_110_backup_splits_current_freshness_from_old_aggregate_failure() -> None:
    """Check the cold-start script contains the split 110 backup-health handling.

    This is a static text check: it looks for the two message fragments and the
    two awk snippets (the field-6 check and the field 3/4/5/7 sum) in the
    script's UTF-8 source.
    """
    script_source = COLD_START_CHECK.read_text(encoding="utf-8")
    expected_fragments = (
        "critical config gaps",
        "current component freshness is clean",
        "split($6,a,\"=\"); exit !(a[2] > 0)",
        "split($7,e,\"=\"); exit !((a[2]+b[2]+c[2]+e[2]) == 0)",
    )
    for fragment in expected_fragments:
        assert fragment in script_source
def test_recovery_scorecard_bounds_offsite_evidence_ssh() -> None:
text = RECOVERY_SCORECARD.read_text(encoding="utf-8")