From 75626305f5f9feef68bb9e87f975c89392f0e90d Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 2 Jul 2026 13:12:21 +0800 Subject: [PATCH] fix(api): retry slow stockplatform freshness readback --- ...ockplatform_public_api_runtime_readback.py | 85 ++++++++++++++----- ...ockplatform_public_api_runtime_readback.py | 31 +++++++ docs/LOGBOOK.md | 18 ++++ 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/apps/api/src/services/stockplatform_public_api_runtime_readback.py b/apps/api/src/services/stockplatform_public_api_runtime_readback.py index a168b550..97c952a1 100644 --- a/apps/api/src/services/stockplatform_public_api_runtime_readback.py +++ b/apps/api/src/services/stockplatform_public_api_runtime_readback.py @@ -25,6 +25,8 @@ _RECOVERY_RECEIPT_FILE = ( ) _DEFAULT_BASE_URL = "https://stock.wooo.work" _DEFAULT_TIMEOUT_SECONDS = 4.0 +_DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS = 10.0 +_DEFAULT_PROBE_ATTEMPTS = 2 Probe = Callable[[str, float], dict[str, Any]] @@ -33,6 +35,8 @@ def load_latest_stockplatform_public_api_runtime_readback( *, base_url: str = _DEFAULT_BASE_URL, timeout_seconds: float = _DEFAULT_TIMEOUT_SECONDS, + data_endpoint_timeout_seconds: float = _DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS, + probe_attempts: int = _DEFAULT_PROBE_ATTEMPTS, operations_dir: Path | None = None, probe: Probe | None = None, ) -> dict[str, Any]: @@ -46,23 +50,42 @@ def load_latest_stockplatform_public_api_runtime_readback( recovery_control_receipt = _load_recovery_control_receipt(directory) committed_stock = _dict(committed_scorecard.get("stockplatform_data_freshness")) endpoints = { - "public_web_healthz": "/healthz", - "public_api_healthz": "/api/healthz", - "freshness": "/api/v1/system/freshness", - "ingestion": "/api/v1/system/ingestion", + "public_web_healthz": { + "path": "/healthz", + "parse_json": False, + "timeout_seconds": timeout_seconds, + }, + "public_api_healthz": { + "path": "/api/healthz", + "parse_json": False, + "timeout_seconds": timeout_seconds, + }, + "freshness": { + "path": "/api/v1/system/freshness", + "parse_json": True, + "timeout_seconds": data_endpoint_timeout_seconds, + }, + "ingestion": { + "path": "/api/v1/system/ingestion", + "parse_json": True, + "timeout_seconds": data_endpoint_timeout_seconds, + }, } probes = { name: _probe_endpoint( http_probe, - f"{normalized_base_url}{path}", - timeout_seconds, - parse_json=name in {"freshness", "ingestion"}, + f"{normalized_base_url}{endpoint['path']}", + float(endpoint["timeout_seconds"]), + parse_json=bool(endpoint["parse_json"]), + attempts=probe_attempts, ) - for name, path in endpoints.items() + for name, endpoint in endpoints.items() } return _build_payload( base_url=normalized_base_url, timeout_seconds=timeout_seconds, + data_endpoint_timeout_seconds=data_endpoint_timeout_seconds, + probe_attempts=probe_attempts, probes=probes, committed_stockplatform=committed_stock, recovery_control_receipt=recovery_control_receipt, @@ -73,6 +96,8 @@ def _build_payload( *, base_url: str, timeout_seconds: float, + data_endpoint_timeout_seconds: float, + probe_attempts: int, probes: dict[str, dict[str, Any]], committed_stockplatform: dict[str, Any], recovery_control_receipt: dict[str, Any], @@ -144,6 +169,8 @@ def _build_payload( "live_drift_from_committed_scorecard": live_drift_from_committed_scorecard, "base_url": base_url, "timeout_seconds": timeout_seconds, + "data_endpoint_timeout_seconds": data_endpoint_timeout_seconds, + "probe_attempts": probe_attempts, "checks": checks, "readback": { "web_health_http_status": web.get("http_status"), @@ -379,23 +406,37 @@ def _probe_endpoint( timeout_seconds: float, *, parse_json: bool, + attempts: int, ) -> dict[str, Any]: - result = probe(url, timeout_seconds) - http_status = _int_or_none(result.get("http_status")) - body = str(result.get("body") or "") - payload: dict[str, Any] = { + bounded_attempts = max(1, int(attempts)) + last_payload: dict[str, Any] = { "url": url, - "http_status": http_status, - "ok": http_status == 200, - "error": str(result.get("error") or ""), + "http_status": None, + "ok": False, + "error": "not_attempted", + "attempt_count": 0, } - if parse_json and http_status == 200: - try: - payload["json"] = json.loads(body) - except json.JSONDecodeError: - payload["json"] = {} - payload["error"] = "invalid_json" - return payload + for attempt in range(1, bounded_attempts + 1): + result = probe(url, timeout_seconds) + http_status = _int_or_none(result.get("http_status")) + body = str(result.get("body") or "") + payload: dict[str, Any] = { + "url": url, + "http_status": http_status, + "ok": http_status == 200, + "error": str(result.get("error") or ""), + "attempt_count": attempt, + } + if parse_json and http_status == 200: + try: + payload["json"] = json.loads(body) + except json.JSONDecodeError: + payload["json"] = {} + payload["error"] = "invalid_json" + if payload.get("ok") and (not parse_json or payload.get("json")): + return payload + last_payload = payload + return last_payload def _http_probe(url: str, timeout_seconds: float) -> dict[str, Any]: diff --git a/apps/api/tests/test_stockplatform_public_api_runtime_readback.py b/apps/api/tests/test_stockplatform_public_api_runtime_readback.py index 6b73c708..cb2f043a 100644 --- a/apps/api/tests/test_stockplatform_public_api_runtime_readback.py +++ b/apps/api/tests/test_stockplatform_public_api_runtime_readback.py @@ -78,6 +78,37 @@ def test_stockplatform_public_api_runtime_readback_ready_when_live_green(): assert payload["rollups"]["http_502_count"] == 0 +def test_stockplatform_public_api_runtime_readback_retries_slow_data_endpoints(): + calls: dict[str, int] = {} + + def probe(url: str, timeout_seconds: float) -> dict: + assert timeout_seconds == ( + 10.0 + if url.endswith(("/api/v1/system/freshness", "/api/v1/system/ingestion")) + else 4.0 + ) + calls[url] = calls.get(url, 0) + 1 + if url.endswith(("/api/v1/system/freshness", "/api/v1/system/ingestion")): + if calls[url] == 1: + return {"http_status": None, "body": "", "error": "TimeoutError"} + return { + "http_status": 200, + "body": json.dumps({"status": "ok", "blockers": []}), + "error": "", + } + return {"http_status": 200, "body": "ok", "error": ""} + + payload = load_latest_stockplatform_public_api_runtime_readback(probe=probe) + + assert payload["status"] == "stockplatform_public_api_runtime_ready" + assert payload["runtime_ready"] is True + assert payload["active_blockers"] == [] + assert payload["probes"]["freshness"]["attempt_count"] == 2 + assert payload["probes"]["ingestion"]["attempt_count"] == 2 + assert payload["data_endpoint_timeout_seconds"] == 10.0 + assert payload["probe_attempts"] == 2 + + def test_stockplatform_public_api_runtime_readback_routes_postgres_not_ready(): payload = load_latest_stockplatform_public_api_runtime_readback( probe=_probe_public_api_ok_postgres_not_ready diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 3ad0ba0c..7c644dea 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,21 @@ +## 2026-07-02 — 13:18 StockPlatform freshness 慢回應 false-blocker 修正 + +**完成內容**: +- `stockplatform_public_api_runtime_readback` 對 `/api/v1/system/freshness` 與 `/api/v1/system/ingestion` 改用 data endpoint timeout 10 秒與 2 次 probe attempts;public route health 仍維持 4 秒,避免正常但約 4 秒才回的資料 readback 被 AWOOOI priority 誤判為 `stockplatform_*_unreachable`。 +- 新增 regression:data endpoint 首次 timeout / unreachable、第二次回 `status=ok` 時,runtime readback 必須判定 `stockplatform_public_api_runtime_ready` 且 active blockers 為空。 + +**live readback 證據**: +- StockPlatform public `/healthz=200`、`/api/healthz=200`;`freshness status=ok latest_trading_date=2026-07-01 blockers=[]`;`ingestion status=ok coverage=100% blockers=[]`。 +- 本地套用修正後,AWOOOI priority 回到 `p0_006_blocked_reboot_auto_recovery_slo_not_ready`,`stock_blockers=[]`,剩餘 blocker 為 reboot SLO / host / backup / Wazuh 類 P0-006 主線。 + +**驗證**: +- `python -m py_compile apps/api/src/services/stockplatform_public_api_runtime_readback.py apps/api/src/services/stockplatform_public_api_controlled_recovery_preflight.py apps/api/src/services/awoooi_priority_work_order_readback.py`:通過。 +- `DATABASE_URL=sqlite:////tmp/awoooi-test.db pytest apps/api/tests/test_stockplatform_public_api_runtime_readback.py apps/api/tests/test_stockplatform_public_api_controlled_recovery_preflight.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py -q`:`23 passed`。 +- `git diff --check`:通過。 + +**仍維持**: +- 未使用 GitHub / `gh` / GitHub API;未讀 secret / token / `.env` / raw sessions / SQLite / auth;未觸發 workflow;未重啟主機 / Docker / Nginx / K3s / DB / firewall;未寫 StockPlatform DB。 + ## 2026-07-02 — 13:10 Telegram 告警 receipt 與 AI controlled readback 補強 **完成內容**: