fix(api): retry slow stockplatform freshness readback
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 51s
CD Pipeline / build-and-deploy (push) Successful in 5m28s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Successful in 55s
CD Pipeline / post-deploy-checks (push) Successful in 2m10s

This commit is contained in:
Your Name
2026-07-02 13:12:21 +08:00
parent e206a9e91d
commit 75626305f5
3 changed files with 112 additions and 22 deletions

View File

@@ -25,6 +25,8 @@ _RECOVERY_RECEIPT_FILE = (
)
_DEFAULT_BASE_URL = "https://stock.wooo.work"
_DEFAULT_TIMEOUT_SECONDS = 4.0
_DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS = 10.0
_DEFAULT_PROBE_ATTEMPTS = 2
Probe = Callable[[str, float], dict[str, Any]]
@@ -33,6 +35,8 @@ def load_latest_stockplatform_public_api_runtime_readback(
*,
base_url: str = _DEFAULT_BASE_URL,
timeout_seconds: float = _DEFAULT_TIMEOUT_SECONDS,
data_endpoint_timeout_seconds: float = _DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS,
probe_attempts: int = _DEFAULT_PROBE_ATTEMPTS,
operations_dir: Path | None = None,
probe: Probe | None = None,
) -> dict[str, Any]:
@@ -46,23 +50,42 @@ def load_latest_stockplatform_public_api_runtime_readback(
recovery_control_receipt = _load_recovery_control_receipt(directory)
committed_stock = _dict(committed_scorecard.get("stockplatform_data_freshness"))
endpoints = {
"public_web_healthz": "/healthz",
"public_api_healthz": "/api/healthz",
"freshness": "/api/v1/system/freshness",
"ingestion": "/api/v1/system/ingestion",
"public_web_healthz": {
"path": "/healthz",
"parse_json": False,
"timeout_seconds": timeout_seconds,
},
"public_api_healthz": {
"path": "/api/healthz",
"parse_json": False,
"timeout_seconds": timeout_seconds,
},
"freshness": {
"path": "/api/v1/system/freshness",
"parse_json": True,
"timeout_seconds": data_endpoint_timeout_seconds,
},
"ingestion": {
"path": "/api/v1/system/ingestion",
"parse_json": True,
"timeout_seconds": data_endpoint_timeout_seconds,
},
}
probes = {
name: _probe_endpoint(
http_probe,
f"{normalized_base_url}{path}",
timeout_seconds,
parse_json=name in {"freshness", "ingestion"},
f"{normalized_base_url}{endpoint['path']}",
float(endpoint["timeout_seconds"]),
parse_json=bool(endpoint["parse_json"]),
attempts=probe_attempts,
)
for name, path in endpoints.items()
for name, endpoint in endpoints.items()
}
return _build_payload(
base_url=normalized_base_url,
timeout_seconds=timeout_seconds,
data_endpoint_timeout_seconds=data_endpoint_timeout_seconds,
probe_attempts=probe_attempts,
probes=probes,
committed_stockplatform=committed_stock,
recovery_control_receipt=recovery_control_receipt,
@@ -73,6 +96,8 @@ def _build_payload(
*,
base_url: str,
timeout_seconds: float,
data_endpoint_timeout_seconds: float,
probe_attempts: int,
probes: dict[str, dict[str, Any]],
committed_stockplatform: dict[str, Any],
recovery_control_receipt: dict[str, Any],
@@ -144,6 +169,8 @@ def _build_payload(
"live_drift_from_committed_scorecard": live_drift_from_committed_scorecard,
"base_url": base_url,
"timeout_seconds": timeout_seconds,
"data_endpoint_timeout_seconds": data_endpoint_timeout_seconds,
"probe_attempts": probe_attempts,
"checks": checks,
"readback": {
"web_health_http_status": web.get("http_status"),
@@ -379,23 +406,37 @@ def _probe_endpoint(
timeout_seconds: float,
*,
parse_json: bool,
attempts: int,
) -> dict[str, Any]:
result = probe(url, timeout_seconds)
http_status = _int_or_none(result.get("http_status"))
body = str(result.get("body") or "")
payload: dict[str, Any] = {
bounded_attempts = max(1, int(attempts))
last_payload: dict[str, Any] = {
"url": url,
"http_status": http_status,
"ok": http_status == 200,
"error": str(result.get("error") or ""),
"http_status": None,
"ok": False,
"error": "not_attempted",
"attempt_count": 0,
}
if parse_json and http_status == 200:
try:
payload["json"] = json.loads(body)
except json.JSONDecodeError:
payload["json"] = {}
payload["error"] = "invalid_json"
return payload
for attempt in range(1, bounded_attempts + 1):
result = probe(url, timeout_seconds)
http_status = _int_or_none(result.get("http_status"))
body = str(result.get("body") or "")
payload: dict[str, Any] = {
"url": url,
"http_status": http_status,
"ok": http_status == 200,
"error": str(result.get("error") or ""),
"attempt_count": attempt,
}
if parse_json and http_status == 200:
try:
payload["json"] = json.loads(body)
except json.JSONDecodeError:
payload["json"] = {}
payload["error"] = "invalid_json"
if payload.get("ok") and (not parse_json or payload.get("json")):
return payload
last_payload = payload
return last_payload
def _http_probe(url: str, timeout_seconds: float) -> dict[str, Any]:

View File

@@ -78,6 +78,37 @@ def test_stockplatform_public_api_runtime_readback_ready_when_live_green():
assert payload["rollups"]["http_502_count"] == 0
def test_stockplatform_public_api_runtime_readback_retries_slow_data_endpoints():
    """A data endpoint that fails its first probe must be retried and pass.

    The stub probe fails the first request to the freshness and ingestion
    endpoints with a ``TimeoutError`` result and answers every later request
    with a healthy JSON body. The two health routes always answer 200.
    The stub also checks the timeout it is handed: 10.0 seconds for the two
    data endpoints and 4.0 seconds for everything else.
    """
    data_endpoint_suffixes = (
        "/api/v1/system/freshness",
        "/api/v1/system/ingestion",
    )
    call_counts: dict[str, int] = {}

    def flaky_probe(url: str, timeout_seconds: float) -> dict:
        is_data_endpoint = url.endswith(data_endpoint_suffixes)
        expected_timeout = 10.0 if is_data_endpoint else 4.0
        assert timeout_seconds == expected_timeout

        call_counts[url] = call_counts.get(url, 0) + 1

        # Health routes are never flaky in this scenario.
        if not is_data_endpoint:
            return {"http_status": 200, "body": "ok", "error": ""}

        # First hit on a data endpoint simulates a slow/unreachable backend.
        if call_counts[url] == 1:
            return {"http_status": None, "body": "", "error": "TimeoutError"}

        healthy_body = json.dumps({"status": "ok", "blockers": []})
        return {"http_status": 200, "body": healthy_body, "error": ""}

    payload = load_latest_stockplatform_public_api_runtime_readback(
        probe=flaky_probe
    )
    probes = payload["probes"]

    # The retry must turn the initial timeout into a fully green readback.
    assert payload["status"] == "stockplatform_public_api_runtime_ready"
    assert payload["runtime_ready"] is True
    assert payload["active_blockers"] == []

    # Each data endpoint needed exactly two attempts.
    assert probes["freshness"]["attempt_count"] == 2
    assert probes["ingestion"]["attempt_count"] == 2

    # The effective probe settings are echoed back in the payload.
    assert payload["data_endpoint_timeout_seconds"] == 10.0
    assert payload["probe_attempts"] == 2
def test_stockplatform_public_api_runtime_readback_routes_postgres_not_ready():
payload = load_latest_stockplatform_public_api_runtime_readback(
probe=_probe_public_api_ok_postgres_not_ready

View File

@@ -1,3 +1,21 @@
## 2026-07-02 — 13:18 StockPlatform freshness 慢回應 false-blocker 修正
**完成內容**
- `stockplatform_public_api_runtime_readback` 對 `/api/v1/system/freshness`、`/api/v1/system/ingestion` 改用 data endpoint timeout 10 秒與 2 次 probe attempts；public route health 仍維持 4 秒，避免正常但約 4 秒才回的資料 readback 被 AWOOOI priority 誤判為 `stockplatform_*_unreachable`。
- 新增 regression：data endpoint 首次 timeout / unreachable、第二次回 `status=ok` 時，runtime readback 必須判定 `stockplatform_public_api_runtime_ready` 且 active blockers 為空。
**live readback 證據**
- StockPlatform public `/healthz=200`、`/api/healthz=200`、`freshness status=ok latest_trading_date=2026-07-01 blockers=[]`、`ingestion status=ok coverage=100% blockers=[]`。
- 本地套用修正後，AWOOOI priority 回到 `p0_006_blocked_reboot_auto_recovery_slo_not_ready`，`stock_blockers=[]`，剩餘 blocker 為 reboot SLO / host / backup / Wazuh 類 P0-006 主線。
**驗證**
- `python -m py_compile apps/api/src/services/stockplatform_public_api_runtime_readback.py apps/api/src/services/stockplatform_public_api_controlled_recovery_preflight.py apps/api/src/services/awoooi_priority_work_order_readback.py`:通過。
- `DATABASE_URL=sqlite:////tmp/awoooi-test.db pytest apps/api/tests/test_stockplatform_public_api_runtime_readback.py apps/api/tests/test_stockplatform_public_api_controlled_recovery_preflight.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py -q`：`23 passed`。
- `git diff --check`:通過。
**仍維持**
- 未使用 GitHub / `gh` / GitHub API；未讀 secret / token / `.env` / raw sessions / SQLite / auth；未觸發 workflow；未重啟主機 / Docker / Nginx / K3s / DB / firewall；未寫 StockPlatform DB。
## 2026-07-02 — 13:10 Telegram 告警 receipt 與 AI controlled readback 補強
**完成內容**