fix(api): retry slow stockplatform freshness readback
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 51s
CD Pipeline / build-and-deploy (push) Successful in 5m28s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Successful in 55s
CD Pipeline / post-deploy-checks (push) Successful in 2m10s
All checks were successful
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 51s
CD Pipeline / build-and-deploy (push) Successful in 5m28s
AWOOOI Harbor 110 Local Repair / workflow-shape (push) Successful in 0s
AWOOOI Harbor 110 Local Repair / harbor-110-local-repair (push) Successful in 55s
CD Pipeline / post-deploy-checks (push) Successful in 2m10s
This commit is contained in:
@@ -25,6 +25,8 @@ _RECOVERY_RECEIPT_FILE = (
|
||||
)
|
||||
_DEFAULT_BASE_URL = "https://stock.wooo.work"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 4.0
|
||||
_DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS = 10.0
|
||||
_DEFAULT_PROBE_ATTEMPTS = 2
|
||||
|
||||
Probe = Callable[[str, float], dict[str, Any]]
|
||||
|
||||
@@ -33,6 +35,8 @@ def load_latest_stockplatform_public_api_runtime_readback(
|
||||
*,
|
||||
base_url: str = _DEFAULT_BASE_URL,
|
||||
timeout_seconds: float = _DEFAULT_TIMEOUT_SECONDS,
|
||||
data_endpoint_timeout_seconds: float = _DEFAULT_DATA_ENDPOINT_TIMEOUT_SECONDS,
|
||||
probe_attempts: int = _DEFAULT_PROBE_ATTEMPTS,
|
||||
operations_dir: Path | None = None,
|
||||
probe: Probe | None = None,
|
||||
) -> dict[str, Any]:
|
||||
@@ -46,23 +50,42 @@ def load_latest_stockplatform_public_api_runtime_readback(
|
||||
recovery_control_receipt = _load_recovery_control_receipt(directory)
|
||||
committed_stock = _dict(committed_scorecard.get("stockplatform_data_freshness"))
|
||||
endpoints = {
|
||||
"public_web_healthz": "/healthz",
|
||||
"public_api_healthz": "/api/healthz",
|
||||
"freshness": "/api/v1/system/freshness",
|
||||
"ingestion": "/api/v1/system/ingestion",
|
||||
"public_web_healthz": {
|
||||
"path": "/healthz",
|
||||
"parse_json": False,
|
||||
"timeout_seconds": timeout_seconds,
|
||||
},
|
||||
"public_api_healthz": {
|
||||
"path": "/api/healthz",
|
||||
"parse_json": False,
|
||||
"timeout_seconds": timeout_seconds,
|
||||
},
|
||||
"freshness": {
|
||||
"path": "/api/v1/system/freshness",
|
||||
"parse_json": True,
|
||||
"timeout_seconds": data_endpoint_timeout_seconds,
|
||||
},
|
||||
"ingestion": {
|
||||
"path": "/api/v1/system/ingestion",
|
||||
"parse_json": True,
|
||||
"timeout_seconds": data_endpoint_timeout_seconds,
|
||||
},
|
||||
}
|
||||
probes = {
|
||||
name: _probe_endpoint(
|
||||
http_probe,
|
||||
f"{normalized_base_url}{path}",
|
||||
timeout_seconds,
|
||||
parse_json=name in {"freshness", "ingestion"},
|
||||
f"{normalized_base_url}{endpoint['path']}",
|
||||
float(endpoint["timeout_seconds"]),
|
||||
parse_json=bool(endpoint["parse_json"]),
|
||||
attempts=probe_attempts,
|
||||
)
|
||||
for name, path in endpoints.items()
|
||||
for name, endpoint in endpoints.items()
|
||||
}
|
||||
return _build_payload(
|
||||
base_url=normalized_base_url,
|
||||
timeout_seconds=timeout_seconds,
|
||||
data_endpoint_timeout_seconds=data_endpoint_timeout_seconds,
|
||||
probe_attempts=probe_attempts,
|
||||
probes=probes,
|
||||
committed_stockplatform=committed_stock,
|
||||
recovery_control_receipt=recovery_control_receipt,
|
||||
@@ -73,6 +96,8 @@ def _build_payload(
|
||||
*,
|
||||
base_url: str,
|
||||
timeout_seconds: float,
|
||||
data_endpoint_timeout_seconds: float,
|
||||
probe_attempts: int,
|
||||
probes: dict[str, dict[str, Any]],
|
||||
committed_stockplatform: dict[str, Any],
|
||||
recovery_control_receipt: dict[str, Any],
|
||||
@@ -144,6 +169,8 @@ def _build_payload(
|
||||
"live_drift_from_committed_scorecard": live_drift_from_committed_scorecard,
|
||||
"base_url": base_url,
|
||||
"timeout_seconds": timeout_seconds,
|
||||
"data_endpoint_timeout_seconds": data_endpoint_timeout_seconds,
|
||||
"probe_attempts": probe_attempts,
|
||||
"checks": checks,
|
||||
"readback": {
|
||||
"web_health_http_status": web.get("http_status"),
|
||||
@@ -379,23 +406,37 @@ def _probe_endpoint(
|
||||
timeout_seconds: float,
|
||||
*,
|
||||
parse_json: bool,
|
||||
attempts: int,
|
||||
) -> dict[str, Any]:
|
||||
result = probe(url, timeout_seconds)
|
||||
http_status = _int_or_none(result.get("http_status"))
|
||||
body = str(result.get("body") or "")
|
||||
payload: dict[str, Any] = {
|
||||
bounded_attempts = max(1, int(attempts))
|
||||
last_payload: dict[str, Any] = {
|
||||
"url": url,
|
||||
"http_status": http_status,
|
||||
"ok": http_status == 200,
|
||||
"error": str(result.get("error") or ""),
|
||||
"http_status": None,
|
||||
"ok": False,
|
||||
"error": "not_attempted",
|
||||
"attempt_count": 0,
|
||||
}
|
||||
if parse_json and http_status == 200:
|
||||
try:
|
||||
payload["json"] = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
payload["json"] = {}
|
||||
payload["error"] = "invalid_json"
|
||||
return payload
|
||||
for attempt in range(1, bounded_attempts + 1):
|
||||
result = probe(url, timeout_seconds)
|
||||
http_status = _int_or_none(result.get("http_status"))
|
||||
body = str(result.get("body") or "")
|
||||
payload: dict[str, Any] = {
|
||||
"url": url,
|
||||
"http_status": http_status,
|
||||
"ok": http_status == 200,
|
||||
"error": str(result.get("error") or ""),
|
||||
"attempt_count": attempt,
|
||||
}
|
||||
if parse_json and http_status == 200:
|
||||
try:
|
||||
payload["json"] = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
payload["json"] = {}
|
||||
payload["error"] = "invalid_json"
|
||||
if payload.get("ok") and (not parse_json or payload.get("json")):
|
||||
return payload
|
||||
last_payload = payload
|
||||
return last_payload
|
||||
|
||||
|
||||
def _http_probe(url: str, timeout_seconds: float) -> dict[str, Any]:
|
||||
|
||||
@@ -78,6 +78,37 @@ def test_stockplatform_public_api_runtime_readback_ready_when_live_green():
|
||||
assert payload["rollups"]["http_502_count"] == 0
|
||||
|
||||
|
||||
def test_stockplatform_public_api_runtime_readback_retries_slow_data_endpoints():
    """A data endpoint that fails once and then answers ok must not block readiness.

    The stub probe returns a timeout-style result for the first call to the
    freshness and ingestion URLs and a 200 JSON body afterwards; every other
    URL answers 200 immediately. The stub also checks that data endpoints
    are probed with a 10.0 second timeout and all other URLs with 4.0.
    """
    data_endpoint_suffixes = (
        "/api/v1/system/freshness",
        "/api/v1/system/ingestion",
    )
    hits_by_url: dict[str, int] = {}

    def probe(url: str, timeout_seconds: float) -> dict:
        is_data_endpoint = url.endswith(data_endpoint_suffixes)
        expected_timeout = 10.0 if is_data_endpoint else 4.0
        assert timeout_seconds == expected_timeout

        hits_by_url[url] = hits_by_url.get(url, 0) + 1
        if not is_data_endpoint:
            return {"http_status": 200, "body": "ok", "error": ""}
        if hits_by_url[url] == 1:
            # First call to a data endpoint simulates a timed-out request.
            return {"http_status": None, "body": "", "error": "TimeoutError"}
        ok_body = json.dumps({"status": "ok", "blockers": []})
        return {"http_status": 200, "body": ok_body, "error": ""}

    payload = load_latest_stockplatform_public_api_runtime_readback(probe=probe)

    assert payload["status"] == "stockplatform_public_api_runtime_ready"
    assert payload["runtime_ready"] is True
    assert payload["active_blockers"] == []
    # Each data endpoint needed its second attempt to succeed.
    for probe_name in ("freshness", "ingestion"):
        assert payload["probes"][probe_name]["attempt_count"] == 2
    assert payload["data_endpoint_timeout_seconds"] == 10.0
    assert payload["probe_attempts"] == 2
|
||||
|
||||
|
||||
def test_stockplatform_public_api_runtime_readback_routes_postgres_not_ready():
|
||||
payload = load_latest_stockplatform_public_api_runtime_readback(
|
||||
probe=_probe_public_api_ok_postgres_not_ready
|
||||
|
||||
@@ -1,3 +1,21 @@
|
||||
## 2026-07-02 — 13:18 StockPlatform freshness 慢回應 false-blocker 修正
|
||||
|
||||
**完成內容**:
|
||||
- `stockplatform_public_api_runtime_readback` 對 `/api/v1/system/freshness` 與 `/api/v1/system/ingestion` 改用 data endpoint timeout 10 秒與 2 次 probe attempts;public route health 仍維持 4 秒,避免正常但約 4 秒才回的資料 readback 被 AWOOOI priority 誤判為 `stockplatform_*_unreachable`。
|
||||
- 新增 regression:data endpoint 首次 timeout / unreachable、第二次回 `status=ok` 時,runtime readback 必須判定 `stockplatform_public_api_runtime_ready` 且 active blockers 為空。
|
||||
|
||||
**live readback 證據**:
|
||||
- StockPlatform public `/healthz=200`、`/api/healthz=200`;`freshness status=ok latest_trading_date=2026-07-01 blockers=[]`;`ingestion status=ok coverage=100% blockers=[]`。
|
||||
- 本地套用修正後,AWOOOI priority 回到 `p0_006_blocked_reboot_auto_recovery_slo_not_ready`,`stock_blockers=[]`,剩餘 blocker 為 reboot SLO / host / backup / Wazuh 類 P0-006 主線。
|
||||
|
||||
**驗證**:
|
||||
- `python -m py_compile apps/api/src/services/stockplatform_public_api_runtime_readback.py apps/api/src/services/stockplatform_public_api_controlled_recovery_preflight.py apps/api/src/services/awoooi_priority_work_order_readback.py`:通過。
|
||||
- `DATABASE_URL=sqlite:////tmp/awoooi-test.db pytest apps/api/tests/test_stockplatform_public_api_runtime_readback.py apps/api/tests/test_stockplatform_public_api_controlled_recovery_preflight.py apps/api/tests/test_awoooi_priority_work_order_readback_api.py -q`:`23 passed`。
|
||||
- `git diff --check`:通過。
|
||||
|
||||
**仍維持**:
|
||||
- 未使用 GitHub / `gh` / GitHub API;未讀 secret / token / `.env` / raw sessions / SQLite / auth;未觸發 workflow;未重啟主機 / Docker / Nginx / K3s / DB / firewall;未寫 StockPlatform DB。
|
||||
|
||||
## 2026-07-02 — 13:10 Telegram 告警 receipt 與 AI controlled readback 補強
|
||||
|
||||
**完成內容**:
|
||||
|
||||
Reference in New Issue
Block a user