From 604c5f3b416f8de43db32e7ae02e4f3fffc252b8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 3 Jul 2026 08:00:43 +0800 Subject: [PATCH] fix(cd): run b5 database outside test container --- .gitea/workflows/cd.yaml | 64 +++++++++---------- .../test_cd_controlled_runtime_profile.py | 18 ++++-- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/.gitea/workflows/cd.yaml b/.gitea/workflows/cd.yaml index a28112ddc..b49112181 100644 --- a/.gitea/workflows/cd.yaml +++ b/.gitea/workflows/cd.yaml @@ -1133,40 +1133,32 @@ jobs: echo "✅ controlled-runtime profile: B5 DB integration unchanged; skipping B5 for this narrow release lane" exit 0 fi - cat > /tmp/awoooi-b5-tests.sh <<'CI_SCRIPT' - set -euo pipefail - cd apps/api - # 安裝 psql client - if ! command -v psql &>/dev/null; then - apt-get install -y -q postgresql-client - fi if ! docker info >/dev/null 2>&1; then - echo "BLOCKER b5_docker_socket_unavailable" - echo "NEXT_ACTION ensure_b5_ci_container_runs_with_docker_socket_permission_then_retry_cd" + echo "BLOCKER b5_outer_docker_unavailable" + echo "NEXT_ACTION ensure_cd_runner_host_can_start_pgvector_container_then_retry_cd" exit 65 fi - # 2026-04-19 ogt + Claude Opus 4.7 v3: 主動創 shared network - # 之前 grep ACT_NET 在 c0f3509 run 沒 match → fallback bridge → container name DNS 失效 - # 真因: default bridge 不支援 container name DNS,必須 user-defined network - # 修法: 主動建 'b5-test-net' (idempotent),ci-runner + pg-test-b5 都加入 - B5_NET="b5-test-net" - docker network create "$B5_NET" 2>/dev/null || true - # 當前 ci-runner container (hostname == short container id) 連上此 network - # 若已連 → docker network connect 回 error 1,用 || true 吞掉 - docker network connect "$B5_NET" "$HOSTNAME" 2>/dev/null || true - echo "B5 shared network: $B5_NET (ci-runner hostname: $HOSTNAME)" - # 啟動測試 DB 於 shared network,用 container name 'pg-test-b5' 連線 - docker rm -f pg-test-b5 2>/dev/null || true - docker run -d --name pg-test-b5 \ + B5_RUN_ID_RAW="${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-1}" + B5_RUN_ID_SAFE="$(printf '%s' "$B5_RUN_ID_RAW" | tr -c 'A-Za-z0-9_.-' '-')" + B5_NET="b5-test-net-${B5_RUN_ID_SAFE}" + B5_DB_CONTAINER="pg-test-b5-${B5_RUN_ID_SAFE}" + cleanup_b5() { + docker rm -f "$B5_DB_CONTAINER" >/dev/null 2>&1 || true + docker network rm "$B5_NET" >/dev/null 2>&1 || true + } + trap cleanup_b5 EXIT + cleanup_b5 + docker network create "$B5_NET" >/dev/null + docker run -d --name "$B5_DB_CONTAINER" \ --network="$B5_NET" \ + --network-alias pg-test-b5 \ -e POSTGRES_DB=awoooi_test \ -e POSTGRES_USER=awoooi \ -e POSTGRES_PASSWORD=awoooi_test_2026 \ pgvector/pgvector:pg16 - # 等待就緒(用 container name,最多 60 秒) B5_DB_READY=0 for i in $(seq 1 30); do - if PGPASSWORD=awoooi_test_2026 pg_isready -h pg-test-b5 -p 5432 -U awoooi; then + if docker exec "$B5_DB_CONTAINER" pg_isready -U awoooi -d awoooi_test; then B5_DB_READY=1 break fi @@ -1174,23 +1166,30 @@ jobs: done if [ "$B5_DB_READY" != "1" ]; then echo "BLOCKER b5_pg_test_container_not_ready" - echo "NEXT_ACTION inspect_b5_test_network_and_docker_socket_then_retry_cd" - docker ps --filter name=pg-test-b5 --format 'b5_container={{.Names}} status={{.Status}}' || true + echo "NEXT_ACTION inspect_b5_test_network_and_outer_docker_then_retry_cd" + docker ps --filter "name=$B5_DB_CONTAINER" --format 'b5_container={{.Names}} status={{.Status}}' || true exit 66 fi + cat > /tmp/awoooi-b5-tests.sh <<'CI_SCRIPT' + set -euo pipefail + cd apps/api + # 安裝 psql client + if ! command -v psql &>/dev/null; then + apt-get install -y -q postgresql-client + fi + B5_DB_HOST="${B5_DB_HOST:-pg-test-b5}" # 初始化 schema PGPASSWORD=awoooi_test_2026 psql \ - -h pg-test-b5 -p 5432 -U awoooi -d awoooi_test \ + -h "$B5_DB_HOST" -p 5432 -U awoooi -d awoooi_test \ -f tests/integration/setup_test_schema.sql # 跑測試 # B5 整合測試嚴格模式 (2026-04-13 ogt: 恢復 Break-Glass 移除) # -m integration: override pyproject.toml addopts "-m 'not integration'",讓標記測試可執行 # 2026-04-22 ogt: DATABASE_URL 改為必填後,import chain 需要此 env var 讓 Settings 通過驗證 - DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@pg-test-b5:5432/awoooi_test?ssl=disable" \ - TEST_DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@pg-test-b5:5432/awoooi_test?ssl=disable" \ + DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@${B5_DB_HOST}:5432/awoooi_test?ssl=disable" \ + TEST_DATABASE_URL="postgresql+asyncpg://awoooi:awoooi_test_2026@${B5_DB_HOST}:5432/awoooi_test?ssl=disable" \ /opt/api-venv/bin/pytest tests/integration/test_b5_core_flows.py -v --tb=short -m integration -p no:cacheprovider || PYTEST_EXIT=$? - # 清理 - docker rm -f pg-test-b5 || true + # pg-test-b5 is cleaned by the outer runner trap after this container exits. # 2026-05-20 Codex: B5 imports shared tests helpers, so cleanup the # whole tests tree to avoid root-owned __pycache__ act-runner noise. find tests src -type d -name __pycache__ -prune -exec rm -rf {} + 2>/dev/null || true @@ -1202,9 +1201,10 @@ jobs: --user 0:0 \ --cpus "2.0" \ --memory "2g" \ + --network "$B5_NET" \ + -e B5_DB_HOST=pg-test-b5 \ -v "$PWD:/workspace" \ -v /tmp/awoooi-b5-tests.sh:/tmp/awoooi-b5-tests.sh:ro \ - -v /var/run/docker.sock:/var/run/docker.sock \ -v awoooi-api-venv-cache:/opt/api-venv \ -w /workspace \ "${{ env.CI_IMAGE }}" \ diff --git a/ops/runner/test_cd_controlled_runtime_profile.py b/ops/runner/test_cd_controlled_runtime_profile.py index 279eba52e..1194c8da8 100644 --- a/ops/runner/test_cd_controlled_runtime_profile.py +++ b/ops/runner/test_cd_controlled_runtime_profile.py @@ -1001,10 +1001,10 @@ def test_post_start_recovery_verifiers_stay_on_controlled_runtime_profile() -> N assert source in text -def test_controlled_runtime_skips_b5_before_docker_socket_use() -> None: +def test_controlled_runtime_skips_b5_before_outer_docker_use() -> None: text = _workflow_text() b5_start = text.index("- name: Integration Tests (B5") - docker_socket = text.index("-v /var/run/docker.sock:/var/run/docker.sock", b5_start) + outer_docker = text.index("if ! docker info >/dev/null 2>&1; then", b5_start) persisted_profile_write = text.index('> .awoooi-cd-test-profile') persisted_profile_read = text.index( 'tr -d \'\\r\\n\' < .awoooi-cd-test-profile', @@ -1017,7 +1017,7 @@ def test_controlled_runtime_skips_b5_before_docker_socket_use() -> None: exit_zero = text.index("exit 0", controlled_gate) assert persisted_profile_write < b5_start assert b5_start < persisted_profile_read < controlled_gate - assert controlled_gate < exit_zero < docker_socket + assert controlled_gate < exit_zero < outer_docker def test_metadata_only_marker_skips_deploy_jobs_after_tests() -> None: @@ -1033,19 +1033,25 @@ def test_metadata_only_marker_skips_deploy_jobs_after_tests() -> None: assert "metadata-only controlled-runtime fixes already run the" in text -def test_b5_full_profile_fails_fast_when_docker_socket_or_db_network_is_unready() -> None: +def test_b5_full_profile_uses_outer_docker_for_db_without_inner_socket_mount() -> None: text = _workflow_text() b5_block = text.split("- name: Integration Tests (B5", 1)[1] b5_block = b5_block.split("- name: Clean Test Workspace Artifacts", 1)[0] assert "set -euo pipefail" in b5_block assert "docker info >/dev/null 2>&1" in b5_block - assert "BLOCKER b5_docker_socket_unavailable" in b5_block + assert "BLOCKER b5_outer_docker_unavailable" in b5_block + assert "B5_DB_CONTAINER=\"pg-test-b5-${B5_RUN_ID_SAFE}\"" in b5_block + assert 'B5_DB_HOST="${B5_DB_HOST:-pg-test-b5}"' in b5_block + assert "--network-alias pg-test-b5" in b5_block + assert "--network \"$B5_NET\"" in b5_block + assert "-v /var/run/docker.sock:/var/run/docker.sock" not in b5_block + assert "docker rm -f pg-test-b5 || true" not in b5_block assert "B5_DB_READY=0" in b5_block assert "BLOCKER b5_pg_test_container_not_ready" in b5_block assert "--user 0:0" in b5_block assert b5_block.index("docker info >/dev/null 2>&1") < b5_block.index( - "docker run -d --name pg-test-b5" + "docker run -d --name \"$B5_DB_CONTAINER\"" ) assert b5_block.index("BLOCKER b5_pg_test_container_not_ready") < b5_block.index( "/opt/api-venv/bin/pytest tests/integration/test_b5_core_flows.py"