# ewoooc/services/ai_surface_html_readback_service.py

"""Read-only product-surface HTML contract readback for AI workbench pages."""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

from config import SYSTEM_VERSION


# Policy identifiers: each payload builder in this module reports one of these
# strings under its "policy" key.
POLICY = "read_only_ai_surface_html_readback_v1"
REPAIR_POLICY = "read_only_ai_surface_html_repair_package_v1"
SITEWIDE_POLICY = "read_only_sitewide_ui_ux_agent_inventory_v1"
SITEWIDE_REPAIR_POLICY = "read_only_sitewide_ui_ux_repair_package_v1"
# Default source root used when a builder is called without ``root``: the
# parent of the directory that contains this module.
ROOT = Path(__file__).resolve().parents[1]


@dataclass(frozen=True)
class SurfaceContract:
    """Immutable description of one AI workbench surface checked by the readback."""

    # URL route of the surface; also the lookup key into caller-supplied rendered HTML.
    route: str
    # Template path relative to the source root; read as UTF-8 when no rendered HTML is supplied.
    template: str
    # Expected value of the ``data-density-guardrail`` attribute in the surface HTML.
    density_marker: str
    # Display label copied into readback and repair payloads.
    label: str


# Surfaces verified by build_ai_surface_html_readback. Each entry names a route,
# the template file read for it when no rendered HTML is supplied, the density
# marker its HTML must carry, and the label reported in the payload.
SURFACE_CONTRACTS: tuple[SurfaceContract, ...] = (
    SurfaceContract(
        route="/observability/overview",
        template="templates/admin/observability_overview.html",
        density_marker="compact-observability-workbench",
        label="AI 觀測總覽",
    ),
    SurfaceContract(
        route="/observability/ai_calls",
        template="templates/admin/ai_calls_dashboard.html",
        density_marker="compact-ai-calls-workbench",
        label="AI 流量監控",
    ),
    SurfaceContract(
        route="/observability/quality_trend",
        template="templates/admin/quality_trend.html",
        density_marker="compact-quality-workbench",
        label="AI 品質診斷",
    ),
    SurfaceContract(
        route="/observability/budget",
        template="templates/admin/budget.html",
        density_marker="compact-budget-workbench",
        label="AI 成本守門",
    ),
    SurfaceContract(
        route="/observability/business_intel",
        template="templates/admin/business_intel.html",
        density_marker="compact-business-workbench",
        label="AI 商業情報",
    ),
    SurfaceContract(
        route="/observability/host_health",
        template="templates/admin/host_health.html",
        density_marker="compact-runtime-workbench",
        label="AI runtime 健康",
    ),
    SurfaceContract(
        route="/observability/rag_queries",
        template="templates/admin/rag_queries.html",
        density_marker="compact-knowledge-workbench",
        label="AI 知識召回",
    ),
    SurfaceContract(
        route="/observability/agent_orchestration",
        template="templates/admin/agent_orchestration.html",
        density_marker="compact-agent-workbench",
        label="AI Agent 分工",
    ),
    SurfaceContract(
        route="/observability/promotion_review",
        template="templates/admin/promotion_review.html",
        density_marker="compact-promotion-workbench",
        label="AI 晉升例外",
    ),
    SurfaceContract(
        route="/observability/ppt_audit_history",
        template="templates/admin/ppt_audit_history.html",
        density_marker="compact-ppt-workbench",
        label="AI 視覺 QA",
    ),
)

# Raw engineering strings that must not appear in product-surface HTML.
# Matching is a case-sensitive substring test against the whole HTML text;
# every fragment found is reported as a leak.
FORBIDDEN_PRODUCT_FRAGMENTS: tuple[str, ...] = (
    "PPT_VISION_ENABLED=",
    "PPT_AUTO_GENERATION_ENABLED=",
    "ppt_generation_runs",
    "DB 已寫入",
    "待 DB 寫入",
    "DB 寫入失敗",
    "DB writes",
    "manual_required",
    "needs_human=true",
    "human gate",
    "manual review",
    "Artifact retention",
    "Compact 回讀",
)

# Lowercase substrings used to rank templates: a template whose lowercased
# relative path contains any of these is reported as priority "P1", otherwise
# "P2". Some entries overlap (e.g. "ai_calls" is already covered by "ai_").
HIGH_PRIORITY_TEMPLATE_HINTS: tuple[str, ...] = (
    "dashboard",
    "ai_",
    "ai-",
    "sales",
    "growth",
    "price",
    "pchome",
    "vendor_stockout",
    "observability",
    "agent_orchestration",
    "ai_calls",
    "quality_trend",
    "budget",
    "business_intel",
    "host_health",
    "rag_queries",
    "promotion_review",
    "ppt_audit",
)

# Substrings that count as evidence of a professional workbench shell. A
# template containing at least one of them is treated as having a professional
# marker in the sitewide inventory.
PROFESSIONAL_GUARDRAIL_MARKERS: tuple[str, ...] = (
    "data-density-guardrail=",
    "data-benchmark-guardrail=",
    "growth-command",
    "ewoooc-shell",
    "momo-page-shell",
    "dashboard-v2",
)


def _read_surface_html(root: Path, contract: SurfaceContract) -> str:
    """Return the UTF-8 text of the contract's template, resolved under *root*."""
    template_path = root.joinpath(contract.template)
    return template_path.read_text(encoding="utf-8")


def _evaluate_contract(
    contract: SurfaceContract,
    html: str,
) -> dict[str, Any]:
    """Check one surface's HTML against its density marker and the forbidden copy list.

    The surface passes ("ok") only when the exact attribute
    ``data-density-guardrail="<density_marker>"`` occurs in *html* and none of
    the forbidden fragments do; otherwise it is "critical".
    """
    expected_attribute = f'data-density-guardrail="{contract.density_marker}"'
    marker_found = expected_attribute in html
    absent_markers = [] if marker_found else [contract.density_marker]
    leaks = [piece for piece in FORBIDDEN_PRODUCT_FRAGMENTS if piece in html]

    passed = marker_found and not leaks
    result: dict[str, Any] = {
        "route": contract.route,
        "template": contract.template,
        "label": contract.label,
        "status": "ok" if passed else "critical",
        "density_marker": contract.density_marker,
        "marker_present": marker_found,
        "missing_markers": absent_markers,
        "forbidden_leak_count": len(leaks),
        "forbidden_leaks": leaks,
    }
    return result


def build_ai_surface_html_readback(
    *,
    root: Path | str | None = None,
    rendered_html_by_route: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Return a machine-readable no-write contract readback for AI surfaces.

    Args:
        root: Source root that template paths are resolved against. Defaults
            to the module-level ``ROOT``.
        rendered_html_by_route: Optional pre-rendered HTML keyed by route. When
            a route is present here, its template file is not read.

    Returns:
        A payload with the overall status ("ok" or "critical"), per-surface
        results, the failed subset, any read errors, and summary counters.

    A template that cannot be read, either because of an I/O failure or because
    it is not valid UTF-8, is recorded in ``read_errors`` and reported as a
    critical surface instead of aborting the whole readback.
    """
    source_root = Path(root).resolve() if root is not None else ROOT
    rendered_html_by_route = rendered_html_by_route or {}
    surfaces: list[dict[str, Any]] = []
    read_errors: list[dict[str, str]] = []

    for contract in SURFACE_CONTRACTS:
        html = rendered_html_by_route.get(contract.route)
        if html is None:
            try:
                html = _read_surface_html(source_root, contract)
            except (OSError, UnicodeDecodeError) as exc:
                # UnicodeDecodeError is a ValueError, not an OSError, so it
                # must be listed explicitly to cover mis-encoded templates.
                read_errors.append({
                    "route": contract.route,
                    "template": contract.template,
                    "error": str(exc)[:300],
                })
                surfaces.append({
                    "route": contract.route,
                    "template": contract.template,
                    "label": contract.label,
                    "status": "critical",
                    "density_marker": contract.density_marker,
                    "marker_present": False,
                    "missing_markers": [contract.density_marker],
                    "forbidden_leak_count": 0,
                    "forbidden_leaks": [],
                })
                continue
        surfaces.append(_evaluate_contract(contract, html))

    failed = [item for item in surfaces if item["status"] != "ok"]
    leak_count = sum(int(item.get("forbidden_leak_count") or 0) for item in surfaces)
    pass_count = len(surfaces) - len(failed)
    status = "ok" if not failed and not read_errors else "critical"

    return {
        "policy": POLICY,
        "status": status,
        "version": SYSTEM_VERSION,
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "summary": {
            "checked_surface_count": len(surfaces),
            "pass_count": pass_count,
            "failed_count": len(failed),
            "forbidden_leak_count": leak_count,
            "primary_human_gate_count": 0,
            "writes_database_count": 0,
        },
        "surfaces": surfaces,
        "failed_surfaces": failed,
        "read_errors": read_errors,
        "next_machine_action": (
            "keep_surface_readback_in_ai_smoke"
            if status == "ok"
            else "repair_ai_surface_html_contract"
        ),
        "automation_policy": {
            "primary_flow": "ai_controlled",
            "manual_review_mode": "exception_only",
            "machine_verifiable_evidence": True,
            "primary_human_gate_count": 0,
        },
        "safety": {
            "read_only": True,
            "writes_database": False,
            "writes_database_count": 0,
            "sends_notifications": False,
            "requires_browser": False,
        },
    }


def _build_repair_item(surface: dict[str, Any]) -> dict[str, Any]:
    """Translate one failed surface result into a controlled repair item.

    One action is emitted per missing density marker, followed by one action
    per leaked forbidden fragment. The item is "ready_for_controlled_repair"
    when at least one action exists and "no_repair_required" otherwise.
    """
    template = surface.get("template")
    absent_markers = list(surface.get("missing_markers") or [])
    leaked = list(surface.get("forbidden_leaks") or [])

    marker_actions: list[dict[str, Any]] = [
        {
            "action": "restore_density_guardrail_marker",
            "target_template": template,
            "expected_marker": name,
            "safe_apply_hint": (
                f'Restore data-density-guardrail="{name}" on the first-viewport workbench shell.'
            ),
        }
        for name in absent_markers
    ]

    leak_hint = (
        "Replace the raw engineering fragment with Traditional Chinese product language "
        "or move it behind evidence-on-demand / hidden contract."
    )
    leak_actions: list[dict[str, Any]] = [
        {
            "action": "remove_raw_engineering_copy_from_product_surface",
            "target_template": template,
            "forbidden_fragment": fragment,
            "safe_apply_hint": leak_hint,
        }
        for fragment in leaked
    ]

    planned = marker_actions + leak_actions
    if planned:
        item_status = "ready_for_controlled_repair"
    else:
        item_status = "no_repair_required"

    return {
        "route": surface.get("route"),
        "template": template,
        "label": surface.get("label"),
        "status": item_status,
        "missing_markers": absent_markers,
        "forbidden_leaks": leaked,
        "controlled_actions": planned,
        "post_apply_verifier": {
            "service_function": "build_ai_surface_html_readback",
            "expected_status": "ok",
            "expected_failed_count": 0,
            "expected_forbidden_leak_count": 0,
        },
    }


def build_ai_surface_html_repair_package(
    *,
    root: Path | str | None = None,
    source_readback: dict[str, Any] | None = None,
    rendered_html_by_route: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Assemble a no-write controlled repair package from a surface readback.

    A non-empty *source_readback* is used as-is; otherwise a fresh readback is
    built from *root* and *rendered_html_by_route*. The package is
    "repair_ready" when at least one controlled action was produced for the
    failed surfaces, and "no_op" otherwise.
    """
    if source_readback:
        readback = source_readback
    else:
        readback = build_ai_surface_html_readback(
            root=root,
            rendered_html_by_route=rendered_html_by_route,
        )

    failing = list(readback.get("failed_surfaces") or [])
    items = [_build_repair_item(entry) for entry in failing]

    # One pass over the items gathers both the action total and the set of
    # templates the repair is allowed to touch.
    total_actions = 0
    target_templates: set[str] = set()
    for entry in items:
        total_actions += len(entry.get("controlled_actions") or [])
        template = entry.get("template")
        if template:
            target_templates.add(str(template))

    has_actions = bool(total_actions)
    readback_summary = readback.get("summary") or {}
    leak_total = int(readback_summary.get("forbidden_leak_count") or 0)

    if has_actions:
        package_status = "repair_ready"
        next_action = "apply_ai_surface_html_contract_repair"
    else:
        package_status = "no_op"
        next_action = "keep_surface_readback_in_ai_smoke"

    return {
        "policy": REPAIR_POLICY,
        "status": package_status,
        "version": SYSTEM_VERSION,
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "source_readback_policy": readback.get("policy"),
        "source_readback_status": readback.get("status"),
        "summary": {
            "failed_surface_count": len(failing),
            "controlled_action_count": total_actions,
            "forbidden_leak_count": leak_total,
            "primary_human_gate_count": 0,
            "writes_database_count": 0,
            "executes_shell_count": 0,
        },
        "repair_items": items,
        "controlled_apply_contract": {
            "mode": "ai_controlled_low_blast_radius",
            "allowed_targets": sorted(target_templates),
            "forbidden_targets": [
                ".env",
                "runtime volumes",
                "database",
                "secrets",
                "raw sessions",
                "sqlite",
            ],
            "requires_post_apply_verifier": True,
            "post_apply_verifier": "build_ai_surface_html_readback",
            "rollback_strategy": "revert_template_patch_and_rerun_surface_html_readback",
        },
        "next_machine_action": next_action,
        "automation_policy": {
            "primary_flow": "ai_controlled",
            "manual_review_mode": "exception_only",
            "machine_verifiable_evidence": True,
            "primary_human_gate_count": 0,
        },
        "safety": {
            "read_only_package": True,
            "writes_database": False,
            "writes_database_count": 0,
            "executes_shell": False,
            "sends_notifications": False,
            "requires_browser": False,
            "requires_secret": False,
        },
    }


def _iter_site_templates(root: Path) -> list[Path]:
    """List page templates under ``<root>/templates`` in sorted order.

    Only regular files ending in ``.html`` or ``.j2`` are kept. Files whose
    name starts with ``.`` or ``_``, the known base layouts, and anything
    located under a ``components`` directory are skipped. Returns an empty
    list when the templates directory does not exist.
    """
    template_root = root / "templates"
    if not template_root.exists():
        return []

    wanted_suffixes = {".html", ".j2"}
    base_layouts = {"base.html", "ewoooc_base.html", "test_base.html"}

    selected: list[Path] = []
    for candidate in template_root.rglob("*"):
        if candidate.suffix not in wanted_suffixes or not candidate.is_file():
            continue
        filename = candidate.name
        if filename.startswith((".", "_")) or filename in base_layouts:
            continue
        if "components" in candidate.relative_to(template_root).parts:
            continue
        selected.append(candidate)

    selected.sort()
    return selected


def _relative_template_path(root: Path, path: Path) -> str:
    """Express *path* relative to *root* as a string with forward slashes only."""
    relative = path.relative_to(root)
    return "/".join(str(relative).split("\\"))


def _is_high_priority_template(relpath: str) -> bool:
    """Tell whether the lowercased path contains any high-priority hint."""
    haystack = relpath.lower()
    for hint in HIGH_PRIORITY_TEMPLATE_HINTS:
        if hint in haystack:
            return True
    return False


def _template_family(relpath: str) -> str:
    """Classify a template path into a product family by substring rules.

    Rules are tried in order against the lowercased path and the first match
    wins; paths matching no rule fall into "general_product_surface".
    """
    ordered_rules: tuple[tuple[tuple[str, ...], str], ...] = (
        (("/admin/", "observability"), "ai_observability"),
        (("dashboard", "ai_intelligence"), "growth_command_center"),
        (("vendor_stockout",), "vendor_operations"),
        (("sales", "growth", "price"), "commerce_analytics"),
        (("login", "user", "settings"), "system_account"),
    )
    haystack = relpath.lower()
    for needles, family in ordered_rules:
        if any(needle in haystack for needle in needles):
            return family
    return "general_product_surface"


def _route_sources_for_template(root: Path, template_name: str) -> list[str]:
    """Find ``routes/*.py`` files whose source text mentions the template.

    The search string is the part of *template_name* after the first
    ``templates/`` (or the whole name when that prefix is absent). Route files
    that cannot be read or decoded as UTF-8 are skipped. Results are
    root-relative, forward-slash paths in sorted file order.
    """
    routes_dir = root / "routes"
    if not routes_dir.exists():
        return []

    _, separator, tail = template_name.partition("templates/")
    needle = tail if separator else template_name

    def _mentions_template(route_file: Path) -> bool:
        # Unreadable or non-UTF-8 route files are treated as non-matching.
        try:
            return needle in route_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return False

    return [
        str(route_file.relative_to(root)).replace("\\", "/")
        for route_file in sorted(routes_dir.glob("*.py"))
        if _mentions_template(route_file)
    ]


def _evaluate_site_template(root: Path, path: Path) -> dict[str, Any]:
    """Evaluate one template file for the sitewide UI/UX inventory.

    Args:
        root: Source root used to compute the template's relative path and to
            locate route sources.
        path: Template file to read as UTF-8.

    Returns:
        A surface record with family, priority ("P1"/"P2"), status
        ("ok"/"warning"/"critical"), guardrail flags, findings, and the next
        machine action.

    A template that cannot be read yields a single critical finding instead of
    raising: ``template_encoding_error`` for invalid UTF-8 and
    ``template_read_error`` for any other I/O failure, so one bad file does
    not abort the whole inventory.
    """
    relpath = _relative_template_path(root, path)
    high_priority = _is_high_priority_template(relpath)
    try:
        html = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so both are
        # listed; they are reported under distinct finding types.
        read_finding: dict[str, Any] = {
            "type": "template_read_error",
            "severity": "critical",
        }
        if isinstance(exc, UnicodeDecodeError):
            read_finding["type"] = "template_encoding_error"
            read_finding["encoding"] = "utf-8"
        read_finding["error"] = str(exc)[:300]
        return {
            "template": relpath,
            "family": _template_family(relpath),
            "priority": "P1" if high_priority else "P2",
            "status": "critical",
            "route_sources": _route_sources_for_template(root, relpath),
            "guardrails": {
                "compact_marker_present": False,
                "benchmark_marker_present": False,
                "professional_marker_present": False,
            },
            "finding_count": 1,
            "findings": [read_finding],
            "next_machine_action": "build_sitewide_ui_ux_repair_package",
        }
    compact_marker_present = "data-density-guardrail=" in html
    benchmark_marker_present = "data-benchmark-guardrail=" in html
    professional_marker_present = any(marker in html for marker in PROFESSIONAL_GUARDRAIL_MARKERS)
    forbidden_leaks = [
        fragment
        for fragment in FORBIDDEN_PRODUCT_FRAGMENTS
        if fragment in html
    ]
    findings: list[dict[str, Any]] = []
    if forbidden_leaks:
        findings.append({
            "type": "raw_engineering_copy",
            "severity": "critical",
            "fragments": forbidden_leaks,
        })
    # The missing-guardrail warning applies to high-priority templates only.
    if high_priority and not professional_marker_present:
        findings.append({
            "type": "missing_professional_workbench_guardrail",
            "severity": "warning",
            "expected": "compact density marker, benchmark marker, or product shell marker",
        })
    # The worst finding severity decides the surface status.
    status = "ok" if not findings else (
        "critical" if any(item["severity"] == "critical" for item in findings) else "warning"
    )
    return {
        "template": relpath,
        "family": _template_family(relpath),
        "priority": "P1" if high_priority else "P2",
        "status": status,
        "route_sources": _route_sources_for_template(root, relpath),
        "guardrails": {
            "compact_marker_present": compact_marker_present,
            "benchmark_marker_present": benchmark_marker_present,
            "professional_marker_present": professional_marker_present,
        },
        "finding_count": len(findings),
        "findings": findings,
        "next_machine_action": (
            "build_sitewide_ui_ux_repair_package"
            if findings
            else "keep_sitewide_ui_ux_agent_monitoring"
        ),
    }


def build_sitewide_ui_ux_agent_inventory(
    *,
    root: Path | str | None = None,
) -> dict[str, Any]:
    """Inventory the discovered site templates for sitewide UI/UX automation.

    Every template returned by the template discovery helper is evaluated. The
    overall status is "ok" when no surface has an issue and "warning"
    otherwise, regardless of the individual surface severities.
    """
    source_root = ROOT if root is None else Path(root).resolve()
    evaluated = [
        _evaluate_site_template(source_root, template_path)
        for template_path in _iter_site_templates(source_root)
    ]

    with_issues = [entry for entry in evaluated if entry["status"] != "ok"]
    p1_total = sum(1 for entry in evaluated if entry["priority"] == "P1")

    # Tally compact-marker coverage and raw-copy findings in a single pass.
    compact_total = 0
    raw_copy_total = 0
    for entry in evaluated:
        if entry.get("guardrails", {}).get("compact_marker_present"):
            compact_total += 1
        for finding in entry.get("findings", []):
            if finding.get("type") == "raw_engineering_copy":
                raw_copy_total += 1

    if with_issues:
        overall_status = "warning"
        next_action = "build_sitewide_ui_ux_repair_package"
    else:
        overall_status = "ok"
        next_action = "keep_sitewide_ui_ux_agent_monitoring"

    return {
        "policy": SITEWIDE_POLICY,
        "status": overall_status,
        "version": SYSTEM_VERSION,
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "summary": {
            "template_count": len(evaluated),
            "high_priority_template_count": p1_total,
            "compact_guardrail_count": compact_total,
            "issue_surface_count": len(with_issues),
            "raw_engineering_issue_count": raw_copy_total,
            "primary_human_gate_count": 0,
            "writes_database_count": 0,
        },
        "guardrail_contract": {
            "target_experience": "mainstream_professional_product_website",
            "principles": [
                "first_viewport_status_and_next_action",
                "compact_information_density",
                "traditional_chinese_product_language",
                "evidence_on_demand",
                "no_raw_engineering_copy_on_product_surfaces",
                "stable_responsive_layout",
            ],
        },
        "surfaces": evaluated,
        "issue_surfaces": with_issues,
        "next_machine_action": next_action,
        "automation_policy": {
            "primary_flow": "ai_controlled",
            "manual_review_mode": "exception_only",
            "machine_verifiable_evidence": True,
            "primary_human_gate_count": 0,
        },
        "safety": {
            "read_only": True,
            "writes_database": False,
            "writes_database_count": 0,
            "sends_notifications": False,
            "requires_browser": False,
            "requires_secret": False,
        },
    }


def build_sitewide_ui_ux_repair_package(
    *,
    root: Path | str | None = None,
    source_inventory: dict[str, Any] | None = None,
    limit: int = 12,
) -> dict[str, Any]:
    """Build a no-write sitewide UI/UX controlled repair package.

    Args:
        root: Source root passed to the inventory builder when no inventory is
            supplied.
        source_inventory: Existing inventory payload to reuse. A missing or
            empty value triggers a fresh inventory.
        limit: Maximum number of issue surfaces to include. A falsy value
            falls back to 12 and values below 1 are raised to 1.

    Returns:
        A payload listing the selected repair items and their controlled
        actions. Package status is "repair_ready" when at least one action
        exists and "no_op" otherwise.

    Issue surfaces are ordered P1 first, then critical first, then by template
    name. An item whose findings map to no known action is marked
    "no_repair_required" rather than "ready_for_controlled_repair", matching
    the per-surface convention used by ``_build_repair_item``.
    """
    inventory = source_inventory or build_sitewide_ui_ux_agent_inventory(root=root)
    issue_surfaces = list(inventory.get("issue_surfaces") or [])
    prioritized = sorted(
        issue_surfaces,
        key=lambda item: (
            0 if item.get("priority") == "P1" else 1,
            0 if item.get("status") == "critical" else 1,
            str(item.get("template") or ""),
        ),
    )[: max(1, int(limit or 12))]
    repair_items: list[dict[str, Any]] = []
    for surface in prioritized:
        actions: list[dict[str, Any]] = []
        for finding in surface.get("findings", []):
            finding_type = finding.get("type")
            if finding_type == "raw_engineering_copy":
                actions.append({
                    "action": "replace_raw_engineering_copy",
                    "target_template": surface.get("template"),
                    "fragments": finding.get("fragments") or [],
                    "safe_apply_hint": "Use Traditional Chinese product wording and move raw evidence into drilldown/API.",
                })
            elif finding_type == "missing_professional_workbench_guardrail":
                actions.append({
                    "action": "add_professional_workbench_guardrail",
                    "target_template": surface.get("template"),
                    "safe_apply_hint": (
                        "Add first-viewport status, next action, compact density marker, and evidence-on-demand pattern."
                    ),
                })
            elif finding_type == "template_encoding_error":
                actions.append({
                    "action": "normalize_template_encoding_utf8",
                    "target_template": surface.get("template"),
                    "safe_apply_hint": (
                        "Convert the template to UTF-8, preserve visible Traditional Chinese copy, "
                        "then rerun sitewide UI/UX inventory."
                    ),
                })
        repair_items.append({
            "template": surface.get("template"),
            "family": surface.get("family"),
            "priority": surface.get("priority"),
            # Only claim readiness when there is something to apply.
            "status": "ready_for_controlled_repair" if actions else "no_repair_required",
            "controlled_actions": actions,
            "post_apply_verifier": "build_sitewide_ui_ux_agent_inventory",
        })
    action_count = sum(len(item.get("controlled_actions") or []) for item in repair_items)
    return {
        "policy": SITEWIDE_REPAIR_POLICY,
        "status": "repair_ready" if action_count else "no_op",
        "version": SYSTEM_VERSION,
        "generated_at": datetime.now().isoformat(timespec="seconds"),
        "source_inventory_policy": inventory.get("policy"),
        "source_inventory_status": inventory.get("status"),
        "summary": {
            "selected_surface_count": len(repair_items),
            "controlled_action_count": action_count,
            "total_issue_surface_count": int((inventory.get("summary") or {}).get("issue_surface_count") or 0),
            "primary_human_gate_count": 0,
            "writes_database_count": 0,
            "executes_shell_count": 0,
        },
        "repair_items": repair_items,
        "next_machine_action": (
            "apply_sitewide_ui_ux_controlled_repairs"
            if action_count
            else "keep_sitewide_ui_ux_agent_monitoring"
        ),
        "controlled_apply_contract": {
            "mode": "ai_controlled_low_blast_radius_template_patch",
            "allowed_target_globs": ["templates/**/*.html", "templates/**/*.j2", "web/static/css/**/*.css"],
            "forbidden_targets": [".env", "database", "runtime volumes", "secrets", "raw sessions", "sqlite"],
            "requires_post_apply_verifier": True,
            "post_apply_verifier": "build_sitewide_ui_ux_agent_inventory",
            "rollback_strategy": "revert_template_or_css_patch_and_rerun_sitewide_ui_ux_agent",
        },
        "automation_policy": {
            "primary_flow": "ai_controlled",
            "manual_review_mode": "exception_only",
            "machine_verifiable_evidence": True,
            "primary_human_gate_count": 0,
        },
        "safety": {
            "read_only_package": True,
            "writes_database": False,
            "writes_database_count": 0,
            "executes_shell": False,
            "sends_notifications": False,
            "requires_secret": False,
        },
    }