From f6f9bf574c02fa054387637911e2535f51a14f50 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 18:00:41 +0800 Subject: [PATCH] Add competitor match rescore audit --- config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 2 +- docs/memory/history_logs.md | 1 + routes/dashboard_routes.py | 12 + .../audit_competitor_match_attempt_rescore.py | 91 ++++++ services/competitor_intel_repository.py | 15 + .../competitor_match_attempt_rescore_audit.py | 273 ++++++++++++++++++ tests/test_competitor_identity_revalidator.py | 20 ++ ..._competitor_match_attempt_rescore_audit.py | 63 ++++ 9 files changed, 477 insertions(+), 2 deletions(-) create mode 100755 scripts/audit_competitor_match_attempt_rescore.py create mode 100644 services/competitor_match_attempt_rescore_audit.py create mode 100644 tests/test_competitor_match_attempt_rescore_audit.py diff --git a/config.py b/config.py index af41f13..6d869a8 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.440" +SYSTEM_VERSION = "V10.441" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index aa075c1..59b484f 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-24 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.440 +> **適用版本**: V10.441 --- diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index dca17d7..329978c 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.441 PChome matcher re-score audit**: 新增 read-only `competitor_match_attempt_rescore_audit` 服務與 `scripts/audit_competitor_match_attempt_rescore.py`,可針對既有 `competitor_match_attempts` 用最新版 matcher 重新分類成 `accepted_current` / `unit_comparable_current` / `identity_veto_current` / `low_score_current`,預設不寫 DB、不更新正式價格;商品看板同步補蘭蔻/達特醫/hoi/Saugella/Lactacyd 等 focused matcher reason 中文標籤。正式抽樣中 31 筆舊 `strong_exact_spec_match` 低信心資料,最新版 matcher 可讀出 10 筆 gate pass、1 筆單位價、11 筆 hard veto、9 筆仍低信心,作為後續人工覆核與批次回刷前的安全量化。 - **V10.440 Mustela 爽身潤膚乳同款 anchor**: marketplace matcher 新增 `慕之幼爽身潤膚乳` identity anchor,並讓標題中插入「加量版」時仍可抽出同一身份詞;正式樣本 `【Mustela 慕之恬廊】慕之幼 加量版爽身潤膚乳 500mlX2入` vs `【慕之恬廊】慕之幼爽身潤膚乳(500毫升X2入)` 由 0.741 提升到 0.801,維持 `hard_veto=false`、人工 review 型態,不放寬全域門檻、不寫正式 `competitor_prices`。 - **V10.439 外部 BI / 資料協作入口收斂**: `/metabase`、`/grist` 保持在 momo-pro 內部診斷 bridge,不再出現空白頁或錯連其他專案;`.env.example` 與 bi profile 的 Grist 預設 URL 改為 `https://mo.wooo.work/grist` / `GRIST_APP_HOME_URL`,測試同步守住 `grist.wooo.work` 與 `awoooi` 不再回到 app/template/env/compose 導覽設定。外部工具頁 H1 移除 viewport font scaling,改用新版 token 與手機 media query。 - **V10.438 PPT QA 失敗重跑精準化**: `/observability/ppt_audit_history` 的 QA 失敗與 issue triage 卡片會從 PPT 檔名前綴推回原始 `report_type`,不再把所有視覺 QA 失敗硬編成 daily 重跑;單筆「重跑」會以 `force=true` 呼叫補齊 API,並在產生前只失效同一 `report_type + parameters` 的 active `ppt_reports` cache,避免重新產出仍命中舊簡報。頁面也把 audit lane 的預覽按鈕補上,讓失敗檔案可直接站內回放 PDF/PPTX 預覽。 diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index 88ee66e..ac18926 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -125,6 +125,18 @@ def _diagnostic_match_rejection_label(diagnostic_text, score_text, *, blocked=Tr return '工具功能不同', f'{score_text},同品牌但指甲工具功能不同,{suffix}' if 'schick_razor_line_conflict' in diagnostic_text: return '除毛刀品線不同', f'{score_text},同品牌但除毛刀子系列不同,{suffix}' + if any(token in diagnostic_text for token in ( + 'lancome_line_conflict', + 'dr_hsieh_labsmart_line_conflict', + 'hoi_candle_line_conflict', + 'sun_protection_line_conflict', + )): + return '商品線不符已排除', f'{score_text},同品牌但商品線或用途不同,{suffix}' + if any(token in diagnostic_text for token in ( + 'saugella_variant_conflict', + 'lactacyd_variant_conflict', + )): + return '款式版本不符', f'{score_text},同品牌同容量但清潔/保養款式不同,{suffix}' if 'variant_selection_review' in diagnostic_text: return '多款任選待確認', f'{score_text},一側是多款任選或缺少明確色號,需人工確認' if not blocked and score_pct is not None and score_pct < 60: diff --git a/scripts/audit_competitor_match_attempt_rescore.py b/scripts/audit_competitor_match_attempt_rescore.py new file mode 100755 index 0000000..bd0f1f0 --- /dev/null +++ b/scripts/audit_competitor_match_attempt_rescore.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +"""Read-only audit for stored PChome match attempts under the current matcher.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any, Iterator + +from sqlalchemy import create_engine + + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from services.competitor_match_attempt_rescore_audit import ( # noqa: E402 + DEFAULT_RESCAN_STATUSES, + build_match_attempt_rescore_audit, + summarize_match_attempt_rescore, +) +from services.competitor_price_feeder import MIN_MATCH_SCORE # noqa: E402 + + +def _read_jsonl(path: str) -> Iterator[dict[str, Any]]: + handle = sys.stdin if path == "-" else open(path, "r", encoding="utf-8") + try: + for line_no, line in enumerate(handle, start=1): + line = line.strip() + if not line: + continue + try: + payload = json.loads(line) + except json.JSONDecodeError as exc: + yield {"_invalid_json": True, "_line_no": line_no, "_error": str(exc)} + continue + if isinstance(payload, dict): + yield payload + else: + yield {"_invalid_json": True, "_line_no": line_no, "_error": "line is not a JSON object"} + finally: + if handle is not sys.stdin: + handle.close() + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description=( + "Re-score stored competitor_match_attempts with the current matcher. " + "Default mode reads the configured DB and never writes back." + ) + ) + parser.add_argument("--input", help="JSONL file path, or '-' for stdin. If omitted, query DATABASE_PATH.") + parser.add_argument("--source", default="pchome") + parser.add_argument("--status", action="append", dest="statuses", help="Attempt status to include; repeatable.") + parser.add_argument("--reason-filter", default="strong_exact_spec_match") + parser.add_argument("--limit", type=int, default=100) + parser.add_argument("--sample-limit", type=int, default=20) + parser.add_argument("--min-score", type=float, default=MIN_MATCH_SCORE) + args = parser.parse_args(argv) + + statuses = tuple(args.statuses or DEFAULT_RESCAN_STATUSES) + if args.input: + rows = [row for row in _read_jsonl(args.input) if not row.get("_invalid_json")] + summary = summarize_match_attempt_rescore( + rows, + min_score=args.min_score, + sample_limit=args.sample_limit, + ) + else: + from config import DATABASE_PATH + + engine = create_engine(DATABASE_PATH) + summary = build_match_attempt_rescore_audit( + engine, + source=args.source, + statuses=statuses, + reason_filter=args.reason_filter or None, + limit=args.limit, + min_score=args.min_score, + sample_limit=args.sample_limit, + ) + + print(json.dumps(summary, ensure_ascii=False, indent=2, default=str)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/services/competitor_intel_repository.py b/services/competitor_intel_repository.py index 695bf3a..b443e62 100644 --- a/services/competitor_intel_repository.py +++ b/services/competitor_intel_repository.py @@ -110,10 +110,25 @@ MATCH_DIAGNOSTIC_REASON_LABELS = { "makeup_finish_conflict": "妝效質地不同", "nail_tool_function_conflict": "工具功能不同", "schick_razor_line_conflict": "除毛刀品線不同", + "lancome_line_conflict": "蘭蔻商品線不符", + "dr_hsieh_labsmart_line_conflict": "達特醫 LabSmart 系列不符", + "hoi_candle_line_conflict": "香氛蠟燭系列不符", + "sun_protection_line_conflict": "防曬品線不符", + "saugella_variant_conflict": "賽吉兒款式不符", + "lactacyd_variant_conflict": "立朵舒款式不符", "variant_descriptor_conflict": "款式描述不同", "variant_selection_review": "多款任選待確認", "strong_exact_spec_match": "強規格同款", "strong_product_line_match": "商品線強吻合", + "shared_identity_anchor_reordered_line": "身份詞順序同款", + "focused_exact_identity_lab52_mouthwash": "Lab52 漱口水同款", + "focused_exact_identity_derma_eco_skin_oil": "Derma 護膚油同款", + "focused_exact_identity_pavaruni_40_scent_oil": "Pavaruni 40 香味精油同款", + "focused_exact_identity_pavaruni_20_scent_candle": "Pavaruni 20 香味蠟燭同款", + "focused_exact_identity_yuskin_classic_cream_30g_6pack": "悠斯晶 30g 六入同款", + "focused_exact_identity_lush_sakura_body_spray": "LUSH 櫻之花噴霧同款", + "focused_exact_identity_artmis_virile_gel": "ARTMIS 凝膠同款", + "focused_exact_identity_johnsons_baby_lotion_variant_catalog": "嬌生乳液型錄款待確認", "shared_model_token": "型號一致", } MATCH_TYPE_LABELS = { diff --git a/services/competitor_match_attempt_rescore_audit.py b/services/competitor_match_attempt_rescore_audit.py new file mode 100644 index 0000000..ad544a5 --- /dev/null +++ b/services/competitor_match_attempt_rescore_audit.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Read-only re-score audit for stored competitor match attempts.""" + +from __future__ import annotations + +from collections import Counter +from dataclasses import asdict, dataclass +from typing import Any, Iterable, Sequence + +from sqlalchemy import bindparam, text + +from services.competitor_price_feeder import MIN_MATCH_SCORE +from services.marketplace_product_matcher import score_marketplace_match + + +DEFAULT_RESCAN_STATUSES = ( + "true_low_confidence", + "recoverable_low_score", + "low_score", + "refresh_low_score", +) + + +@dataclass(frozen=True) +class MatchAttemptRescoreDecision: + sku: str + stored_status: str + suggested_status: str + gate_pass: bool + stored_score: float | None + current_score: float | None + hard_veto: bool + comparison_mode: str + match_type: str + price_basis: str + alert_tier: str + reasons: list[str] + momo_product_name: str + competitor_product_name: str + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +def _to_float(value: Any) -> float | None: + try: + if value in (None, ""): + return None + return float(value) + except (TypeError, ValueError): + return None + + +def _text_value(row: dict[str, Any], *keys: str) -> str: + for key in keys: + value = row.get(key) + if value not in (None, ""): + return str(value) + return "" + + +def classify_match_attempt_row( + row: dict[str, Any], + *, + min_score: float = MIN_MATCH_SCORE, +) -> MatchAttemptRescoreDecision: + """Classify one stored match attempt using the current matcher.""" + sku = _text_value(row, "sku") + stored_status = _text_value(row, "attempt_status", "stored_status") + momo_name = _text_value(row, "momo_product_name", "momo_name") + competitor_name = _text_value( + row, + "best_competitor_product_name", + "competitor_product_name", + "pchome_name", + ) + stored_score = _to_float(row.get("best_match_score") or row.get("stored_score") or row.get("match_score")) + + if not momo_name or not competitor_name: + return MatchAttemptRescoreDecision( + sku=sku, + stored_status=stored_status, + suggested_status="skipped_missing_identity_text", + gate_pass=False, + stored_score=stored_score, + current_score=None, + hard_veto=False, + comparison_mode="unknown", + match_type="unknown", + price_basis="none", + alert_tier="suppress", + reasons=["missing_identity_text"], + momo_product_name=momo_name, + competitor_product_name=competitor_name, + ) + + diagnostics = score_marketplace_match( + momo_name, + competitor_name, + momo_price=_to_float(row.get("momo_price")), + competitor_price=_to_float( + row.get("best_competitor_price") + or row.get("competitor_price") + or row.get("pchome_price") + or row.get("price") + ), + ) + reasons = list(diagnostics.reasons or ()) + + if diagnostics.hard_veto: + suggested_status = "identity_veto_current" + gate_pass = False + elif diagnostics.comparison_mode == "unit_comparable": + suggested_status = "unit_comparable_current" + gate_pass = False + elif diagnostics.score >= min_score: + suggested_status = "accepted_current" + gate_pass = True + else: + suggested_status = "low_score_current" + gate_pass = False + + return MatchAttemptRescoreDecision( + sku=sku, + stored_status=stored_status, + suggested_status=suggested_status, + gate_pass=gate_pass, + stored_score=stored_score, + current_score=diagnostics.score, + hard_veto=diagnostics.hard_veto, + comparison_mode=diagnostics.comparison_mode, + match_type=diagnostics.match_type, + price_basis=diagnostics.price_basis, + alert_tier=diagnostics.alert_tier, + reasons=reasons, + momo_product_name=momo_name, + competitor_product_name=competitor_name, + ) + + +def summarize_match_attempt_rescore( + rows: Iterable[dict[str, Any]], + *, + min_score: float = MIN_MATCH_SCORE, + sample_limit: int = 20, +) -> dict[str, Any]: + decisions: list[MatchAttemptRescoreDecision] = [] + status_counts: Counter[str] = Counter() + reason_counts: Counter[str] = Counter() + + for row in rows: + decision = classify_match_attempt_row(row, min_score=min_score) + decisions.append(decision) + status_counts[decision.suggested_status] += 1 + reason_counts.update(decision.reasons) + + return { + "scanned": len(decisions), + "gate_pass": sum(1 for decision in decisions if decision.gate_pass), + "unit_comparable": status_counts.get("unit_comparable_current", 0), + "identity_veto": status_counts.get("identity_veto_current", 0), + "still_low": status_counts.get("low_score_current", 0), + "skipped": status_counts.get("skipped_missing_identity_text", 0), + "status_counts": dict(status_counts), + "top_reasons": [ + {"reason": reason, "count": count} + for reason, count in reason_counts.most_common(30) + ], + "samples": [ + decision.to_dict() + for decision in decisions[: max(0, sample_limit)] + ], + } + + +def fetch_match_attempt_rescore_rows( + conn, + *, + source: str = "pchome", + statuses: Sequence[str] = DEFAULT_RESCAN_STATUSES, + reason_filter: str | None = None, + limit: int = 100, +) -> list[dict[str, Any]]: + """Fetch latest stored attempts for read-only re-score auditing.""" + status_values = tuple(status for status in statuses if status) or DEFAULT_RESCAN_STATUSES + + if conn.dialect.name == "postgresql": + reason_predicate = "AND diagnostic_codes::text LIKE :reason_filter" if reason_filter else "" + sql = text(f""" + SELECT DISTINCT ON (sku, best_competitor_product_id) + sku, + attempt_status, + momo_product_name, + momo_price, + best_competitor_product_id, + best_competitor_product_name, + best_competitor_price, + best_match_score, + diagnostic_codes, + attempted_at + FROM competitor_match_attempts + WHERE source = :source + AND attempt_status IN :statuses + {reason_predicate} + ORDER BY sku, best_competitor_product_id, attempted_at DESC NULLS LAST, id DESC + LIMIT :limit + """).bindparams(bindparam("statuses", expanding=True)) + else: + reason_predicate = "AND CAST(diagnostic_codes AS TEXT) LIKE :reason_filter" if reason_filter else "" + sql = text(f""" + WITH ranked AS ( + SELECT + sku, + attempt_status, + momo_product_name, + momo_price, + best_competitor_product_id, + best_competitor_product_name, + best_competitor_price, + best_match_score, + diagnostic_codes, + attempted_at, + ROW_NUMBER() OVER ( + PARTITION BY sku, best_competitor_product_id + ORDER BY attempted_at DESC, id DESC + ) AS rn + FROM competitor_match_attempts + WHERE source = :source + AND attempt_status IN :statuses + {reason_predicate} + ) + SELECT * + FROM ranked + WHERE rn = 1 + ORDER BY attempted_at DESC + LIMIT :limit + """).bindparams(bindparam("statuses", expanding=True)) + + params = { + "source": source, + "statuses": status_values, + "limit": max(1, int(limit)), + } + if reason_filter: + params["reason_filter"] = f"%{reason_filter}%" + + return [dict(row) for row in conn.execute(sql, params).mappings().all()] + + +def build_match_attempt_rescore_audit( + engine, + *, + source: str = "pchome", + statuses: Sequence[str] = DEFAULT_RESCAN_STATUSES, + reason_filter: str | None = None, + limit: int = 100, + min_score: float = MIN_MATCH_SCORE, + sample_limit: int = 20, +) -> dict[str, Any]: + with engine.connect() as conn: + rows = fetch_match_attempt_rescore_rows( + conn, + source=source, + statuses=statuses, + reason_filter=reason_filter, + limit=limit, + ) + return summarize_match_attempt_rescore( + rows, + min_score=min_score, + sample_limit=sample_limit, + ) diff --git a/tests/test_competitor_identity_revalidator.py b/tests/test_competitor_identity_revalidator.py index 4b18e12..a9becc6 100644 --- a/tests/test_competitor_identity_revalidator.py +++ b/tests/test_competitor_identity_revalidator.py @@ -163,3 +163,23 @@ def test_dashboard_match_status_uses_specific_matcher_reason_labels(): assert "妝效質地不同" in finish_gap["summary"] assert variant_review["label"] == "多款任選待確認" assert "需人工確認" in variant_review["summary"] + + +def test_dashboard_match_status_uses_focused_marketplace_reason_labels(): + from routes.dashboard_routes import _build_pchome_match_status + + line_gap = _build_pchome_match_status({ + "attempt_status": "identity_veto", + "best_match_score": 0.32, + "error_message": "score=0.32; reasons=lancome_line_conflict", + }) + variant_gap = _build_pchome_match_status({ + "attempt_status": "identity_veto", + "best_match_score": 0.32, + "error_message": "score=0.32; reasons=saugella_variant_conflict", + }) + + assert line_gap["label"] == "商品線不符已排除" + assert "商品線或用途不同" in line_gap["summary"] + assert variant_gap["label"] == "款式版本不符" + assert "款式不同" in variant_gap["summary"] diff --git a/tests/test_competitor_match_attempt_rescore_audit.py b/tests/test_competitor_match_attempt_rescore_audit.py new file mode 100644 index 0000000..104cd8f --- /dev/null +++ b/tests/test_competitor_match_attempt_rescore_audit.py @@ -0,0 +1,63 @@ +def test_match_attempt_rescore_audit_classifies_current_gate_pass_and_veto(): + from services.competitor_match_attempt_rescore_audit import summarize_match_attempt_rescore + + rows = [ + { + "sku": "9031334", + "attempt_status": "true_low_confidence", + "momo_product_name": "【Mustela 慕之恬廊】慕之幼 加量版爽身潤膚乳 500mlX2入(寶寶 嬰兒乳液 公司貨 台灣獨家總代理)", + "best_competitor_product_name": "【慕之恬廊】慕之幼爽身潤膚乳(500毫升X2入)", + "momo_price": 1390, + "best_competitor_price": 1210, + "best_match_score": 0.709, + }, + { + "sku": "12534698", + "attempt_status": "true_low_confidence", + "momo_product_name": "【LANCOME 蘭蔻】官方直營 超極光活粹晶露150ml(LANCOME/四重酸極光水/化妝水/精華水)", + "best_competitor_product_name": "LANCOME 蘭蔻 超極限肌因精華露150ml 專櫃公司貨", + "momo_price": 3555, + "best_competitor_price": 1880, + "best_match_score": 0.748, + }, + ] + + summary = summarize_match_attempt_rescore(rows) + + assert summary["scanned"] == 2 + assert summary["gate_pass"] == 1 + assert summary["identity_veto"] == 1 + assert summary["status_counts"]["accepted_current"] == 1 + assert summary["status_counts"]["identity_veto_current"] == 1 + assert summary["samples"][0]["current_score"] >= 0.76 + assert "shared_identity_anchor_reordered_line" in summary["samples"][0]["reasons"] + assert "lancome_line_conflict" in summary["samples"][1]["reasons"] + + +def test_match_attempt_rescore_audit_keeps_unit_price_cases_out_of_gate_pass(): + from services.competitor_match_attempt_rescore_audit import classify_match_attempt_row + + decision = classify_match_attempt_row({ + "sku": "12670442", + "attempt_status": "true_low_confidence", + "momo_product_name": "【日本Beauty Foot】去角質足膜25mlx2枚入 5入組(一般尺寸、大尺寸可選)", + "best_competitor_product_name": "【日本Beauty Foot 】煥膚足膜(25ml*2枚入)四入組", + "momo_price": 961, + "best_competitor_price": 989, + "best_match_score": 0.738, + }) + + assert decision.suggested_status == "unit_comparable_current" + assert decision.gate_pass is False + assert decision.comparison_mode == "unit_comparable" + assert "unit_comparable" in decision.reasons + + +def test_match_attempt_rescore_audit_skips_missing_identity_text(): + from services.competitor_match_attempt_rescore_audit import classify_match_attempt_row + + decision = classify_match_attempt_row({"sku": "NO-TEXT", "attempt_status": "low_score"}) + + assert decision.suggested_status == "skipped_missing_identity_text" + assert decision.current_score is None + assert decision.gate_pass is False