diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 2ee748c..c533f3a 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,8 @@ ================================================================================ 【已完成】 + - V10.453 補 PChome matcher 安全回收規則:新增 Herbacin 小甘菊護手霜 20ml brandless 同款 anchor;修正 `EX8` 型號不可被誤解析成 `x8` 入數;新增 GONESH / 香氛固體凝膠的一側泛稱、一側明確香味或 No. 款式 veto,避免近門檻 replay 把不同香味、不同入數商品錯寫成正式價差。 + - V10.452 修正 PChome rescore audit 掃描口徑:`audit_competitor_match_attempt_rescore.py` 預設先取每個 SKU 最新 attempt,再套用 status / reason 篩選,和 Dashboard review queue 的最新狀態一致;舊 SKU/候選考古掃描需明確加 `--include-historical-candidates`,避免已修正或已入隊商品被舊低信心紀錄重複推回報表。 - V10.451 拆分 PChome `low_score` 操作分流並補 read-only queue API:比價覆核頁把近門檻可救、證據不足、低信心舊候選拆成獨立篩選;repository 同步提供 `recoverable_low_score`、`true_low_confidence`、`legacy_low_score` 三個 status filter,`/api/pchome-review/queue` 可直接用同一套 review_status 做 smoke / operator tools 查詢,讓回刷、人工覆核與報表不再把所有低信心候選混在一起。 - V10.450 補 PChome 覆核 fast-count UI 語意與重算可採用指標:預設全量覆核頁跳過 exact count 時,模板會顯示「約」作為快取總數提示;搜尋、分類、單一狀態仍是精準總數。`fetch_competitor_coverage()` 同步輸出 `rescore_accepted_count`,讓 Dashboard、daily/growth 與 OpenClaw 摘要能把「重算可採用待審」從一般覆核隊列拆出來。 - V10.449 修正 PChome 覆核 exact count 條件:只有預設「全部覆核、無搜尋、無分類」頁跳過 exact count;只要有搜尋詞、分類篩選或單一 review status,就保留精準總數,避免分頁資訊失準。 diff --git a/config.py b/config.py index 86416a9..fc0c1c9 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.451" +SYSTEM_VERSION = "V10.453" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index faad4bb..a238199 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-24 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.451 +> **適用版本**: V10.453 --- @@ -83,6 +83,8 @@ SQL漏斗(~300筆) - 商品看板第一屏:`/` 的 V2 看板直接以 `products`、`price_records`、`competitor_prices`、`competitor_match_attempts`、`competitor_match_reviews`、`ai_price_recommendations` 顯示比對覆蓋率、PChome 優勢、MOMO 威脅、AI 挑品、待比對優先清單與 PChome 覆核隊列;`filter=ai_picks` 可查看 50 品 AI 挑品列表,`filter=pchome_review` 可直接查看需人工處理的比價覆核 SKU,並以 DB 分頁支援 search/category/status 後的完整隊列,不得只截前 50 筆。覆核狀態篩選必須至少包含全部、需單位價、已排除、低信心、價格過期、找不到同款與人工閉環,讓人工可依 matcher 診斷類型分批處理。列內顯示候選 PChome 商品、候選價、match score、單位價換算摘要、人工動作與 matcher 診斷原因標籤(品牌不符、商品線不符、容量差異、組合差異、需單位價、價差極端等),不得只顯示籠統「待比對」。`/api/export/excel/pchome-review` 必須匯出同一套覆核隊列、人工處置、候選 PChome、單位價比較與原始診斷,讓人工覆核、簡報與後續 AI 分析共用同一份證據。`/api/pchome-review//decision` 是人工閉環入口:`accept_identity` 才可把候選寫入 `competitor_prices` 與 `competitor_price_history` 並打上 `manual_review/manual_accept/identity_v2`;`reject_identity`、`unit_price_required` 與 `needs_research` 只寫 `competitor_match_reviews` 並追加 manual attempt,不得把不同販售組合或否決候選灌入正式價差。PChome feeder 後續搜尋同一候選時必須讀取 `competitor_match_reviews`:已否決候選寫 `manual_rejected` 並跳過正式寫入,且必須繼續評估下一個候選,不能讓已否決候選長期阻塞同 SKU;已標記單位價候選寫 `manual_unit_price_required`;已要求補搜尋候選寫 `manual_needs_research` 並停留在覆核隊列;已採用候選可保守補到最低門檻並保留 `manual_review/manual_accept` 標籤。搜尋候選池只有強同款分數達 `0.90` 才可提前停止,避免 0.76 灰區候選卡掉後續更精準搜尋詞。人工 `reject_identity`、`unit_price_required`、`needs_research` 若命中當前正式候選,必須將同候選 `competitor_prices` 過期,不得繼續顯示正式總價差。商品列表必須將 `manual_rejected`、`manual_unit_price_required`、`manual_needs_research` 顯示為明確人工閉環狀態,不可回落成籠統「待比對」。`fetch_competitor_coverage()` 必須輸出人工採用、人工否決、人工單位價與採用率,daily/growth/PPT 共用 payload 必須顯示人工閉環成效,避免只呈現待審數。商品看板深度快取同時寫入 `data/dashboard_full_cache.pkl`,供多個 Gunicorn worker 共用,避免部署後各 worker 重複重建 7,000+ 商品統計造成開頁變慢;所有資料異動與 AI 挑品重算都透過 `clear_dashboard_cache()` 同步清除記憶體與共享快取,手動重算 API 會立即預熱商品看板快取,避免第一位使用者承擔重建成本。 - PChome re-score 回收線:`rescore_accepted_current` 只能表示最新版 matcher 判定「可人工採用」,不可直接寫入正式 `competitor_prices`;`fetch_competitor_coverage()` 必須輸出 `rescore_accepted_count`,Dashboard、daily/growth 與 OpenClaw 競品摘要都要把「重算可採用待審」獨立呈現,避免和一般低信心/單位價覆核混在一起。 - PChome 低信心操作分流:Dashboard 與 read-only `/api/pchome-review/queue` 必須把近門檻可救、證據不足、低信心舊候選拆成 `recoverable_low_score`、`true_low_confidence`、`legacy_low_score` 三個可篩選桶;廣義 `low_score` 僅作 repository/export 相容查詢,不可在 UI 中冒充單一操作分流。 +- PChome re-score audit 預設必須先取每個 SKU 的最新 `competitor_match_attempts` 狀態,再套用 status / reason 篩選;舊低信心歷史候選只能透過 `--include-historical-candidates` 明確進入考古掃描,避免已入隊、已否決或已修正 SKU 被舊紀錄重新推回報表。 +- PChome matcher replay 必須先守住假陽性:`EX8` 等型號不可被誤解析成 `x8` 入數;香氛固體凝膠 / 空氣芳香劑若一側為泛稱、一側含明確香味或 No. 款式,必須走 `aroma_scent_variant_conflict` veto,不得因同品牌同重量直接寫正式價差。 | 角色 | 模型 | 主機 | 成本 | 每日限額 | |------|------|------|------|---------| diff --git a/docs/memory/code_modularization_inventory_20260430.md b/docs/memory/code_modularization_inventory_20260430.md index c8f109a..7cdd8ac 100644 --- a/docs/memory/code_modularization_inventory_20260430.md +++ b/docs/memory/code_modularization_inventory_20260430.md @@ -51,6 +51,7 @@ - 2026-05-24 追記:同步背景 PChome 近門檻身份回收與 focused identity 系列更新後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更商品比對行為。 - 2026-05-24 追記:同步 111 fallback circuit breaker、NemoTron 決策信封與 Telegram template governance 後的 `run_scheduler.py`、`services/ollama_service.py`、`services/nemoton_dispatcher_service.py`、`services/telegram_templates.py` 行數;此處只更新 inventory,不變更模組化決策。 - 2026-05-24 追記:同步 PChome 覆核頁 fast-count、輕量 render 與重算可採用指標後的 `routes/dashboard_routes.py` 行數;此處只更新 inventory,不變更 dashboard 行為。 +- 2026-05-24 追記:同步 PChome rescore audit 最新狀態口徑與單位價 multiplier 修正後的 `services/marketplace_product_matcher.py` 行數;此處只更新 inventory,不變更拆分策略。 ## 達到或超過 800 行檔案清單 @@ -77,7 +78,7 @@ | 940 | `services/import_service.py` | P2 import service | validators / import writers / report builders | | 1071 | `services/telegram_templates.py` | P2 Telegram templates | alert template groups / channel-specific formatting / reusable render helpers | | 867 | `services/token_report_service.py` | P2 token report service | query / aggregation / chart payload / notification formatting | -| 3393 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | +| 3786 | `services/marketplace_product_matcher.py` | P2 marketplace matcher | identity parsing / unit-comparable scoring / search term quality / persistence normalization | | 865 | `routes/daily_sales_routes.py` | P2 Daily Sales Blueprint | route glue / export helpers / daily query and formatting service | | 1117 | `services/ollama_service.py` | P2 Ollama client | host health / request client / fallback policy / response parsing | | 849 | `services/pchome_crawler.py` | P2 PChome crawler | search fetch / parsing / fallback source handling / rate limit policy | diff --git a/docs/memory/current_execution_queue_20260524.md b/docs/memory/current_execution_queue_20260524.md index 7f1ff5d..5229120 100644 --- a/docs/memory/current_execution_queue_20260524.md +++ b/docs/memory/current_execution_queue_20260524.md @@ -18,6 +18,8 @@ - aroma / diffuser / essential oil - lip / cosmetic variant - private-care / body-care +- 2026-05-24 22:10 CST 起,PChome rescore audit 預設對齊 review queue 最新狀態:先取每個 SKU 最新 attempt,再套用 status / reason 篩選;歷史候選回看需明確使用 `--include-historical-candidates`。 +- 2026-05-24 22:20 CST 起,matcher replay 先套用 V10.453 安全修正:`EX8` 型號不視為 `x8` 入數,香氛固體凝膠一側泛稱、一側具體香味/No. 款式走 veto;Herbacin 小甘菊護手霜 20ml brandless 可作窄範圍安全回收。 - 只新增窄範圍、可解釋 matcher 規則。 - 保留 `MIN_MATCH_SCORE`、`identity_veto`、既有正式候選覆寫保護。 - 驗收:`matched` 有增加、目標 `low_score` 下降、`needs_review` 不異常上升、無明顯跨色號/跨款式/跨劑型錯配。 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index d037972..95052e9 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,8 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.453 matcher 安全回收規則**: 新增 Herbacin 小甘菊護手霜 20ml brandless 同款 anchor;修正 `EX8` 型號不再被誤解析為 `x8` 入數;新增香氛固體凝膠 / 空氣芳香劑一側泛稱、一側明確香味或 No. 款式的 `aroma_scent_variant_conflict` veto。這輪目標是讓 retryable replay 可救回真同款,同時先封住 MIRAE 入數與 GONESH 香味款式的假陽性。 +- **V10.452 PChome rescore audit 最新狀態口徑**: `scripts/audit_competitor_match_attempt_rescore.py` 與 `fetch_match_attempt_rescore_rows()` 預設改成先取每個 SKU 最新 attempt,再套用 status / reason 篩選,與 Dashboard review queue 一致;需要回看歷史候選時才使用 `--include-historical-candidates`,避免舊低信心紀錄讓已修正、已否決或已入隊 SKU 重複回到操作報表。 - **V10.451 low_score 操作分流拆分與 queue API**: Dashboard 比價覆核頁不再只給一個籠統低信心分頁;新增「近門檻可救」「證據不足」「低信心舊候選」三個篩選,`competitor_intel_repository.REVIEW_STATUS_FILTER_GROUPS` 同步提供對應分流,`/api/pchome-review/queue` 也能用同一套 `review_status` 做 read-only smoke / operator tools 查詢,讓 matcher 回刷、人工覆核、OpenClaw 報表能分清楚可自動回收、應保守等待、與需補搜尋的候選。 - **V10.450 PChome 覆核 fast-count UI 語意與重算可採用指標**: 預設全量覆核頁跳過 exact count 時,模板會以「約」標記快取總數,避免操作員把快取總數誤認為即時計算;搜尋、分類與單一狀態分流仍保留精準總數。`fetch_competitor_coverage()` 同步輸出 `rescore_accepted_count`,Dashboard、daily/growth 與 OpenClaw 摘要會把「重算可採用待審」獨立顯示,不再只混在一般覆核隊列。 - **V10.449 PChome 覆核 exact count 條件修正**: 只有預設「全部覆核、無搜尋、無分類」頁跳過 exact count;若使用搜尋詞、分類篩選或單一 review status,仍保留精準總數,避免操作員分頁資訊失準。 diff --git a/routes/dashboard_routes.py b/routes/dashboard_routes.py index ce11960..ddaf200 100644 --- a/routes/dashboard_routes.py +++ b/routes/dashboard_routes.py @@ -142,6 +142,8 @@ def _diagnostic_match_rejection_label(diagnostic_text, score_text, *, blocked=Tr 'lactacyd_variant_conflict', )): return '款式版本不符', f'{score_text},同品牌同容量但清潔/保養款式不同,{suffix}' + if 'aroma_scent_variant_conflict' in diagnostic_text: + return '香味款式不符', f'{score_text},香氛商品香味或款式不同,{suffix}' if 'variant_selection_review' in diagnostic_text: return '多款任選待確認', f'{score_text},一側是多款任選或缺少明確色號,需人工確認' if not blocked and score_pct is not None and score_pct < 60: diff --git a/scripts/audit_competitor_match_attempt_rescore.py b/scripts/audit_competitor_match_attempt_rescore.py index 5c3beeb..cdccd83 100755 --- a/scripts/audit_competitor_match_attempt_rescore.py +++ b/scripts/audit_competitor_match_attempt_rescore.py @@ -61,6 +61,14 @@ def main(argv: list[str] | None = None) -> int: parser.add_argument("--limit", type=int, default=100) parser.add_argument("--sample-limit", type=int, default=20) parser.add_argument("--min-score", type=float, default=MIN_MATCH_SCORE) + parser.add_argument( + "--include-historical-candidates", + action="store_true", + help=( + "Scan the latest row per SKU/candidate after status filtering. " + "Default scans only the latest attempt per SKU, matching the Dashboard review queue." + ), + ) parser.add_argument( "--apply-accepted", action="store_true", @@ -90,12 +98,18 @@ def main(argv: list[str] | None = None) -> int: statuses=statuses, reason_filter=args.reason_filter or None, limit=args.limit, + latest_sku_only=not args.include_historical_candidates, ) summary = summarize_match_attempt_rescore( rows, min_score=args.min_score, sample_limit=args.sample_limit, ) + summary["selection_mode"] = ( + "historical_candidate_latest" + if args.include_historical_candidates + else "latest_sku_only" + ) summary["materialize"] = materialize_rescore_accept_reviews( conn, rows, @@ -111,6 +125,7 @@ def main(argv: list[str] | None = None) -> int: limit=args.limit, min_score=args.min_score, sample_limit=args.sample_limit, + latest_sku_only=not args.include_historical_candidates, ) print(json.dumps(summary, ensure_ascii=False, indent=2, default=str)) diff --git a/services/competitor_intel_repository.py b/services/competitor_intel_repository.py index 8601323..646988f 100644 --- a/services/competitor_intel_repository.py +++ b/services/competitor_intel_repository.py @@ -123,6 +123,7 @@ MATCH_DIAGNOSTIC_REASON_LABELS = { "sun_protection_line_conflict": "防曬品線不符", "saugella_variant_conflict": "賽吉兒款式不符", "lactacyd_variant_conflict": "立朵舒款式不符", + "aroma_scent_variant_conflict": "香氛香味款式不符", "variant_descriptor_conflict": "款式描述不同", "variant_selection_review": "多款任選待確認", "strong_exact_spec_match": "強規格同款", diff --git a/services/competitor_match_attempt_rescore_audit.py b/services/competitor_match_attempt_rescore_audit.py index 814ca18..3a04167 100644 --- a/services/competitor_match_attempt_rescore_audit.py +++ b/services/competitor_match_attempt_rescore_audit.py @@ -333,11 +333,71 @@ def fetch_match_attempt_rescore_rows( statuses: Sequence[str] = DEFAULT_RESCAN_STATUSES, reason_filter: str | None = None, limit: int = 100, + latest_sku_only: bool = True, ) -> list[dict[str, Any]]: - """Fetch latest stored attempts for read-only re-score auditing.""" + """Fetch stored attempts for read-only re-score auditing. + + The default mirrors the Dashboard review queue: pick the latest attempt per + SKU first, then apply the requested status/reason filters. This keeps stale + historical low-score rows from re-entering operator reports after a SKU has + already moved to a newer review state. + """ status_values = tuple(status for status in statuses if status) or DEFAULT_RESCAN_STATUSES - if conn.dialect.name == "postgresql": + if latest_sku_only: + reason_predicate = "AND diagnostic_codes::text LIKE :reason_filter" if ( + conn.dialect.name == "postgresql" and reason_filter + ) else "AND CAST(diagnostic_codes AS TEXT) LIKE :reason_filter" if reason_filter else "" + nulls_last = " NULLS LAST" if conn.dialect.name == "postgresql" else "" + sql = text(f""" + WITH ranked AS ( + SELECT + sku, + attempt_status, + momo_product_id, + momo_product_name, + momo_price, + candidate_count, + best_competitor_product_id, + best_competitor_product_name, + best_competitor_price, + best_match_score, + competitor_product_url, + competitor_image_url, + competitor_stock, + diagnostic_codes, + attempted_at, + ROW_NUMBER() OVER ( + PARTITION BY sku + ORDER BY attempted_at DESC{nulls_last}, id DESC + ) AS rn + FROM competitor_match_attempts + WHERE source = :source + ) + SELECT + sku, + attempt_status, + momo_product_id, + momo_product_name, + momo_price, + candidate_count, + best_competitor_product_id, + best_competitor_product_name, + best_competitor_price, + best_match_score, + competitor_product_url, + competitor_image_url, + competitor_stock, + diagnostic_codes, + attempted_at + FROM ranked + WHERE rn = 1 + AND attempt_status IN :statuses + {reason_predicate} + ORDER BY attempted_at DESC{nulls_last} + LIMIT :limit + """).bindparams(bindparam("statuses", expanding=True)) + elif conn.dialect.name == "postgresql": reason_predicate = "AND diagnostic_codes::text LIKE :reason_filter" if reason_filter else "" sql = text(f""" SELECT DISTINCT ON (sku, best_competitor_product_id) @@ -419,6 +479,7 @@ def build_match_attempt_rescore_audit( limit: int = 100, min_score: float = MIN_MATCH_SCORE, sample_limit: int = 20, + latest_sku_only: bool = True, ) -> dict[str, Any]: with engine.connect() as conn: rows = fetch_match_attempt_rescore_rows( @@ -427,9 +488,12 @@ def build_match_attempt_rescore_audit( statuses=statuses, reason_filter=reason_filter, limit=limit, + latest_sku_only=latest_sku_only, ) - return summarize_match_attempt_rescore( + summary = summarize_match_attempt_rescore( rows, min_score=min_score, sample_limit=sample_limit, ) + summary["selection_mode"] = "latest_sku_only" if latest_sku_only else "historical_candidate_latest" + return summary diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index 5298b87..30b176c 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -482,6 +482,7 @@ FOCUSED_IDENTITY_REVIEW_ONLY_REASONS = { } FOCUSED_IDENTITY_BRANDLESS_REVIEW_REASONS = { + "herbacin_classic_hand_cream_20ml_brandless", "muji_aroma_hand_cream_brandless", "the_forest_maple_diffuser_flower_brandless", } @@ -1027,6 +1028,14 @@ def _extract_specs( counts.append((CHINESE_COUNT[match.group(1)], match.group(2))) for match in re.finditer(rf"(?:x|乘)\s*(\d+)\s*({COUNT_UNIT_PATTERN})?", text, re.I): unit = match.group(2) or "入" + if not match.group(2): + prefix = text[max(0, match.start() - 4):match.start()].strip().lower() + suffix = text[match.end():match.end() + 1] + spec_prefixed = bool(re.search(r"(?:ml|毫升|g|公克|kg|mg|oz)$", prefix)) + if re.search(r"[a-z]$", prefix) and not spec_prefixed: + continue + if suffix and re.match(r"[\u4e00-\u9fff]", suffix) and not spec_prefixed: + continue counts.append((int(match.group(1)), unit)) for match in re.finditer(rf"(\d+)\s*{ENGLISH_COUNT_UNIT_RE}", text, re.I): counts.append((int(match.group(1)), "入")) @@ -1928,6 +1937,9 @@ def score_marketplace_match( hoi_candle_line_conflict = _has_hoi_candle_line_conflict(left, right) if hoi_candle_line_conflict: reasons.append("hoi_candle_line_conflict") + aroma_scent_variant_conflict = _has_aroma_scent_variant_conflict(left, right) + if aroma_scent_variant_conflict: + reasons.append("aroma_scent_variant_conflict") variant_selection_review = _has_named_variant_selection_review(left, right, shared_anchor) if variant_selection_review: reasons.append("variant_selection_review") @@ -1977,6 +1989,8 @@ def score_marketplace_match( hard_veto = True if hoi_candle_line_conflict: hard_veto = True + if aroma_scent_variant_conflict: + hard_veto = True focused_exact_line_reason = _has_focused_low_score_exact_identity_line(left, right) if focused_exact_line_reason in FOCUSED_IDENTITY_REVIEW_ONLY_REASONS: @@ -2911,6 +2925,42 @@ def _has_hoi_candle_line_conflict(left: ProductIdentity, right: ProductIdentity) return bool((left_day_mountain and right_lab) or (right_day_mountain and left_lab)) +def _has_aroma_scent_variant_conflict(left: ProductIdentity, right: ProductIdentity) -> bool: + pair_text = f"{left.searchable_name} {right.searchable_name}" + if not any(term in pair_text for term in ("香氛固體凝膠", "香氛凝膠", "空氣芳香劑", "車用香氛")): + return False + if _is_multi_variant_catalog_listing(left) or _is_multi_variant_catalog_listing(right): + return False + + left_options = _explicit_variant_option_tokens(left) + right_options = _explicit_variant_option_tokens(right) + if left_options and right_options: + return not bool(left_options & right_options) + + scent_words = { + "藤蔓果園", + "清新花園", + "白麝香", + "黑麝香", + "寶貝粉香", + "青檸羅勒", + "炭木香", + "無花果", + "薰衣草", + "茉莉", + "玫瑰", + "雪松", + "檀香", + } + left_scent = {word for word in scent_words if word in left.searchable_name} + right_scent = {word for word in scent_words if word in right.searchable_name} + if bool(left_options or left_scent) != bool(right_options or right_scent): + return True + if left_scent and right_scent and not (left_scent & right_scent): + return True + return False + + def _has_taicend_baby_spray_equivalence(left: ProductIdentity, right: ProductIdentity) -> bool: brand_tokens = {"taicend", "泰陞"} return ( @@ -3090,6 +3140,16 @@ def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: Pro and bool(left.brand_tokens) != bool(right.brand_tokens) ): return "muji_aroma_hand_cream_brandless" + if ( + {"herbacin", "德國小甘菊"} & brand_tokens + and "小甘菊" in left_text + and "小甘菊" in right_text + and "護手霜" in left_text + and "護手霜" in right_text + and _has_shared_volume(left, right, 20) + and bool(left.brand_tokens) != bool(right.brand_tokens) + ): + return "herbacin_classic_hand_cream_20ml_brandless" if ( {"sab", "初淨肌"} & (left.brand_tokens & right.brand_tokens) and "私密防護舒緩噴霧" in left_text diff --git a/tests/test_competitor_match_attempt_rescore_audit.py b/tests/test_competitor_match_attempt_rescore_audit.py index 6a226f0..bf16119 100644 --- a/tests/test_competitor_match_attempt_rescore_audit.py +++ b/tests/test_competitor_match_attempt_rescore_audit.py @@ -3,6 +3,30 @@ import json from sqlalchemy import create_engine, text +def _create_match_attempts_table(conn): + conn.execute(text(""" + CREATE TABLE competitor_match_attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sku TEXT, + source TEXT, + attempt_status TEXT, + momo_product_id INTEGER, + momo_product_name TEXT, + momo_price NUMERIC, + candidate_count INTEGER, + best_competitor_product_id TEXT, + best_competitor_product_name TEXT, + best_competitor_price NUMERIC, + best_match_score NUMERIC, + competitor_product_url TEXT, + competitor_image_url TEXT, + competitor_stock TEXT, + diagnostic_codes TEXT, + attempted_at TEXT + ) + """)) + + def test_match_attempt_rescore_audit_classifies_current_gate_pass_and_veto(): from services.competitor_match_attempt_rescore_audit import summarize_match_attempt_rescore @@ -68,6 +92,38 @@ def test_match_attempt_rescore_audit_skips_missing_identity_text(): assert decision.gate_pass is False +def test_fetch_match_attempt_rescore_rows_defaults_to_latest_sku_state(): + from services.competitor_match_attempt_rescore_audit import fetch_match_attempt_rescore_rows + + engine = create_engine("sqlite:///:memory:") + with engine.begin() as conn: + _create_match_attempts_table(conn) + conn.execute(text(""" + INSERT INTO competitor_match_attempts + (sku, source, attempt_status, momo_product_name, best_competitor_product_id, + best_competitor_product_name, best_match_score, diagnostic_codes, attempted_at) + VALUES + ('SKU-A', 'pchome', 'true_low_confidence', 'MOMO A', 'P-A', 'PChome A', 0.72, '["old_low"]', '2026-05-24 09:00:00'), + ('SKU-A', 'pchome', 'rescore_accepted_current', 'MOMO A', 'P-A', 'PChome A', 0.91, '["new_queue"]', '2026-05-24 10:00:00'), + ('SKU-B', 'pchome', 'true_low_confidence', 'MOMO B', 'P-B', 'PChome B', 0.73, '["current_low"]', '2026-05-24 11:00:00') + """)) + + latest_rows = fetch_match_attempt_rescore_rows( + conn, + statuses=("true_low_confidence",), + limit=10, + ) + historical_rows = fetch_match_attempt_rescore_rows( + conn, + statuses=("true_low_confidence",), + limit=10, + latest_sku_only=False, + ) + + assert [row["sku"] for row in latest_rows] == ["SKU-B"] + assert {row["sku"] for row in historical_rows} == {"SKU-A", "SKU-B"} + + def test_match_attempt_rescore_materializes_accepted_current_for_manual_review(): from services.competitor_match_attempt_rescore_audit import materialize_rescore_accept_reviews diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 82ebc44..a9ded67 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -1201,6 +1201,11 @@ def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines(): "【Yuskin悠斯晶】A乳霜 攜帶型 6盒組(30g/盒)", "focused_exact_identity_yuskin_classic_cream_30g_6pack", ), + ( + "【Herbacin 德國小甘菊】小甘菊1號護手霜20ml", + "小甘菊經典護手霜20ml", + "focused_exact_identity_herbacin_classic_hand_cream_20ml_brandless", + ), ( "【Johnsons 嬌生】嬰兒潤膚乳500ml_嬰兒乳液(牛奶/純淨/甜夢/溫和/棉柔_任選)", "嬌生嬰兒甜夢潤膚乳500ml", @@ -1338,6 +1343,14 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ "ERBE 德國不鏽鋼指甲清垢棒", "ERBE 德國雙頭指甲緣刨刀", ) + mirae_count_gap = score_marketplace_match( + "【MIRAE 未來美】EX8分鐘極速面膜 5片(補水/淨白/舒緩/修護)", + "未來美EX8分鐘極速補水/淨白/舒緩/修護面膜x8", + ) + gonesh_scent_gap = score_marketplace_match( + "【GONESH】室內汽車用香氛固體凝膠78g(持久芳香)", + "【日本GONESH】室內汽車用香氛固體凝膠空氣芳香劑(No.4號 藤蔓果園78g/罐 長效8週持久芳香型)", + ) sunscreen_line_gap = score_marketplace_match( "【我的心機】溫和寶貝兒童防曬乳35ml(SPF50+ PA+++)", "我的心機 海洋友善保濕高效防曬乳35ml(SPF50+PA++++)", @@ -1359,6 +1372,8 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ summer_eve_variant_gap, solone_type_gap, erbe_tool_gap, + mirae_count_gap, + gonesh_scent_gap, sunscreen_line_gap, ): assert diagnostics.score < 0.76 @@ -1372,6 +1387,10 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ assert "makeup_usage_conflict" in lunasol.reasons assert muji_swab.hard_veto is True assert "cotton_swab_variant_conflict" in muji_swab.reasons + assert mirae_count_gap.hard_veto is True + assert "count_conflict" in mirae_count_gap.reasons + assert gonesh_scent_gap.hard_veto is True + assert "aroma_scent_variant_conflict" in gonesh_scent_gap.reasons assert lancome_line_gap.hard_veto is True assert "lancome_line_conflict" in lancome_line_gap.reasons assert peripera_variant_gap.hard_veto is False