Add competitor match rescore audit

This commit is contained in:
OoO
2026-05-24 18:00:41 +08:00
parent 48c5db3b85
commit f6f9bf574c
9 changed files with 477 additions and 2 deletions

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""Read-only audit for stored PChome match attempts under the current matcher."""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Iterator
from sqlalchemy import create_engine
# Directory one level above the folder that contains this script
# (presumably the repository root -- confirm against the project layout).
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT at the front of sys.path unless it is already listed; this has to
# happen before the project-local imports that follow (presumably so they
# resolve when the file is executed directly as a script).
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from services.competitor_match_attempt_rescore_audit import ( # noqa: E402
DEFAULT_RESCAN_STATUSES,
build_match_attempt_rescore_audit,
summarize_match_attempt_rescore,
)
from services.competitor_price_feeder import MIN_MATCH_SCORE # noqa: E402
def _read_jsonl(path: str) -> Iterator[dict[str, Any]]:
handle = sys.stdin if path == "-" else open(path, "r", encoding="utf-8")
try:
for line_no, line in enumerate(handle, start=1):
line = line.strip()
if not line:
continue
try:
payload = json.loads(line)
except json.JSONDecodeError as exc:
yield {"_invalid_json": True, "_line_no": line_no, "_error": str(exc)}
continue
if isinstance(payload, dict):
yield payload
else:
yield {"_invalid_json": True, "_line_no": line_no, "_error": "line is not a JSON object"}
finally:
if handle is not sys.stdin:
handle.close()
def main(argv: list[str] | None = None) -> int:
    """Run the rescore audit and print its summary as JSON on stdout.

    With ``--input`` the attempts are read from a JSONL file (or stdin when
    the value is ``-``) and passed to ``summarize_match_attempt_rescore``;
    only ``--min-score`` and ``--sample-limit`` are used in that mode.
    Without ``--input`` an engine is created from ``config.DATABASE_PATH``
    and ``build_match_attempt_rescore_audit`` is called with the source,
    status, reason-filter and limit options as well.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Always ``0`` once the summary has been printed.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Re-score stored competitor_match_attempts with the current matcher. "
            "Default mode reads the configured DB and never writes back."
        )
    )
    parser.add_argument("--input", help="JSONL file path, or '-' for stdin. If omitted, query DATABASE_PATH.")
    parser.add_argument("--source", default="pchome")
    parser.add_argument("--status", action="append", dest="statuses", help="Attempt status to include; repeatable.")
    parser.add_argument("--reason-filter", default="strong_exact_spec_match")
    parser.add_argument("--limit", type=int, default=100)
    parser.add_argument("--sample-limit", type=int, default=20)
    parser.add_argument("--min-score", type=float, default=MIN_MATCH_SCORE)
    args = parser.parse_args(argv)
    statuses = tuple(args.statuses or DEFAULT_RESCAN_STATUSES)

    if args.input:
        rows: list[dict[str, Any]] = []
        skipped: list[dict[str, Any]] = []
        for row in _read_jsonl(args.input):
            (skipped if row.get("_invalid_json") else rows).append(row)
        if skipped:
            # Dropping malformed lines silently would make the audit
            # under-count without any hint; report on stderr so the JSON
            # written to stdout keeps its shape.
            shown = ", ".join(str(bad.get("_line_no")) for bad in skipped[:10])
            more = ", ..." if len(skipped) > 10 else ""
            print(
                f"warning: skipped {len(skipped)} invalid JSONL line(s) "
                f"in {args.input!r} (line {shown}{more})",
                file=sys.stderr,
            )
        summary = summarize_match_attempt_rescore(
            rows,
            min_score=args.min_score,
            sample_limit=args.sample_limit,
        )
    else:
        # Imported lazily so the JSONL mode does not need the project config.
        from config import DATABASE_PATH

        engine = create_engine(DATABASE_PATH)
        try:
            summary = build_match_attempt_rescore_audit(
                engine,
                source=args.source,
                statuses=statuses,
                reason_filter=args.reason_filter or None,
                limit=args.limit,
                min_score=args.min_score,
                sample_limit=args.sample_limit,
            )
        finally:
            # Release pooled connections even when the audit query fails.
            engine.dispose()

    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(summary, ensure_ascii=False, indent=2, default=str))
    return 0
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())