Add competitor match rescore audit
This commit is contained in:
91
scripts/audit_competitor_match_attempt_rescore.py
Executable file
91
scripts/audit_competitor_match_attempt_rescore.py
Executable file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Read-only audit for stored PChome match attempts under the current matcher."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
|
||||
# Put the directory one level above this script's folder on sys.path so the
# project-local imports that follow resolve when the script is run directly.
ROOT = Path(__file__).resolve().parents[1]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
|
||||
|
||||
from services.competitor_match_attempt_rescore_audit import ( # noqa: E402
|
||||
DEFAULT_RESCAN_STATUSES,
|
||||
build_match_attempt_rescore_audit,
|
||||
summarize_match_attempt_rescore,
|
||||
)
|
||||
from services.competitor_price_feeder import MIN_MATCH_SCORE # noqa: E402
|
||||
|
||||
|
||||
def _read_jsonl(path: str) -> Iterator[dict[str, Any]]:
    """Yield one dict per non-blank line of a JSONL source.

    Args:
        path: File path to read, or "-" to read from ``sys.stdin``.

    Yields:
        The decoded JSON object for every line that parses to a dict. Lines
        that fail to parse, or that parse to something other than a dict, are
        not raised; instead a marker dict
        ``{"_invalid_json": True, "_line_no": <1-based line>, "_error": <text>}``
        is yielded so the caller decides how to handle them. Blank lines are
        skipped but still count towards ``_line_no``.

    Raises:
        OSError: If *path* names a file that cannot be opened.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading byte-order
    # mark; without it json.loads rejects the first record of a BOM-prefixed file.
    handle = sys.stdin if path == "-" else open(path, "r", encoding="utf-8-sig")
    try:
        for line_no, line in enumerate(handle, start=1):
            if line_no == 1:
                # stdin is decoded by the interpreter, so a BOM may still
                # arrive here; str.strip() does not treat U+FEFF as whitespace.
                line = line.lstrip("\ufeff")
            line = line.strip()
            if not line:
                continue
            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                yield {"_invalid_json": True, "_line_no": line_no, "_error": str(exc)}
                continue
            if isinstance(payload, dict):
                yield payload
            else:
                yield {"_invalid_json": True, "_line_no": line_no, "_error": "line is not a JSON object"}
    finally:
        # Never close the process-wide stdin; only close what we opened.
        if handle is not sys.stdin:
            handle.close()
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Run the rescore audit and print its summary as JSON on stdout.

    With ``--input`` the attempts are read from a JSONL file (or stdin) and
    passed to ``summarize_match_attempt_rescore``; in that mode only
    ``--min-score`` and ``--sample-limit`` are forwarded. Without ``--input``
    an engine is created from ``config.DATABASE_PATH`` and handed to
    ``build_match_attempt_rescore_audit`` together with the source, status,
    reason-filter and limit options.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        0 after the summary has been printed.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Re-score stored competitor_match_attempts with the current matcher. "
            "Default mode reads the configured DB and never writes back."
        )
    )
    parser.add_argument("--input", help="JSONL file path, or '-' for stdin. If omitted, query DATABASE_PATH.")
    parser.add_argument("--source", default="pchome")
    parser.add_argument("--status", action="append", dest="statuses", help="Attempt status to include; repeatable.")
    parser.add_argument("--reason-filter", default="strong_exact_spec_match")
    parser.add_argument("--limit", type=int, default=100)
    parser.add_argument("--sample-limit", type=int, default=20)
    parser.add_argument("--min-score", type=float, default=MIN_MATCH_SCORE)
    args = parser.parse_args(argv)

    if args.input:
        rows: list[dict[str, Any]] = []
        skipped = 0
        for row in _read_jsonl(args.input):
            if row.get("_invalid_json"):
                # Report rejected lines on stderr instead of dropping them
                # silently, so an under-counted audit is visible while stdout
                # stays machine-readable JSON.
                skipped += 1
                print(
                    f"warning: skipping {args.input} line {row.get('_line_no')}: {row.get('_error')}",
                    file=sys.stderr,
                )
                continue
            rows.append(row)
        if skipped:
            print(f"warning: skipped {skipped} invalid JSONL line(s)", file=sys.stderr)
        summary = summarize_match_attempt_rescore(
            rows,
            min_score=args.min_score,
            sample_limit=args.sample_limit,
        )
    else:
        # Imported lazily so --input mode works without the project config.
        from config import DATABASE_PATH

        statuses = tuple(args.statuses or DEFAULT_RESCAN_STATUSES)
        engine = create_engine(DATABASE_PATH)
        try:
            summary = build_match_attempt_rescore_audit(
                engine,
                source=args.source,
                statuses=statuses,
                # An empty --reason-filter value disables the filter.
                reason_filter=args.reason_filter or None,
                limit=args.limit,
                min_score=args.min_score,
                sample_limit=args.sample_limit,
            )
        finally:
            # Release pooled connections even when the audit raises.
            engine.dispose()

    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(summary, ensure_ascii=False, indent=2, default=str))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user