fix: sanitize monitoring source labels
Some checks failed
CD Pipeline / deploy (push) Has been cancelled

This commit is contained in:
ogt
2026-06-27 19:46:51 +08:00
parent fdaa4bb2c9
commit 8861cbc3ac
5 changed files with 99 additions and 24 deletions

View File

@@ -402,7 +402,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
# ==========================================
# 系統版本與路徑
# ==========================================
SYSTEM_VERSION = "V10.719"
SYSTEM_VERSION = "V10.720"
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
public_url = PUBLIC_URL # 用於模板顯示

View File

@@ -1,8 +1,8 @@
# PChome 業績成長自動化作戰系統 — AI 競價情報模組 Single Source of Truth
> **最後更新**: 2026-06-26 (台北時間)
> **最後更新**: 2026-06-27 (台北時間)
> **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯;PChome 後台業績匯入韌性已補強;產品定位正名為「PChome 業績成長自動化作戰系統」;外部市場來源正規化層、自動同步、作戰清單與價格參考表優先讀取、CSV 備援預檢、前台操作入口、高可見頁面繁中化守門、比價/作戰 UI 工作台化、跨平台來源治理與商品身份 UI 契約已建立;GCP embedding 熔斷延後處理、110 proxy rescue 與 direct host health skip 已建立
> **適用版本**: V10.719
> **適用版本**: V10.720
---
@@ -804,3 +804,4 @@ POSTGRES_HOST=momo-db
| 2026-06-26 | 供貨風險頁不得使用資料表或英文模組名作為主語 | V10.717 起缺貨清單與補貨通知頁統一使用「供貨風險、缺貨處理清單、補貨通知紀錄」等營運語言,不再顯示「缺貨資料表、缺貨資料、Vendor Stockout」等資料庫或英文模組感文案。 |
| 2026-06-26 | AI 觀測頁不得外露 caller key | V10.718 起 AI 品質診斷與知識召回頁使用「使用情境」作為可見主語,並透過 `obs_label.caller()` 顯示營運名稱;前台不得直接顯示 `<code>{{ caller }}</code>`、`top_k` 或「全部呼叫端」等工程語言。 |
| 2026-06-26 | 商品來源頁不得提供 raw JSON 匯出 | V10.719 起 `/pchome_crawler` 改為「PChome 商品監控」營運清單,只提供表格與賣場清單 CSV;前台不得出現 `exportJson`、`JSON.stringify(currentProducts)`、`圖片URL`、`商品URL` 或 raw JSON 檔名。 |
| 2026-06-27 | 設定頁監控來源不得直接外露 crawler 命名 | V10.720 起 `/settings` 的商品監控來源由 API 邊界轉為營運名稱與說明,前端卡片再以 `escapeHtml(monitorText(...))` 顯示;啟停與頻率更新訊息統一使用「監控來源」,不得回傳「爬蟲 XXX 已啟用」這類工程主語。 |

View File

@@ -4,22 +4,71 @@
爬蟲管理 API 路由
提供網頁介面來管理爬蟲的啟用/停用狀態
"""
from flask import Blueprint, jsonify, request, render_template, redirect, url_for
from flask import Blueprint, jsonify, request, redirect, url_for
from services.crawler_config_loader import (
load_crawler_config,
update_crawler_status,
get_crawler_info,
get_enabled_crawlers,
get_paused_crawlers
get_paused_crawlers,
CONFIG_PATH as CRAWLER_CONFIG_PATH,
)
import json
import os
from datetime import datetime
# 創建 Blueprint
crawler_bp = Blueprint('crawler_management', __name__)
CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'data', 'crawler_config.json')
# Operator-facing wording for known monitoring sources.
# Each key is a crawler config key; each value is a
# (display name, description) tuple. `_sanitize_source_info` looks entries up
# with `SOURCE_LABELS.get(crawler_key, ...)` and uses the tuple in place of the
# name/description stored in the crawler config.
SOURCE_LABELS = {
'momo_main': ('MOMO 主站商品監控', '追蹤 MOMO 主站商品與售價,支援 PChome 價差判斷。'),
'edm_promo': ('MOMO 限時活動監控', '追蹤 MOMO 限時活動商品,補齊促銷壓力與主推機會。'),
'festival_11': ('1.1 檔期活動監控', '檔期活動商品來源;活動結束時可暫停等待下次啟用。'),
'mothers_day_2026': ('母親節檔期活動監控', '追蹤母親節檔期活動商品與促銷壓力。'),
'valentine_520_2026': ('520 情人節活動監控', '追蹤 520 檔期活動商品與促銷壓力。'),
'labor_day_2026': ('勞動節活動監控', '追蹤勞動節檔期活動商品與促銷壓力。'),
}
def _operator_text(value):
    """Return *value* as trimmed text with crawler jargon rewritten.

    Falsy input (``None``, ``''``, ``0``) and whitespace-only text yield
    ``''``. Any other value is converted with ``str()``, stripped, and has
    each known engineering phrase replaced by its operator-facing wording.
    """
    cleaned = str(value or '').strip()
    if cleaned == '':
        return ''

    # Order matters: the compound phrases have to be rewritten before the
    # bare '爬蟲' catch-all, otherwise the catch-all would consume them first.
    wording = (
        ('爬蟲任務', '資料擷取任務'),
        ('商品爬蟲', '商品監控'),
        ('促銷爬蟲', '促銷活動監控'),
        ('EDM 爬蟲', 'EDM 活動監控'),
        ('爬蟲', '監控來源'),
        ('保留程式碼和邏輯', '保留設定'),
        ('同版型活動', '同類型活動'),
    )
    for jargon, operator_term in wording:
        if jargon in cleaned:
            cleaned = cleaned.replace(jargon, operator_term)
    return cleaned
def _sanitize_source_info(crawler_key, info):
    """Return an operator-facing copy of one monitoring-source config entry.

    The input mapping is never mutated; a shallow copy is edited and returned.

    Args:
        crawler_key: Config key of the source; used to look up a curated
            (name, description) pair in ``SOURCE_LABELS``.
        info: The config entry for that key. ``None`` or any non-dict value
            is treated as "no usable entry".

    Returns:
        ``None`` when *info* is not a dict. Otherwise a new dict in which
        ``name`` and ``description`` are always non-empty operator wording,
        ``pause_reason`` / ``notes`` / ``activity_name`` (when present) have
        been passed through ``_operator_text``, and ``function`` is removed.
    """
    # An empty dict is a real (if sparse) entry and still gets default
    # wording; only a missing or malformed entry is reported as None.
    if not isinstance(info, dict):
        return None

    sanitized = dict(info)

    curated = SOURCE_LABELS.get(crawler_key)
    if curated is not None:
        label, description = curated
    else:
        # Unknown source: fall back to the stored name, rewritten.
        label = _operator_text(sanitized.get('name')) or '商品監控來源'
        description = ''

    sanitized['name'] = label
    sanitized['description'] = (
        description
        or _operator_text(sanitized.get('description'))
        or '支援商品、價格與促銷監控。'
    )

    for field in ('pause_reason', 'notes', 'activity_name'):
        if field in sanitized:
            sanitized[field] = _operator_text(sanitized.get(field))

    # The 'function' field is dropped so it is not part of the returned entry.
    sanitized.pop('function', None)
    return sanitized
def _sanitize_sources(crawlers):
    """Return ``{key: sanitized entry}`` for every usable monitoring source.

    Args:
        crawlers: Mapping of crawler key to config entry; ``None`` or an
            empty mapping yields ``{}``.

    Returns:
        A new dict whose values are the results of
        ``_sanitize_source_info``. Entries for which that helper returns
        ``None`` are left out, so the result never contains a ``None`` value
        (which would serialize to JSON ``null``).
    """
    sanitized = {}
    for key, info in (crawlers or {}).items():
        entry = _sanitize_source_info(key, info)
        # A None entry would reach the client as null; the settings-page card
        # builder reads fields such as `info.enabled` on every entry.
        if entry is not None:
            sanitized[key] = entry
    return sanitized
@crawler_bp.route('/crawler_management')
def crawler_management_page():
@@ -33,7 +82,7 @@ def get_crawlers():
config = load_crawler_config()
return jsonify({
"status": "success",
"data": config.get('crawlers', {})
"data": _sanitize_sources(config.get('crawlers', {}))
})
except Exception as e:
return jsonify({
@@ -49,12 +98,12 @@ def get_crawler(crawler_key):
if info is None:
return jsonify({
"status": "error",
"message": f"爬蟲 {crawler_key} 不存在"
"message": "監控來源不存在"
}), 404
return jsonify({
"status": "success",
"data": info
"data": _sanitize_source_info(crawler_key, info)
})
except Exception as e:
return jsonify({
@@ -87,11 +136,12 @@ def toggle_crawler(crawler_key):
# 取得更新後的資訊
info = get_crawler_info(crawler_key)
display_info = _sanitize_source_info(crawler_key, info) or {'name': '監控來源'}
return jsonify({
"status": "success",
"message": f"爬蟲 {info.get('name', crawler_key)} {'啟用' if enabled else '停用'}",
"data": info
"message": f"監控來源「{display_info.get('name', '商品監控來源')}{'啟用' if enabled else '停用'}",
"data": display_info
})
except Exception as e:
@@ -129,21 +179,22 @@ def update_crawler_schedule(crawler_key):
if crawler_key not in config.get('crawlers', {}):
return jsonify({
"status": "error",
"message": f"爬蟲 {crawler_key} 不存在"
"message": "監控來源不存在"
}), 404
# 更新執行頻率
config['crawlers'][crawler_key]['schedule_hours'] = schedule_hours
config['metadata']['last_updated'] = datetime.now().isoformat()
config.setdefault('metadata', {})['last_updated'] = datetime.now().isoformat()
# 寫回配置文件
with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
with open(CRAWLER_CONFIG_PATH, 'w', encoding='utf-8') as f:
json.dump(config, f, ensure_ascii=False, indent=2)
updated_info = _sanitize_source_info(crawler_key, config['crawlers'][crawler_key])
return jsonify({
"status": "success",
"message": f"執行頻率已更新為每 {schedule_hours} 小時",
"data": config['crawlers'][crawler_key]
"data": updated_info
})
except Exception as e:

View File

@@ -190,6 +190,8 @@ def test_growth_workflow_pages_hide_raw_export_and_fallback_content():
pchome_crawler = (ROOT / "templates/pchome_crawler.html").read_text(encoding="utf-8")
market_intel = (ROOT / "templates/market_intel/disabled.html").read_text(encoding="utf-8")
settings = (ROOT / "templates/settings.html").read_text(encoding="utf-8")
settings_js = (ROOT / "web/static/js/page-settings.js").read_text(encoding="utf-8")
crawler_routes = (ROOT / "routes/crawler_management_routes.py").read_text(encoding="utf-8")
navbar = (ROOT / "templates/components/_navbar.html").read_text(encoding="utf-8")
shell = (ROOT / "templates/components/_ewoooc_shell.html").read_text(encoding="utf-8")
dashboard_js = (ROOT / "web/static/js/page-dashboard-v2.js").read_text(encoding="utf-8")
@@ -222,6 +224,11 @@ def test_growth_workflow_pages_hide_raw_export_and_fallback_content():
assert "商品監控中心" in settings
assert "監控來源設定" in settings
assert "escapeHtml(monitorText(info.name" in settings_js
assert "每 ${scheduleHours} 小時更新" in settings_js
assert "監控來源「" in crawler_routes
assert "CRAWLER_CONFIG_PATH" in crawler_routes
assert '"message": f"爬蟲' not in crawler_routes
assert "商品監控" in navbar
assert "商品監控狀態" in shell
assert "全站商品監控" in dashboard_js

View File

@@ -38,6 +38,15 @@ function showToast(message, type = 'success') {
/** Show the loading overlay by adding the `active` class to `#loading-overlay`. */
function showLoading() {
    const overlay = document.getElementById('loading-overlay');
    overlay.classList.add('active');
}
/** Hide the loading overlay by removing the `active` class from `#loading-overlay`. */
function hideLoading() {
    const overlay = document.getElementById('loading-overlay');
    overlay.classList.remove('active');
}
/**
 * Read the CSRF token from the page's `<meta name="csrf-token">` element.
 *
 * @returns {string|null} The element's `content` attribute value.
 */
function getCSRFToken() {
    const tokenMeta = document.querySelector('meta[name="csrf-token"]');
    return tokenMeta.getAttribute('content');
}
/**
 * Escape a value for interpolation into an HTML template string.
 *
 * `null` and `undefined` become the empty string; everything else is
 * converted with `String()`. The characters `&`, `<`, `>`, `"` and `'` are
 * replaced by entities, so the result is safe both as element text and inside
 * a quoted attribute value. No DOM access is required.
 *
 * @param {*} value Value to render.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    // Single pass over the string: an `&` produced by one replacement is
    // never re-escaped.
    return String(value ?? '').replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Return display text with every occurrence of '爬蟲' replaced by '監控來源'.
 *
 * @param {*} value Preferred text; used when truthy.
 * @param {*} [fallback=''] Text used when `value` is falsy.
 * @returns {string} The chosen text, converted with `String()` and rewritten.
 */
function monitorText(value, fallback = '') {
    const chosen = value ? value : fallback;
    return String(chosen).split('爬蟲').join('監控來源');
}
// ───────── Crawler ─────────
async function loadCrawlers() {
@@ -68,12 +77,19 @@ function createCrawlerCard(key, info) {
const card = document.createElement('div');
card.className = `crawler-card ${info.enabled ? 'active' : 'paused'}`;
const statusClass = info.enabled ? 'active' : 'paused';
const statusText = info.enabled ? '運行中' : '已暫停';
const statusText = info.enabled ? '啟用中' : '已暫停';
const sourceName = escapeHtml(monitorText(info.name, '商品監控來源'));
const sourceDescription = escapeHtml(monitorText(info.description, '支援商品、價格與促銷監控。'));
const scheduleHours = escapeHtml(info.schedule_hours || 'N/A');
const lpnCode = escapeHtml(info.lpn_code || '');
const lastActiveDate = escapeHtml(info.last_active_date || '');
const pauseReason = escapeHtml(monitorText(info.pause_reason, ''));
const notes = escapeHtml(monitorText(info.notes, ''));
card.innerHTML = `
<div class="crawler-header">
<div class="crawler-title-group">
<div class="crawler-title"><i class="fas fa-robot"></i>${info.name}</div>
<div class="crawler-title"><i class="fas fa-satellite-dish"></i>${sourceName}</div>
<span class="status-badge ${statusClass}"><i class="fas fa-circle"></i>${statusText}</span>
</div>
<label class="toggle-switch">
@@ -83,16 +99,16 @@ function createCrawlerCard(key, info) {
</div>
<div class="crawler-body">
<div class="crawler-info">
<div class="info-item"><i class="fas fa-info-circle"></i><span>${info.description || 'N/A'}</span></div>
<div class="info-item"><i class="fas fa-clock"></i><span>每 ${info.schedule_hours || 'N/A'} 小時執行</span></div>
${info.lpn_code ? `<div class="info-item"><i class="fas fa-barcode"></i><span>活動代碼:${info.lpn_code}</span></div>` : ''}
${info.last_active_date ? `<div class="info-item"><i class="fas fa-calendar-check"></i><span>最後活動:${info.last_active_date}</span></div>` : ''}
<div class="info-item"><i class="fas fa-info-circle"></i><span>${sourceDescription}</span></div>
<div class="info-item"><i class="fas fa-clock"></i><span>每 ${scheduleHours} 小時更新</span></div>
${info.lpn_code ? `<div class="info-item"><i class="fas fa-barcode"></i><span>活動代碼:${lpnCode}</span></div>` : ''}
${info.last_active_date ? `<div class="info-item"><i class="fas fa-calendar-check"></i><span>最後活動:${lastActiveDate}</span></div>` : ''}
</div>
${!info.enabled && info.pause_reason ? `
<div class="pause-reason">
<strong><i class="fas fa-pause-circle me-2"></i>暫停原因</strong>
${info.pause_reason}
${info.notes ? `<br><small><i class="fas fa-sticky-note me-1"></i>${info.notes}</small>` : ''}
${pauseReason}
${info.notes ? `<br><small><i class="fas fa-sticky-note me-1"></i>${notes}</small>` : ''}
</div>` : ''}
${info.enabled ? `
<div class="crawler-controls">