479 lines
17 KiB
Python
479 lines
17 KiB
Python
"""
|
||
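PChome crawler API routes.

RESTful API for crawling PChome 24h products:
- GET  /api/pchome/regions - list the regions that can be crawled
- POST /api/pchome/crawl/region - crawl a given region
- POST /api/pchome/crawl/custom - crawl a custom PChome 24h URL
- POST /api/pchome/search - search products
- GET  /api/pchome/products/<product_id> - fetch a single product's details
- POST /api/pchome/products/batch - fetch details for several products at once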
|
||
"""
|
||
|
||
from flask import Blueprint, request, jsonify, render_template
|
||
from auth import login_required, role_required as permission_required
|
||
|
||
# Blueprint that the PChome crawler page and the /api/pchome/* routes below are registered on.
pchome_bp = Blueprint('pchome', __name__)
|
||
|
||
|
||
@pchome_bp.route('/pchome_crawler')
@login_required
def pchome_crawler_page():
    """Render the PChome crawler management page."""
    template_name = 'pchome_crawler.html'
    return render_template(template_name, active_page='pchome_crawler')
|
||
|
||
|
||
# PChome 24h region codes (the full list of mid-level categories).
# Resulting shape: 'CODE': {'name': display name, 'category': top-level category}
# Note: these are mid-level categories ("region"), not sub-categories ("store").
#
# The table is written as (category, ((code, name), ...)) groups and expanded
# into the flat mapping below; group and entry order is preserved, so the
# insertion order of PCHOME_REGIONS matches the listing order here.
PCHOME_REGIONS = {
    code: {'name': name, 'category': category}
    for category, entries in (
        # ------------------------------------------
        # 3C / computing
        # ------------------------------------------
        ('3C', (
            ('DSAA', '筆記型電腦'),
            ('DSAB', '桌上型電腦'),
            ('DSAC', '平板電腦'),
            ('DSAD', '手機/智慧穿戴'),
            ('DSAE', '相機/攝影機'),
            ('DSAF', '電視/投影機'),
            ('DSAG', '音響/耳機'),
            ('DSAH', '電競周邊'),
            ('DSAI', '網通設備'),
            ('DSAJ', '儲存裝置'),
            ('DSAK', '電腦零組件'),
            ('DSAL', '辦公設備'),
            ('DSAM', '軟體/遊戲'),
        )),
        # ------------------------------------------
        # Home appliances
        # ------------------------------------------
        ('家電', (
            ('DMAA', '冰箱'),
            ('DMAB', '洗衣機'),
            ('DMAC', '冷氣/空調'),
            ('DMAD', '清淨/除濕'),
            ('DMAE', '廚房家電'),
            ('DMAF', '生活家電'),
            ('DMAG', '季節家電'),
            ('DMAH', '美容家電'),
            ('DMAI', '按摩家電'),
        )),
        # ------------------------------------------
        # Beauty & skincare (DD prefix - regions verified to work in testing)
        # ------------------------------------------
        ('美妝', (
            # Basic skincare
            ('DDAB', '開架保養'),
            ('DDAD', '專櫃保養'),
            ('DDAF', '醫美保養'),
            ('DDBB', '精華液'),
            # Body care
            ('DDAG', '護手霜/身體乳'),
            ('DDAN', '美體保養'),
            ('DDAO', '身體保養'),
            ('DDDF', '私密保養'),
            # Fragrance
            ('DDAH', '香水'),
            ('DDCC', '香氛品牌'),
            # Hair care
            ('DDAE', '沙龍美髮'),
            ('DDBX', '沙龍髮品'),
            ('DDCH', '染髮造型'),
            # Men
            ('DDAX', '男士清潔'),
        )),
        # Brand zones
        ('美妝品牌', (
            ('DDAJ', 'LUSH'),
            ('DDAP', '蕾莉歐'),
            ('DDAQ', '歐舒丹'),
            ('DDAS', "KIEHL'S"),
            ('DDAU', '品木宣言'),
            ('DDAW', 'BIOTHERM碧兒泉'),
            ('DDBM', "BURT'S BEES"),
            ('DDBN', '薇姿'),
            ('DDBO', 'UNT'),
            ('DDBT', 'DHC'),
            ('DDBV', 'DR.WU'),
            ('DDCA', '理膚寶水'),
            ('DDCM', 'KOSE'),
            ('DDCN', '適樂膚CeraVe'),
            ('DDCQ', '蘭芝'),
            ('DDCU', '雅漾'),
            ('DDBA', 'Dr.Ci:Labo'),
            ('DDDB', '蔻蘿蘭'),
            ('DDDE', '寶拉珍選'),
            ('DDDI', '妮維雅'),
            ('DDDK', 'THE BODY SHOP'),
        )),
        # ------------------------------------------
        # Food / beverages
        # ------------------------------------------
        ('食品', (
            ('DBAA', '零食餅乾'),
            ('DBAB', '沖泡飲品'),
            ('DBAC', '保健食品'),
            ('DBAD', '生鮮食材'),
            ('DBAE', '米/麵/油'),
            ('DBAF', '調味料/醬'),
            ('DBAG', '罐頭/泡麵'),
            ('DBAH', '飲料/水'),
            ('DBAI', '酒類'),
            ('DBAJ', '咖啡/茶'),
            ('DBAK', '嬰幼兒食品'),
        )),
        # ------------------------------------------
        # Daily necessities
        # ------------------------------------------
        ('生活', (
            ('DEAA', '衛生紙/濕巾'),
            ('DEAB', '清潔用品'),
            ('DEAC', '寵物用品'),
            ('DEAD', '衛浴用品'),
            ('DEAE', '口腔清潔'),
            ('DEAF', '女性生理'),
            ('DEAG', '紙品/垃圾袋'),
            ('DEAH', '洗衣用品'),
            ('DEAI', '驅蚊防蟲'),
        )),
        # ------------------------------------------
        # Mother & baby / kids
        # ------------------------------------------
        ('母嬰', (
            ('DHAA', '奶粉/副食品'),
            ('DHAB', '尿布/濕巾'),
            ('DHAC', '嬰兒用品'),
            ('DHAD', '哺乳用品'),
            ('DHAE', '媽媽用品'),
            ('DHAF', '嬰童服飾'),
            ('DHAG', '兒童玩具'),
            ('DHAH', '推車/汽座'),
        )),
        # ------------------------------------------
        # Apparel
        # ------------------------------------------
        ('服飾', (
            ('DGAA', '男裝'),
            ('DGAB', '女裝'),
            ('DGAC', '內睡衣'),
            ('DGAD', '鞋包配件'),
            ('DGAE', '男鞋'),
            ('DGAF', '女鞋'),
            ('DGAG', '男包'),
            ('DGAH', '女包'),
            ('DGAI', '精品配件'),
            ('DGAJ', '手錶'),
            ('DGAK', '眼鏡/太陽眼鏡'),
        )),
        # ------------------------------------------
        # Sports / outdoor
        # ------------------------------------------
        ('運動', (
            ('DIAA', '健身器材'),
            ('DIAB', '運動用品'),
            ('DIAC', '戶外露營'),
            ('DIAD', '自行車'),
            ('DIAE', '登山用品'),
            ('DIAF', '水上活動'),
            ('DIAG', '球類運動'),
            ('DIAH', '運動服飾'),
            ('DIAI', '運動鞋'),
        )),
        # ------------------------------------------
        # Furniture / bedding
        # ------------------------------------------
        ('傢俱', (
            ('DJAA', '床墊寢具'),
            ('DJAB', '傢俱收納'),
            ('DJAC', '居家裝飾'),
            ('DJAD', '燈具'),
            ('DJAE', '廚房用品'),
            ('DJAF', '衛浴設備'),
            ('DJAG', 'DIY工具'),
            ('DJAH', '園藝用品'),
        )),
        # ------------------------------------------
        # Cars & motorcycles
        # ------------------------------------------
        ('汽機車', (
            ('DKAA', '汽車百貨'),
            ('DKAB', '機車百貨'),
            ('DKAC', '輪胎'),
            ('DKAD', '行車記錄器'),
            ('DKAE', '導航/車用電子'),
        )),
        # ------------------------------------------
        # Books / audio-visual
        # ------------------------------------------
        ('圖書', (
            ('DJBA', '中文書'),
            ('DJBB', '外文書'),
            ('DJBC', '雜誌'),
            ('DJBD', 'CD/DVD'),
        )),
        # ------------------------------------------
        # Vouchers / travel
        # ------------------------------------------
        ('票券', (
            ('DYAA', '旅遊票券'),
            ('DYAB', '餐券'),
            ('DYAC', '住宿券'),
            ('DYAD', '娛樂票券'),
        )),
    )
    for code, name in entries
}
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/regions', methods=['GET'])
@login_required
def get_regions():
    """Return the crawlable PChome regions, both flat and grouped by category.

    The JSON payload's ``data`` holds ``regions`` (the ``PCHOME_REGIONS``
    mapping as-is) and ``categories`` (category name -> list of
    ``{'code', 'name'}`` entries, in the order the regions are defined).
    """
    grouped = {}
    for region_code, meta in PCHOME_REGIONS.items():
        entry = {'code': region_code, 'name': meta['name']}
        # setdefault creates the category bucket the first time it is seen.
        grouped.setdefault(meta['category'], []).append(entry)

    payload = {'regions': PCHOME_REGIONS, 'categories': grouped}
    return jsonify({'success': True, 'data': payload})
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/crawl/region', methods=['POST'])
@login_required
def crawl_region():
    """
    Crawl the products of one region.

    Request Body:
        {
            "region_code": "DDAB"   // region code, must be a key of PCHOME_REGIONS
        }

    Responses:
        400 - body is not a JSON object, ``region_code`` is missing or not a
              string, or the code is not in ``PCHOME_REGIONS``.
        500 - the crawler raised; the exception text is returned in ``message``.
        200 - otherwise; ``success``/``message`` are whatever the crawler
              reported, ``data`` carries the region info and the products.
    """
    from services.pchome_crawler import crawl_pchome_region

    # silent=True: a malformed or non-JSON body yields None and is answered
    # with this API's JSON error envelope instead of Flask's default error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'region_code' not in data:
        return jsonify({'success': False, 'message': '請提供館別代碼 (region_code)'}), 400

    raw_code = data['region_code']
    if not isinstance(raw_code, str):
        # Guard before calling str methods: a number/null here used to raise
        # AttributeError outside the try block.
        return jsonify({'success': False, 'message': '請提供館別代碼 (region_code)'}), 400

    region_code = raw_code.strip().upper()

    # Validate the region code against the known table.
    region_info = PCHOME_REGIONS.get(region_code)
    if region_info is None:
        return jsonify({
            'success': False,
            'message': f'無效的館別代碼: {region_code}'
        }), 400

    try:
        success, message, products = crawl_pchome_region(region_code)

        return jsonify({
            'success': success,
            'message': message,
            'data': {
                'region': {
                    'code': region_code,
                    'name': region_info['name'],
                    'category': region_info['category']
                },
                'product_count': len(products),
                'products': products
            }
        })

    except Exception as e:
        # Route-level boundary: report any crawler failure as a JSON 500.
        return jsonify({
            'success': False,
            'message': f'爬取失敗: {str(e)}'
        }), 500
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/crawl/custom', methods=['POST'])
@login_required
def crawl_custom_url():
    """
    Crawl the products found at a caller-supplied PChome 24h URL.

    Request Body:
        {
            "url": "https://24h.pchome.com.tw/region/DDAB"
        }

    If the URL contains ``/region/<CODE>`` the crawler's region crawl is used;
    otherwise the page is downloaded and product IDs are extracted from its
    HTML before their details are fetched.

    Responses:
        400 - body is not a JSON object, ``url`` is missing/blank/not a string,
              or the URL is not under https://24h.pchome.com.tw/.
        404 - a non-region page contained no product IDs.
        500 - the download or the crawler raised.
        200 - otherwise; ``success``/``message`` come from the crawler.
    """
    import re
    from services.pchome_crawler import get_crawler

    # silent=True: malformed / non-JSON bodies become None and get the JSON
    # error envelope below rather than Flask's default error response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'url' not in data:
        return jsonify({'success': False, 'message': '請提供 URL'}), 400

    url = data['url']
    if not isinstance(url, str) or not url.strip():
        # A non-string value used to crash on .startswith() outside the try.
        return jsonify({'success': False, 'message': '請提供 URL'}), 400
    url = url.strip()

    # Only allow the PChome 24h site; the trailing slash in the prefix pins
    # the host so look-alike domains cannot match.
    if not url.startswith('https://24h.pchome.com.tw/'):
        return jsonify({
            'success': False,
            'message': '僅支援 PChome 24h 網站 (https://24h.pchome.com.tw/)'
        }), 400

    # Try to pull a region code out of the URL.
    region_match = re.search(r'/region/([A-Z0-9]+)', url)

    try:
        crawler = get_crawler()

        if region_match:
            # Region page: delegate to the crawler's region crawl.
            region_code = region_match.group(1)
            success, message, products = crawler.crawl_region(region_code)
        else:
            # Any other page: download it and parse product IDs from the HTML.
            import requests
            response = requests.get(url, headers=crawler.DEFAULT_HEADERS, timeout=30)
            response.raise_for_status()

            product_ids = crawler._extract_product_ids_from_html(response.text)
            if not product_ids:
                return jsonify({
                    'success': False,
                    'message': '頁面中沒有找到商品'
                }), 404

            success, message, products = crawler.fetch_product_details(product_ids)
            # fetch_product_details yields objects exposing to_dict(); convert
            # them so the response is JSON-serialisable.
            products = [p.to_dict() for p in products]

        return jsonify({
            'success': success,
            'message': message,
            'data': {
                'url': url,
                'product_count': len(products),
                'products': products
            }
        })

    except Exception as e:
        # Route-level boundary: report network/crawler failures as a JSON 500.
        return jsonify({
            'success': False,
            'message': f'爬取失敗: {str(e)}'
        }), 500
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/search', methods=['POST'])
@login_required
def search_products():
    """
    Search PChome products by keyword.

    Request Body:
        {
            "keyword": "iPhone",
            "limit": 50        // optional, default 50
        }

    ``limit`` must be an integer in 1..200; any other value (including
    booleans) silently falls back to 50.

    Responses:
        400 - body is not a JSON object, ``keyword`` is missing, not a string,
              or blank.
        500 - the search raised.
        200 - otherwise; ``success``/``message`` come from the search service.
    """
    from services.pchome_crawler import search_pchome_products

    # silent=True: malformed / non-JSON bodies become None and get the JSON
    # error envelope below rather than Flask's default error response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'keyword' not in data:
        return jsonify({'success': False, 'message': '請提供搜尋關鍵字 (keyword)'}), 400

    keyword = data['keyword']
    if not isinstance(keyword, str):
        # A number/null keyword used to crash on .strip() outside the try.
        return jsonify({'success': False, 'message': '請提供搜尋關鍵字 (keyword)'}), 400

    keyword = keyword.strip()
    if not keyword:
        return jsonify({'success': False, 'message': '關鍵字不能為空'}), 400

    limit = data.get('limit', 50)
    # bool is a subclass of int, so exclude it explicitly; otherwise `true`
    # would be accepted as a limit of 1.
    if isinstance(limit, bool) or not isinstance(limit, int) or not 1 <= limit <= 200:
        limit = 50

    try:
        success, message, products = search_pchome_products(keyword, limit)

        return jsonify({
            'success': success,
            'message': message,
            'data': {
                'keyword': keyword,
                'product_count': len(products),
                'products': products
            }
        })

    except Exception as e:
        # Route-level boundary: report search failures as a JSON 500.
        return jsonify({
            'success': False,
            'message': f'搜尋失敗: {str(e)}'
        }), 500
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/products/<product_id>', methods=['GET'])
@login_required
def get_product_detail(product_id: str):
    """Return the details of a single product as JSON.

    Responds 404 when the crawler reports failure or returns no product, and
    500 (with the exception text) when the lookup raises.
    """
    from services.pchome_crawler import get_crawler

    try:
        ok, _message, found = get_crawler().fetch_product_details([product_id])

        if ok and found:
            # Only the first result is relevant for a single-ID lookup.
            return jsonify({'success': True, 'data': found[0].to_dict()})

        return jsonify({'success': False, 'message': '找不到商品'}), 404

    except Exception as exc:
        return jsonify({'success': False, 'message': f'取得失敗: {str(exc)}'}), 500
|
||
|
||
|
||
@pchome_bp.route('/api/pchome/products/batch', methods=['POST'])
@login_required
def get_products_batch():
    """
    Fetch the details of several products in one call.

    Request Body:
        {
            "product_ids": ["DDABSD-1900HIE3P", "DDAO04-A900I6WUO"]
        }

    Responses:
        400 - body is not a JSON object, ``product_ids`` is missing, is not a
              non-empty list of non-blank strings, or has more than 100 items.
        500 - the crawler raised.
        200 - otherwise; ``success``/``message`` come from the crawler, and
              ``data`` reports requested vs. found counts plus the products.
    """
    from services.pchome_crawler import get_crawler

    # silent=True: malformed / non-JSON bodies become None and get the JSON
    # error envelope below rather than Flask's default error response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'product_ids' not in data:
        return jsonify({'success': False, 'message': '請提供商品 ID 列表 (product_ids)'}), 400

    product_ids = data['product_ids']
    if not isinstance(product_ids, list) or not product_ids:
        return jsonify({'success': False, 'message': '商品 ID 列表格式錯誤'}), 400

    if len(product_ids) > 100:
        return jsonify({'success': False, 'message': '一次最多查詢 100 個商品'}), 400

    # Reject malformed entries up front so bad input is a 400, not a crawler
    # error surfacing as a 500.
    if not all(isinstance(pid, str) and pid.strip() for pid in product_ids):
        return jsonify({'success': False, 'message': '商品 ID 列表格式錯誤'}), 400

    try:
        crawler = get_crawler()
        success, message, products = crawler.fetch_product_details(product_ids)

        return jsonify({
            'success': success,
            'message': message,
            'data': {
                'requested_count': len(product_ids),
                'found_count': len(products),
                'products': [p.to_dict() for p in products]
            }
        })

    except Exception as e:
        # Route-level boundary: report crawler failures as a JSON 500.
        return jsonify({
            'success': False,
            'message': f'取得失敗: {str(e)}'
        }), 500
|
||
|
||
|
||
# Runs at import time: prints a marker once this routes module has been loaded.
print("✅ PChome routes 已載入")
|