fix: 通过点击商品图片获取真实itemID,解决商品链接为空问题
cart_service: 对没有 itemID 的预售商品,点击图片触发跳转;从跳转 URL 中提取真实 itemID(支持多种 URL 格式);点击后自动关闭新 tab 并回到购物车,继续处理下一个商品。
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
"""
|
||||
购物车预售商品抓取服务
|
||||
通过 Playwright 打开购物车页面,拦截 API + DOM 提取商品信息
|
||||
通过 Playwright 打开购物车页面,从 DOM 提取商品信息,
|
||||
并通过点击商品图片获取跳转 URL 来提取真实 itemID。
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
from playwright.async_api import async_playwright
|
||||
from utils.stealth import stealth_async
|
||||
from server.services.auth_service import get_browser_context, has_auth
|
||||
|
||||
CART_URL = "https://weidian.com/new-cart/index.php"
|
||||
|
||||
# 从 DOM 提取购物车商品(含尝试从 Vue 组件获取 itemID)
|
||||
# 从 DOM 提取购物车商品基本信息
|
||||
EXTRACT_JS = """() => {
|
||||
const R = [];
|
||||
const sws = document.querySelectorAll(
|
||||
@@ -29,21 +30,35 @@ EXTRACT_JS = """() => {
|
||||
sale_time: '', presale_type: ''
|
||||
};
|
||||
|
||||
// 尝试从 Vue 组件数据中提取 itemID
|
||||
// 尝试多种方式提取 itemID
|
||||
// 1. Vue 组件数据
|
||||
try {
|
||||
const vue = iw.__vue__ || (iw.__vue_app__ && iw.__vue_app__._instance);
|
||||
const vue = iw.__vue__;
|
||||
if (vue) {
|
||||
const d = vue.$data || vue.data || vue;
|
||||
o.item_id = String(d.itemID || d.itemId || d.item_id || '');
|
||||
const d = vue.$data || vue._data || vue;
|
||||
o.item_id = String(d.itemID || d.itemId || d.item_id
|
||||
|| d.goodsId || d.goods_id || '');
|
||||
// 也检查 props
|
||||
if (!o.item_id && vue.$props) {
|
||||
const pp = vue.$props;
|
||||
o.item_id = String(pp.itemID || pp.itemId
|
||||
|| pp.item_id || pp.goodsId || '');
|
||||
}
|
||||
// 检查 item 对象
|
||||
if (!o.item_id && d.item) {
|
||||
o.item_id = String(d.item.itemID || d.item.itemId
|
||||
|| d.item.item_id || '');
|
||||
}
|
||||
}
|
||||
} catch(e) {}
|
||||
|
||||
// 尝试从 data-* 属性提取
|
||||
// 2. data-* 属性
|
||||
if (!o.item_id) {
|
||||
o.item_id = iw.dataset.itemId || iw.dataset.itemid || '';
|
||||
o.item_id = iw.dataset.itemId || iw.dataset.itemid
|
||||
|| iw.dataset.goodsId || iw.dataset.id || '';
|
||||
}
|
||||
|
||||
// 尝试从内部链接提取
|
||||
// 3. 内部链接
|
||||
if (!o.item_id) {
|
||||
const a = iw.querySelector('a[href*="itemID"]');
|
||||
if (a) {
|
||||
@@ -52,6 +67,10 @@ EXTRACT_JS = """() => {
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 图片 URL 中可能有商品信息(备用)
|
||||
const img = iw.querySelector('.item_img img');
|
||||
o.img_src = img ? (img.src || img.dataset.src || '') : '';
|
||||
|
||||
const te = iw.querySelector('.item_title');
|
||||
if (te) o.title = te.textContent.trim();
|
||||
const sk = iw.querySelector('.item_sku');
|
||||
@@ -89,15 +108,12 @@ EXTRACT_JS = """() => {
|
||||
async def fetch_cart_presale_items(account_id):
|
||||
"""
|
||||
获取指定账号购物车中的预售商品列表。
|
||||
双重提取:拦截购物车 API 获取 itemID 映射 + DOM 提取预售信息。
|
||||
对于没有 itemID 的商品,通过点击图片获取跳转 URL 来提取。
|
||||
返回: (success, items_or_msg)
|
||||
"""
|
||||
if not has_auth(account_id):
|
||||
return False, "账号未登录"
|
||||
|
||||
# 用于存储 API 返回的 cart_item_id -> itemID 映射
|
||||
api_item_map = {}
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser, context = await get_browser_context(
|
||||
p, account_id, headless=True
|
||||
@@ -105,20 +121,6 @@ async def fetch_cart_presale_items(account_id):
|
||||
page = await context.new_page()
|
||||
await stealth_async(page)
|
||||
|
||||
# 拦截购物车相关 API,提取 itemID
|
||||
async def on_response(response):
|
||||
url = response.url
|
||||
# 购物车 API 通常包含 cart 相关路径
|
||||
if any(k in url for k in ['cart/list', 'cart/query', 'cartList', 'getCart',
|
||||
'cart-server', 'newcart']):
|
||||
try:
|
||||
data = await response.json()
|
||||
_extract_item_ids(data, api_item_map)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
page.on("response", on_response)
|
||||
|
||||
try:
|
||||
await page.goto(
|
||||
CART_URL, wait_until="networkidle", timeout=20000
|
||||
@@ -137,74 +139,100 @@ async def fetch_cart_presale_items(account_id):
|
||||
await browser.close()
|
||||
return False, f"打开购物车失败: {e}"
|
||||
|
||||
# 也尝试从页面内嵌的 JS 变量/window 对象提取
|
||||
try:
|
||||
extra_map = await page.evaluate("""() => {
|
||||
const m = {};
|
||||
// 尝试从 window.__INITIAL_STATE__ 或类似全局变量提取
|
||||
const sources = [
|
||||
window.__INITIAL_STATE__,
|
||||
window.__NUXT__,
|
||||
window.__APP_DATA__,
|
||||
window.cartData,
|
||||
window.__data__,
|
||||
];
|
||||
function walk(obj, depth) {
|
||||
if (!obj || depth > 5) return;
|
||||
if (Array.isArray(obj)) {
|
||||
for (const item of obj) walk(item, depth + 1);
|
||||
} else if (typeof obj === 'object') {
|
||||
const cid = String(obj.cartItemId || obj.cart_item_id || obj.cartId || '');
|
||||
const iid = String(obj.itemID || obj.itemId || obj.item_id || obj.goodsId || '');
|
||||
if (cid && iid && iid !== cid) m[cid] = iid;
|
||||
// 也存 itemUrl
|
||||
if (cid && obj.itemUrl) m[cid + '_url'] = obj.itemUrl;
|
||||
for (const v of Object.values(obj)) walk(v, depth + 1);
|
||||
}
|
||||
}
|
||||
for (const s of sources) { if (s) walk(s, 0); }
|
||||
return m;
|
||||
}""")
|
||||
if extra_map:
|
||||
api_item_map.update(extra_map)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 提取基本信息
|
||||
raw_items = await page.evaluate(EXTRACT_JS)
|
||||
|
||||
# 筛选预售商品
|
||||
presale = [it for it in raw_items if it.get("is_presale")]
|
||||
|
||||
# 对没有 itemID 的预售商品,通过点击图片获取跳转 URL
|
||||
for item in presale:
|
||||
if item.get('item_id'):
|
||||
continue
|
||||
cid = item.get('cart_item_id', '')
|
||||
if not cid:
|
||||
continue
|
||||
|
||||
item_id = await _get_item_id_by_click(page, context, cid)
|
||||
if item_id:
|
||||
item['item_id'] = item_id
|
||||
item['url'] = f'https://weidian.com/item.html?itemID={item_id}'
|
||||
|
||||
await browser.close()
|
||||
|
||||
# 合并 API 数据到 DOM 提取结果
|
||||
for item in raw_items:
|
||||
cid = item.get('cart_item_id', '')
|
||||
if not item.get('item_id') and cid in api_item_map:
|
||||
item['item_id'] = api_item_map[cid]
|
||||
# 检查是否有 URL
|
||||
url_key = cid + '_url'
|
||||
if url_key in api_item_map:
|
||||
item['url'] = api_item_map[url_key]
|
||||
|
||||
# 只返回预售商品
|
||||
presale = [it for it in raw_items if it.get("is_presale")]
|
||||
return True, presale
|
||||
|
||||
|
||||
def _extract_item_ids(data, result_map):
|
||||
"""递归遍历 API 响应 JSON,提取 cart_item_id -> itemID 映射"""
|
||||
if isinstance(data, list):
|
||||
for item in data:
|
||||
_extract_item_ids(item, result_map)
|
||||
elif isinstance(data, dict):
|
||||
# 常见字段名
|
||||
cid = str(data.get('cartItemId', data.get('cart_item_id',
|
||||
data.get('cartId', ''))))
|
||||
iid = str(data.get('itemID', data.get('itemId',
|
||||
data.get('item_id', data.get('goodsId', '')))))
|
||||
if cid and iid and cid != iid and iid != 'None':
|
||||
result_map[cid] = iid
|
||||
# 也提取 URL
|
||||
item_url = data.get('itemUrl', data.get('item_url', ''))
|
||||
if cid and item_url:
|
||||
result_map[cid + '_url'] = item_url
|
||||
for v in data.values():
|
||||
if isinstance(v, (dict, list)):
|
||||
_extract_item_ids(v, result_map)
|
||||
async def _get_item_id_by_click(page, context, cart_item_id):
    """
    Resolve a cart entry's itemID by clicking its product image.

    The image inside ``.item_warp[id="<cart_item_id>"]`` is clicked and the
    function waits up to 5 s for ``context`` to report a newly opened page.
    The itemID is then parsed from that page's URL and the new tab is closed.
    If anything in that flow raises (for example no new page appears in time),
    the current ``page`` URL is inspected instead, and the page is navigated
    back to ``CART_URL`` when it is no longer on the cart.

    Args:
        page: The page currently showing the cart.
        context: The browser context that owns ``page``; used to catch the
            newly opened tab.
        cart_item_id: Value of the ``id`` attribute of the cart row to click.

    Returns:
        The itemID string, or None if no matching image exists or no itemID
        could be extracted.
    """
    new_page = None
    try:
        # Locate the product image of this cart row.
        img_locator = page.locator(
            f'.item_warp[id="{cart_item_id}"] .item_img')
        if await img_locator.count() == 0:
            return None

        # Listen for the tab opened by the click.
        async with context.expect_page(timeout=5000) as new_page_info:
            await img_locator.first.click()

        new_page = await new_page_info.value
        # Wait for the popup's navigation to commit rather than sleeping a
        # fixed second: right after creation the URL may still be about:blank.
        try:
            await new_page.wait_for_load_state(
                "domcontentloaded", timeout=5000)
        except Exception:
            # Best effort only: still inspect whatever URL the tab reached.
            pass

        return _extract_item_id_from_url(new_page.url)

    except Exception:
        # The click may have navigated the current tab instead of opening a
        # new one, so fall back to the current page URL.
        item_id = None
        try:
            item_id = _extract_item_id_from_url(page.url)
        except Exception:
            item_id = None

        # Make sure the caller gets the cart page back for the next item. A
        # failure here must not discard an itemID that was already found.
        try:
            if item_id or 'new-cart' not in page.url:
                await page.goto(CART_URL, wait_until="networkidle",
                                timeout=15000)
                await asyncio.sleep(2)
        except Exception:
            pass

        return item_id

    finally:
        # Always close the popup, even when reading its URL failed, so tabs do
        # not accumulate while iterating over many cart items.
        if new_page is not None:
            try:
                await new_page.close()
            except Exception:
                pass
|
||||
|
||||
|
||||
def _extract_item_id_from_url(url):
|
||||
"""从 URL 中提取 itemID"""
|
||||
if not url:
|
||||
return None
|
||||
# https://weidian.com/item.html?itemID=123456
|
||||
m = re.search(r'itemID=(\d+)', url, re.IGNORECASE)
|
||||
if m:
|
||||
return m.group(1)
|
||||
# https://shop.weidian.com/item/123456
|
||||
m = re.search(r'/item/(\d+)', url)
|
||||
if m:
|
||||
return m.group(1)
|
||||
# https://weidian.com/...?id=123456
|
||||
m = re.search(r'[?&]id=(\d+)', url)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user