From 2ebdaec965d81654892c1935f8c1eed6bd24fef3 Mon Sep 17 00:00:00 2001 From: Jeason <1710884619@qq.com> Date: Wed, 1 Apr 2026 13:40:42 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20cart=5Fitem=5Fid=E4=B8=8D=E6=98=AFitemID?= =?UTF-8?q?,=E4=BF=AE=E5=A4=8D=E5=95=86=E5=93=81=E9=93=BE=E6=8E=A5?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E5=AF=BC=E8=87=B4=E6=8A=A2=E8=B4=AD=E5=A4=B1?= =?UTF-8?q?=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cart_service: 拦截购物车API提取真实itemID映射 - cart_service: 从Vue组件/data属性/window全局变量多路提取itemID - tasks: 区分item_id和cart_item_id,只有真实itemID才拼URL - snatcher: 增加商品不存在/已下架检测,增加空URL检测 --- .gitignore | 2 + server/routers/tasks.py | 15 +++-- server/services/cart_service.py | 115 +++++++++++++++++++++++++++++++- server/services/snatcher.py | 12 ++++ 4 files changed, 136 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 2478f20..3a604d0 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ debug_* *.db *.db-shm *.db-wal +*.har +weidian_sso_login*.py diff --git a/server/routers/tasks.py b/server/routers/tasks.py index 5bc1414..eea7445 100644 --- a/server/routers/tasks.py +++ b/server/routers/tasks.py @@ -128,10 +128,10 @@ def sync_cart(account_id): if not snatch_time: continue cart_item_id = item.get('cart_item_id', '') - item_id = item.get('item_id', '') or cart_item_id + item_id = item.get('item_id', '') title = item.get('title', '') - # 用 cart_item_id 去重(因为可能没有 item_id) - dedup_key = item_id or title + # 去重:优先用 item_id,其次 cart_item_id,最后 title + dedup_key = item_id or cart_item_id or title if dedup_key: existing = db2.execute( 'SELECT id FROM tasks WHERE account_id = ? AND (item_id = ? OR item_name = ?) AND status = "pending"', @@ -140,8 +140,11 @@ def sync_cart(account_id): if existing: continue url = item.get('url', '') + # 只有真正的 itemID 才拼 URL(cart_item_id 不是 itemID) if not url and item_id and item_id.isdigit(): url = f'https://weidian.com/item.html?itemID={item_id}' + if not item_id: + item_id = cart_item_id db2.execute( 'INSERT INTO tasks (account_id, target_url, item_name, item_id, sku_id, price, snatch_time) VALUES (?, ?, ?, ?, ?, ?, ?)', (account_id, url, title, item_id, @@ -217,9 +220,9 @@ def sync_all_carts(): if not snatch_time: continue cart_item_id = item.get('cart_item_id', '') - item_id = item.get('item_id', '') or cart_item_id + item_id = item.get('item_id', '') title = item.get('title', '') - dedup_key = item_id or title + dedup_key = item_id or cart_item_id or title if dedup_key: existing = db2.execute( 'SELECT id FROM tasks WHERE account_id = ? AND (item_id = ? OR item_name = ?) AND status = "pending"', @@ -230,6 +233,8 @@ def sync_all_carts(): url = item.get('url', '') if not url and item_id and item_id.isdigit(): url = f'https://weidian.com/item.html?itemID={item_id}' + if not item_id: + item_id = cart_item_id db2.execute( 'INSERT INTO tasks (account_id, target_url, item_name, item_id, sku_id, price, snatch_time) VALUES (?, ?, ?, ?, ?, ?, ?)', (aid, url, title, item_id, diff --git a/server/services/cart_service.py b/server/services/cart_service.py index 133c94b..87c5a62 100644 --- a/server/services/cart_service.py +++ b/server/services/cart_service.py @@ -1,15 +1,16 @@ """ 购物车预售商品抓取服务 -通过 Playwright 打开购物车页面,从 DOM 的 item_warp 提取商品信息 +通过 Playwright 打开购物车页面,拦截 API + DOM 提取商品信息 """ import asyncio +import json from playwright.async_api import async_playwright from utils.stealth import stealth_async from server.services.auth_service import get_browser_context, has_auth CART_URL = "https://weidian.com/new-cart/index.php" -# 提取购物车商品的 JS,与 test_cart.py 保持一致 +# 从 DOM 提取购物车商品(含尝试从 Vue 组件获取 itemID) EXTRACT_JS = """() => { const R = []; const sws = document.querySelectorAll( @@ -22,10 +23,35 @@ EXTRACT_JS = """() => { const o = { shop_name: sn.trim(), cart_item_id: iw.id, + item_id: '', title: '', sku_name: '', price: '', is_presale: false, countdown_text: '', sale_time: '', presale_type: '' }; + + // 尝试从 Vue 组件数据中提取 itemID + try { + const vue = iw.__vue__ || (iw.__vue_app__ && iw.__vue_app__._instance); + if (vue) { + const d = vue.$data || vue.data || vue; + o.item_id = String(d.itemID || d.itemId || d.item_id || ''); + } + } catch(e) {} + + // 尝试从 data-* 属性提取 + if (!o.item_id) { + o.item_id = iw.dataset.itemId || iw.dataset.itemid || ''; + } + + // 尝试从内部链接提取 + if (!o.item_id) { + const a = iw.querySelector('a[href*="itemID"]'); + if (a) { + const m = a.href.match(/itemID=(\\d+)/); + if (m) o.item_id = m[1]; + } + } + const te = iw.querySelector('.item_title'); if (te) o.title = te.textContent.trim(); const sk = iw.querySelector('.item_sku'); @@ -62,12 +88,16 @@ EXTRACT_JS = """() => { async def fetch_cart_presale_items(account_id): """ - 获取指定账号购物车中的预售商品列表 + 获取指定账号购物车中的预售商品列表。 + 双重提取:拦截购物车 API 获取 itemID 映射 + DOM 提取预售信息。 返回: (success, items_or_msg) """ if not has_auth(account_id): return False, "账号未登录" + # 用于存储 API 返回的 cart_item_id -> itemID 映射 + api_item_map = {} + async with async_playwright() as p: browser, context = await get_browser_context( p, account_id, headless=True @@ -75,6 +105,20 @@ async def fetch_cart_presale_items(account_id): page = await context.new_page() await stealth_async(page) + # 拦截购物车相关 API,提取 itemID + async def on_response(response): + url = response.url + # 购物车 API 通常包含 cart 相关路径 + if any(k in url for k in ['cart/list', 'cart/query', 'cartList', 'getCart', + 'cart-server', 'newcart']): + try: + data = await response.json() + _extract_item_ids(data, api_item_map) + except Exception: + pass + + page.on("response", on_response) + try: await page.goto( CART_URL, wait_until="networkidle", timeout=20000 @@ -93,9 +137,74 @@ async def fetch_cart_presale_items(account_id): await browser.close() return False, f"打开购物车失败: {e}" + # 也尝试从页面内嵌的 JS 变量/window 对象提取 + try: + extra_map = await page.evaluate("""() => { + const m = {}; + // 尝试从 window.__INITIAL_STATE__ 或类似全局变量提取 + const sources = [ + window.__INITIAL_STATE__, + window.__NUXT__, + window.__APP_DATA__, + window.cartData, + window.__data__, + ]; + function walk(obj, depth) { + if (!obj || depth > 5) return; + if (Array.isArray(obj)) { + for (const item of obj) walk(item, depth + 1); + } else if (typeof obj === 'object') { + const cid = String(obj.cartItemId || obj.cart_item_id || obj.cartId || ''); + const iid = String(obj.itemID || obj.itemId || obj.item_id || obj.goodsId || ''); + if (cid && iid && iid !== cid) m[cid] = iid; + // 也存 itemUrl + if (cid && obj.itemUrl) m[cid + '_url'] = obj.itemUrl; + for (const v of Object.values(obj)) walk(v, depth + 1); + } + } + for (const s of sources) { if (s) walk(s, 0); } + return m; + }""") + if extra_map: + api_item_map.update(extra_map) + except Exception: + pass + raw_items = await page.evaluate(EXTRACT_JS) await browser.close() + # 合并 API 数据到 DOM 提取结果 + for item in raw_items: + cid = item.get('cart_item_id', '') + if not item.get('item_id') and cid in api_item_map: + item['item_id'] = api_item_map[cid] + # 检查是否有 URL + url_key = cid + '_url' + if url_key in api_item_map: + item['url'] = api_item_map[url_key] + # 只返回预售商品 presale = [it for it in raw_items if it.get("is_presale")] return True, presale + + +def _extract_item_ids(data, result_map): + """递归遍历 API 响应 JSON,提取 cart_item_id -> itemID 映射""" + if isinstance(data, list): + for item in data: + _extract_item_ids(item, result_map) + elif isinstance(data, dict): + # 常见字段名 + cid = str(data.get('cartItemId', data.get('cart_item_id', + data.get('cartId', '')))) + iid = str(data.get('itemID', data.get('itemId', + data.get('item_id', data.get('goodsId', ''))))) + if cid and iid and cid != iid and iid != 'None': + result_map[cid] = iid + # 也提取 URL + item_url = data.get('itemUrl', data.get('item_url', '')) + if cid and item_url: + result_map[cid + '_url'] = item_url + for v in data.values(): + if isinstance(v, (dict, list)): + _extract_item_ids(v, result_map) diff --git a/server/services/snatcher.py b/server/services/snatcher.py index 3712f85..b5285f9 100644 --- a/server/services/snatcher.py +++ b/server/services/snatcher.py @@ -42,6 +42,18 @@ async def run_snatch(task_id): await browser.close() return + # 检查商品是否存在 + page_text = await page.locator('body').text_content() + if '商品不存在' in (page_text or '') or '已下架' in (page_text or ''): + _update_task(db, task_id, 'failed', f'商品不存在或已下架 (URL: {target_url})') + await browser.close() + return + + if not target_url or target_url.strip() == '': + _update_task(db, task_id, 'failed', '商品链接为空,请检查购物车同步是否获取到了 itemID') + await browser.close() + return + # 2. 等待抢购时间 snatch_time = task['snatch_time'] if snatch_time: