From 400569ad03c64287dc995692f973c5c597937874 Mon Sep 17 00:00:00 2001 From: Jeason <1710884619@qq.com> Date: Wed, 1 Apr 2026 16:23:19 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E9=80=9A=E8=BF=87=E7=82=B9=E5=87=BB?= =?UTF-8?q?=E5=95=86=E5=93=81=E5=9B=BE=E7=89=87=E8=8E=B7=E5=8F=96=E7=9C=9F?= =?UTF-8?q?=E5=AE=9EitemID,=E8=A7=A3=E5=86=B3=E5=95=86=E5=93=81=E9=93=BE?= =?UTF-8?q?=E6=8E=A5=E4=B8=BA=E7=A9=BA=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cart_service: 对没有itemID的预售商品,点击图片触发跳转 从跳转URL中提取真实itemID(支持多种URL格式) 点击后自动关闭新tab并回到购物车继续处理下一个商品 --- server/services/cart_service.py | 214 ++++++++++++++++++-------------- 1 file changed, 121 insertions(+), 93 deletions(-) diff --git a/server/services/cart_service.py b/server/services/cart_service.py index 87c5a62..e882abb 100644 --- a/server/services/cart_service.py +++ b/server/services/cart_service.py @@ -1,16 +1,17 @@ """ 购物车预售商品抓取服务 -通过 Playwright 打开购物车页面,拦截 API + DOM 提取商品信息 +通过 Playwright 打开购物车页面,从 DOM 提取商品信息, +并通过点击商品图片获取跳转 URL 来提取真实 itemID。 """ import asyncio -import json +import re from playwright.async_api import async_playwright from utils.stealth import stealth_async from server.services.auth_service import get_browser_context, has_auth CART_URL = "https://weidian.com/new-cart/index.php" -# 从 DOM 提取购物车商品(含尝试从 Vue 组件获取 itemID) +# 从 DOM 提取购物车商品基本信息 EXTRACT_JS = """() => { const R = []; const sws = document.querySelectorAll( @@ -29,21 +30,35 @@ EXTRACT_JS = """() => { sale_time: '', presale_type: '' }; - // 尝试从 Vue 组件数据中提取 itemID + // 尝试多种方式提取 itemID + // 1. Vue 组件数据 try { - const vue = iw.__vue__ || (iw.__vue_app__ && iw.__vue_app__._instance); + const vue = iw.__vue__; if (vue) { - const d = vue.$data || vue.data || vue; - o.item_id = String(d.itemID || d.itemId || d.item_id || ''); + const d = vue.$data || vue._data || vue; + o.item_id = String(d.itemID || d.itemId || d.item_id + || d.goodsId || d.goods_id || ''); + // 也检查 props + if (!o.item_id && vue.$props) { + const pp = vue.$props; + o.item_id = String(pp.itemID || pp.itemId + || pp.item_id || pp.goodsId || ''); + } + // 检查 item 对象 + if (!o.item_id && d.item) { + o.item_id = String(d.item.itemID || d.item.itemId + || d.item.item_id || ''); + } } } catch(e) {} - // 尝试从 data-* 属性提取 + // 2. data-* 属性 if (!o.item_id) { - o.item_id = iw.dataset.itemId || iw.dataset.itemid || ''; + o.item_id = iw.dataset.itemId || iw.dataset.itemid + || iw.dataset.goodsId || iw.dataset.id || ''; } - // 尝试从内部链接提取 + // 3. 内部链接 if (!o.item_id) { const a = iw.querySelector('a[href*="itemID"]'); if (a) { @@ -52,6 +67,10 @@ EXTRACT_JS = """() => { } } + // 4. 图片 URL 中可能有商品信息(备用) + const img = iw.querySelector('.item_img img'); + o.img_src = img ? (img.src || img.dataset.src || '') : ''; + const te = iw.querySelector('.item_title'); if (te) o.title = te.textContent.trim(); const sk = iw.querySelector('.item_sku'); @@ -89,15 +108,12 @@ EXTRACT_JS = """() => { async def fetch_cart_presale_items(account_id): """ 获取指定账号购物车中的预售商品列表。 - 双重提取:拦截购物车 API 获取 itemID 映射 + DOM 提取预售信息。 + 对于没有 itemID 的商品,通过点击图片获取跳转 URL 来提取。 返回: (success, items_or_msg) """ if not has_auth(account_id): return False, "账号未登录" - # 用于存储 API 返回的 cart_item_id -> itemID 映射 - api_item_map = {} - async with async_playwright() as p: browser, context = await get_browser_context( p, account_id, headless=True @@ -105,20 +121,6 @@ async def fetch_cart_presale_items(account_id): page = await context.new_page() await stealth_async(page) - # 拦截购物车相关 API,提取 itemID - async def on_response(response): - url = response.url - # 购物车 API 通常包含 cart 相关路径 - if any(k in url for k in ['cart/list', 'cart/query', 'cartList', 'getCart', - 'cart-server', 'newcart']): - try: - data = await response.json() - _extract_item_ids(data, api_item_map) - except Exception: - pass - - page.on("response", on_response) - try: await page.goto( CART_URL, wait_until="networkidle", timeout=20000 @@ -137,74 +139,100 @@ async def fetch_cart_presale_items(account_id): await browser.close() return False, f"打开购物车失败: {e}" - # 也尝试从页面内嵌的 JS 变量/window 对象提取 - try: - extra_map = await page.evaluate("""() => { - const m = {}; - // 尝试从 window.__INITIAL_STATE__ 或类似全局变量提取 - const sources = [ - window.__INITIAL_STATE__, - window.__NUXT__, - window.__APP_DATA__, - window.cartData, - window.__data__, - ]; - function walk(obj, depth) { - if (!obj || depth > 5) return; - if (Array.isArray(obj)) { - for (const item of obj) walk(item, depth + 1); - } else if (typeof obj === 'object') { - const cid = String(obj.cartItemId || obj.cart_item_id || obj.cartId || ''); - const iid = String(obj.itemID || obj.itemId || obj.item_id || obj.goodsId || ''); - if (cid && iid && iid !== cid) m[cid] = iid; - // 也存 itemUrl - if (cid && obj.itemUrl) m[cid + '_url'] = obj.itemUrl; - for (const v of Object.values(obj)) walk(v, depth + 1); - } - } - for (const s of sources) { if (s) walk(s, 0); } - return m; - }""") - if extra_map: - api_item_map.update(extra_map) - except Exception: - pass - + # 提取基本信息 raw_items = await page.evaluate(EXTRACT_JS) + + # 筛选预售商品 + presale = [it for it in raw_items if it.get("is_presale")] + + # 对没有 itemID 的预售商品,通过点击图片获取跳转 URL + for item in presale: + if item.get('item_id'): + continue + cid = item.get('cart_item_id', '') + if not cid: + continue + + item_id = await _get_item_id_by_click(page, context, cid) + if item_id: + item['item_id'] = item_id + item['url'] = f'https://weidian.com/item.html?itemID={item_id}' + await browser.close() - # 合并 API 数据到 DOM 提取结果 - for item in raw_items: - cid = item.get('cart_item_id', '') - if not item.get('item_id') and cid in api_item_map: - item['item_id'] = api_item_map[cid] - # 检查是否有 URL - url_key = cid + '_url' - if url_key in api_item_map: - item['url'] = api_item_map[url_key] - - # 只返回预售商品 - presale = [it for it in raw_items if it.get("is_presale")] return True, presale -def _extract_item_ids(data, result_map): - """递归遍历 API 响应 JSON,提取 cart_item_id -> itemID 映射""" - if isinstance(data, list): - for item in data: - _extract_item_ids(item, result_map) - elif isinstance(data, dict): - # 常见字段名 - cid = str(data.get('cartItemId', data.get('cart_item_id', - data.get('cartId', '')))) - iid = str(data.get('itemID', data.get('itemId', - data.get('item_id', data.get('goodsId', ''))))) - if cid and iid and cid != iid and iid != 'None': - result_map[cid] = iid - # 也提取 URL - item_url = data.get('itemUrl', data.get('item_url', '')) - if cid and item_url: - result_map[cid + '_url'] = item_url - for v in data.values(): - if isinstance(v, (dict, list)): - _extract_item_ids(v, result_map) +async def _get_item_id_by_click(page, context, cart_item_id): + """ + 通过点击购物车中商品的图片,拦截跳转 URL 来提取 itemID。 + 点击后会打开新 tab,从新 tab 的 URL 中提取 itemID,然后关闭。 + """ + try: + # 定位商品图片 + img_locator = page.locator( + f'.item_warp[id="{cart_item_id}"] .item_img') + if await img_locator.count() == 0: + return None + + # 监听新页面打开事件 + async with context.expect_page(timeout=5000) as new_page_info: + await img_locator.first.click() + + new_page = await new_page_info.value + # 等待 URL 加载 + await asyncio.sleep(1) + url = new_page.url + + # 从 URL 提取 itemID + item_id = _extract_item_id_from_url(url) + + # 关闭新 tab + await new_page.close() + + return item_id + + except Exception: + # 如果 expect_page 超时,可能是在当前页面跳转了 + # 检查当前页面 URL + try: + current_url = page.url + item_id = _extract_item_id_from_url(current_url) + if item_id: + # 跳回购物车 + await page.goto(CART_URL, wait_until="networkidle", + timeout=15000) + await asyncio.sleep(2) + return item_id + except Exception: + pass + + # 确保回到购物车页面 + try: + if 'new-cart' not in page.url: + await page.goto(CART_URL, wait_until="networkidle", + timeout=15000) + await asyncio.sleep(2) + except Exception: + pass + + return None + + +def _extract_item_id_from_url(url): + """从 URL 中提取 itemID""" + if not url: + return None + # https://weidian.com/item.html?itemID=123456 + m = re.search(r'itemID=(\d+)', url, re.IGNORECASE) + if m: + return m.group(1) + # https://shop.weidian.com/item/123456 + m = re.search(r'/item/(\d+)', url) + if m: + return m.group(1) + # https://weidian.com/...?id=123456 + m = re.search(r'[?&]id=(\d+)', url) + if m: + return m.group(1) + return None