84 lines
3.1 KiB
Python
84 lines
3.1 KiB
Python
|
|
import asyncio
|
|||
|
|
from playwright.async_api import async_playwright
|
|||
|
|
import re
|
|||
|
|
import yaml
|
|||
|
|
|
|||
|
|
async def resolve_url_and_extract(short_url):
    """Open a short link in a headless browser and extract its item/shop IDs.

    The link is loaded in headless Chromium using Playwright's 'iPhone 13'
    device profile. After the page reaches the 'networkidle' load state and a
    fixed 2-second pause, the page's current URL is taken as the final URL.
    ``itemID`` / ``shopId`` are first looked up as query parameters of that
    URL; any ID still missing is then searched for in the page HTML.

    Args:
        short_url: The link to open.

    Returns:
        A ``(final_url, item_id, shop_id)`` tuple. Each ID is a string of
        digits, or ``""`` when it was not found. If navigation or extraction
        raises, the error is printed and ``(None, None, None)`` is returned.
    """
    # Query-string form, e.g. /item.html?itemID=...&shopId=...
    url_patterns = {
        "item": r"[?&]itemID=(\d+)",
        "shop": r"[?&]shopId=(\d+)",
    }
    # Page-source form (sometimes a global variable): matches
    # "itemID":"123123" as well as itemID = '123123'.
    content_patterns = {
        "item": r'["\']?itemID["\']?\s*[:=]\s*["\']?(\d+)["\']?',
        "shop": r'["\']?shopId["\']?\s*[:=]\s*["\']?(\d+)["\']?',
    }

    print(f"正在解析链接: {short_url}")
    async with async_playwright() as p:
        # Run headless with the GPU disabled.
        browser = await p.chromium.launch(headless=True, args=['--disable-gpu'])
        # Emulate a phone; some redirects may depend on the User-Agent.
        context = await browser.new_context(**p.devices['iPhone 13'])
        page = await context.new_page()

        try:
            await page.goto(short_url)
            # Wait for the redirect chain to finish, then pause a little
            # longer so the URL has settled before it is read.
            await page.wait_for_load_state('networkidle')
            await asyncio.sleep(2)

            final_url = page.url
            print(f"最终链接: {final_url}")

            # First pass: look for the IDs in the final URL.
            ids = {}
            for key, pattern in url_patterns.items():
                hit = re.search(pattern, final_url, re.IGNORECASE)
                ids[key] = hit.group(1) if hit else ""

            # Second pass: only fetch the page HTML when something is missing.
            if not all(ids.values()):
                content = await page.content()
                for key, pattern in content_patterns.items():
                    if ids[key]:
                        continue
                    hit = re.search(pattern, content, re.IGNORECASE)
                    if hit:
                        ids[key] = hit.group(1)

            item_id, shop_id = ids["item"], ids["shop"]
            print(f"解析结果 -> itemID: {item_id}, shopId: {shop_id}")
            return final_url, item_id, shop_id

        except Exception as e:
            print(f"解析出错: {e}")
            return None, None, None
        finally:
            await browser.close()
|
|||
|
|
|
|||
|
|
def update_config(url, item_id, shop_id):
    """Write the resolved URL and IDs into ``config.yaml``.

    The file is read from the current working directory, updated in memory,
    and written back in full. ``target_url`` is always overwritten;
    ``item_id`` and ``shop_id`` are only overwritten when a truthy value is
    supplied, so existing values are kept when extraction found nothing.

    Args:
        url: Value stored under the ``target_url`` key.
        item_id: Value stored under ``item_id``; skipped when falsy.
        shop_id: Value stored under ``shop_id``; skipped when falsy.

    Raises:
        OSError: If ``config.yaml`` cannot be opened for reading or writing
            (for example, when it does not exist).
    """
    config_path = "config.yaml"
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty (or comment-only) file; start
        # from an empty mapping instead of failing on item assignment below.
        config = yaml.safe_load(f) or {}

    config['target_url'] = url
    if item_id:
        config['item_id'] = item_id
    if shop_id:
        config['shop_id'] = shop_id

    with open(config_path, "w", encoding="utf-8") as f:
        # sort_keys=False keeps the keys in their loaded order.
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)
    print("配置 config.yaml 已更新。")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    import sys

    # Link resolved when no URL is supplied on the command line.
    default_target = "https://k.youshop10.com/cTO2VL6s?a=b&p=iphone&wfr=BuyercopyURL&share_relation=c03c72974993c056_1767112998_1"
    # An optional first command-line argument overrides the built-in link.
    target = sys.argv[1] if len(sys.argv) > 1 else default_target

    final_url, item_id, shop_id = asyncio.run(resolve_url_and_extract(target))

    # A falsy final_url means resolution failed; leave the config untouched.
    if final_url:
        update_config(final_url, item_id, shop_id)
|