Files
weidian/resolve_url.py

84 lines
3.1 KiB
Python
Raw Normal View History

2026-02-02 09:27:49 +08:00
import asyncio
from playwright.async_api import async_playwright
import re
import yaml
async def resolve_url_and_extract(short_url):
    """Follow a short link in a headless browser and extract the item/shop IDs.

    The link is opened in headless Chromium using the 'iPhone 13' device
    profile. Once navigation settles, the final page URL is read and the
    ``itemID`` / ``shopId`` values are looked up, first in the URL query
    string and then, for whichever is still missing, in the page HTML.

    Args:
        short_url: The URL to open.

    Returns:
        A ``(final_url, item_id, shop_id)`` tuple. An ID that could not be
        found is returned as an empty string. If anything raises while
        navigating or parsing, ``(None, None, None)`` is returned instead.
    """
    print(f"正在解析链接: {short_url}")
    async with async_playwright() as playwright:
        # Headless run with the GPU disabled.
        browser = await playwright.chromium.launch(
            headless=True,
            args=['--disable-gpu'],
        )
        # Emulate a phone: some redirects may depend on the user agent.
        phone_profile = playwright.devices['iPhone 13']
        context = await browser.new_context(**phone_profile)
        page = await context.new_page()
        try:
            await page.goto(short_url)
            # Wait for the redirect chain to finish.
            await page.wait_for_load_state('networkidle')
            # Extra pause so the URL has settled before it is read.
            await asyncio.sleep(2)

            final_url = page.url
            print(f"最终链接: {final_url}")

            # First attempt: query parameters such as itemID=xxx / shopId=xxx
            # (e.g. /item.html?itemID=...).
            url_item = re.search(r"[?&]itemID=(\d+)", final_url, re.IGNORECASE)
            item_id = url_item.group(1) if url_item else ""
            url_shop = re.search(r"[?&]shopId=(\d+)", final_url, re.IGNORECASE)
            shop_id = url_shop.group(1) if url_shop else ""

            # Fallback: the IDs sometimes only appear inside the page source
            # (for instance as global variables).
            if not (item_id and shop_id):
                html = await page.content()
                if not item_id:
                    # Matches forms like "itemID":"123123" or itemID = '123123'.
                    found = re.search(r'["\']?itemID["\']?\s*[:=]\s*["\']?(\d+)["\']?', html, re.IGNORECASE)
                    if found:
                        item_id = found.group(1)
                if not shop_id:
                    found = re.search(r'["\']?shopId["\']?\s*[:=]\s*["\']?(\d+)["\']?', html, re.IGNORECASE)
                    if found:
                        shop_id = found.group(1)

            print(f"解析结果 -> itemID: {item_id}, shopId: {shop_id}")
            return final_url, item_id, shop_id
        except Exception as exc:
            print(f"解析出错: {exc}")
            return None, None, None
        finally:
            # Always close the browser, whether resolution succeeded or not.
            await browser.close()
def update_config(url, item_id, shop_id):
    """Write the resolved URL and IDs into ``config.yaml``.

    The file is loaded, ``target_url`` is always overwritten, and
    ``item_id`` / ``shop_id`` are overwritten only when a truthy value is
    given, so an ID that was not found leaves any existing entry untouched.
    The file is then rewritten in place.

    Args:
        url: Value stored under ``target_url``.
        item_id: Value stored under ``item_id``; skipped when empty/None.
        shop_id: Value stored under ``shop_id``; skipped when empty/None.

    Raises:
        FileNotFoundError: If ``config.yaml`` does not exist.
        TypeError: If the top-level YAML value is not a mapping.
    """
    config_path = "config.yaml"
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # An empty YAML document loads as None; start from an empty mapping
    # instead of failing on the item assignment below.
    if config is None:
        config = {}
    if not isinstance(config, dict):
        # Fail before touching the file rather than with an opaque error.
        raise TypeError(
            f"{config_path} must contain a YAML mapping at the top level, "
            f"got {type(config).__name__}"
        )

    config['target_url'] = url
    if item_id:
        config['item_id'] = item_id
    if shop_id:
        config['shop_id'] = shop_id

    with open(config_path, "w", encoding="utf-8") as f:
        # sort_keys=False keeps the existing key order in the file.
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)
    print("配置 config.yaml 已更新。")
if __name__ == "__main__":
    # Short share link to resolve.
    target = "https://k.youshop10.com/cTO2VL6s?a=b&p=iphone&wfr=BuyercopyURL&share_relation=c03c72974993c056_1767112998_1"
    resolved = asyncio.run(resolve_url_and_extract(target))
    final_url, item_id, shop_id = resolved
    # A falsy final_url means resolution failed; leave the config untouched.
    if final_url:
        update_config(final_url, item_id, shop_id)