Files
weidian/resolve_url.py

84 lines
3.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
from playwright.async_api import async_playwright
import re
import yaml
def _first_group(pattern, text):
    """Return capture group 1 of the first case-insensitive match, or ""."""
    found = re.search(pattern, text, re.IGNORECASE)
    return found.group(1) if found else ""


async def resolve_url_and_extract(short_url):
    """Open a short link in a headless browser and extract item/shop IDs.

    The link is loaded in headless Chromium using Playwright's "iPhone 13"
    device profile. Once the page has settled, ``itemID`` and ``shopId`` are
    read from the final URL's query parameters; any ID missing from the URL
    is then searched for in the rendered page HTML.

    Args:
        short_url: The URL to open.

    Returns:
        A ``(final_url, item_id, shop_id)`` tuple. An ID that could not be
        found is returned as ``""``. If navigation or extraction raises, the
        error is printed and ``(None, None, None)`` is returned instead.
    """
    # Local import: only this function needs Playwright's timeout type.
    from playwright.async_api import TimeoutError as PlaywrightTimeoutError

    print(f"正在解析链接: {short_url}")
    async with async_playwright() as p:
        # Headless mode with the GPU disabled.
        browser = await p.chromium.launch(headless=True, args=['--disable-gpu'])
        # Emulate a phone: some redirects may depend on the user agent.
        device = p.devices['iPhone 13']
        context = await browser.new_context(**device)
        page = await context.new_page()
        try:
            await page.goto(short_url)
            # Wait for redirects to finish. Pages with persistent background
            # traffic may never become network-idle; a timeout here should not
            # throw away a URL that has already been resolved.
            try:
                await page.wait_for_load_state('networkidle')
            except PlaywrightTimeoutError:
                print("等待 networkidle 超时, 继续使用当前 URL")
            await asyncio.sleep(2)  # extra delay so the URL has time to settle
            final_url = page.url
            print(f"最终链接: {final_url}")

            # First try the URL's query parameters (itemID=..., shopId=...).
            item_id = _first_group(r"[?&]itemID=(\d+)", final_url)
            shop_id = _first_group(r"[?&]shopId=(\d+)", final_url)

            # Fall back to the page source for anything the URL did not
            # provide, e.g. "itemID":"123123" or itemID = '123123'.
            if not item_id or not shop_id:
                content = await page.content()
                if not item_id:
                    item_id = _first_group(
                        r'["\']?itemID["\']?\s*[:=]\s*["\']?(\d+)["\']?', content
                    )
                if not shop_id:
                    shop_id = _first_group(
                        r'["\']?shopId["\']?\s*[:=]\s*["\']?(\d+)["\']?', content
                    )

            print(f"解析结果 -> itemID: {item_id}, shopId: {shop_id}")
            return final_url, item_id, shop_id
        except Exception as e:
            # Best-effort boundary: report the failure and signal it to the
            # caller with an all-None result.
            print(f"解析出错: {e}")
            return None, None, None
        finally:
            await browser.close()
def update_config(url, item_id, shop_id, config_path="config.yaml"):
    """Store the resolved URL and IDs in a YAML config file.

    The file is read, updated in memory and written back. ``target_url`` is
    always overwritten; ``item_id`` and ``shop_id`` are only overwritten when
    a truthy value is supplied, so existing values survive a partial result.

    Args:
        url: Value stored under the ``target_url`` key.
        item_id: Value stored under ``item_id``; skipped when falsy.
        shop_id: Value stored under ``shop_id``; skipped when falsy.
        config_path: Path of the YAML file to update.

    Raises:
        OSError: If the config file cannot be opened for reading or writing.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; fall back to an
        # empty mapping so the key assignments below do not fail.
        config = yaml.safe_load(f) or {}

    config['target_url'] = url
    if item_id:
        config['item_id'] = item_id
    if shop_id:
        config['shop_id'] = shop_id

    with open(config_path, "w", encoding="utf-8") as f:
        # sort_keys=False keeps the existing key order of the file.
        yaml.dump(config, f, allow_unicode=True, sort_keys=False)
    print(f"配置 {config_path} 已更新。")
if __name__ == "__main__":
    import sys

    # Short link resolved when no URL is passed on the command line.
    default_target = "https://k.youshop10.com/cTO2VL6s?a=b&p=iphone&wfr=BuyercopyURL&share_relation=c03c72974993c056_1767112998_1"
    # Optional first CLI argument overrides the default link.
    target = sys.argv[1] if len(sys.argv) > 1 else default_target

    final_url, item_id, shop_id = asyncio.run(resolve_url_and_extract(target))
    # A falsy final_url means resolution failed; leave the config untouched.
    if final_url:
        update_config(final_url, item_id, shop_id)