Update markdown and initial code
This commit is contained in:
83
resolve_url.py
Normal file
83
resolve_url.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import asyncio
|
||||
from playwright.async_api import async_playwright
|
||||
import re
|
||||
import yaml
|
||||
|
||||
def _first_group(pattern, text):
    """Return group 1 of the first case-insensitive match of *pattern* in *text*, or ""."""
    match = re.search(pattern, text, re.IGNORECASE)
    return match.group(1) if match else ""


async def resolve_url_and_extract(short_url):
    """Open *short_url* in headless Chromium, follow redirects and extract IDs.

    The page is loaded in an emulated "iPhone 13" context. After navigation,
    the final page URL is searched for ``itemID`` and ``shopId`` query
    parameters; any ID not found there is searched for in the page HTML.

    Args:
        short_url: The (short) link to resolve.

    Returns:
        A tuple ``(final_url, item_id, shop_id)``. Each ID is a string of
        digits, or ``""`` when it could not be found. If anything fails after
        the browser has been launched, the error is printed and
        ``(None, None, None)`` is returned.
    """
    # Imported locally so the timeout type is available without changing the
    # module-level import block.
    from playwright.async_api import TimeoutError as PlaywrightTimeoutError

    print(f"正在解析链接: {short_url}")
    async with async_playwright() as p:
        # Run headless with the GPU disabled.
        browser = await p.chromium.launch(headless=True, args=['--disable-gpu'])

        # Everything after the launch lives inside the try block so that the
        # browser is always closed and every failure is reported the same way.
        try:
            # Emulate a phone; some redirects may depend on the user agent.
            device = p.devices['iPhone 13']
            context = await browser.new_context(**device)
            page = await context.new_page()

            await page.goto(short_url)

            # Wait for the redirects to settle. Pages that keep connections
            # open never become network-idle, so a timeout here is not fatal:
            # the URL reached so far is still usable.
            try:
                await page.wait_for_load_state('networkidle')
            except PlaywrightTimeoutError:
                print("等待网络空闲超时，继续使用当前页面。")
            await asyncio.sleep(2)  # extra wait so the URL is stable

            final_url = page.url
            print(f"最终链接: {final_url}")

            # First try the URL itself; the usual parameters are
            # itemID=xxx and shopId=xxx (e.g. /item.html?itemID=...).
            item_id = _first_group(r"[?&]itemID=(\d+)", final_url)
            shop_id = _first_group(r"[?&]shopId=(\d+)", final_url)

            # Fall back to the page content (sometimes the IDs only appear
            # there, e.g. as global variables).
            if not item_id or not shop_id:
                content = await page.content()
                if not item_id:
                    # Matches "itemID":"123123" or itemID = '123123'
                    item_id = _first_group(
                        r'["\']?itemID["\']?\s*[:=]\s*["\']?(\d+)["\']?', content
                    )
                if not shop_id:
                    shop_id = _first_group(
                        r'["\']?shopId["\']?\s*[:=]\s*["\']?(\d+)["\']?', content
                    )

            print(f"解析结果 -> itemID: {item_id}, shopId: {shop_id}")
            return final_url, item_id, shop_id

        except Exception as e:
            # Top-level boundary for this script: report and signal failure.
            print(f"解析出错: {e}")
            return None, None, None
        finally:
            await browser.close()
|
||||
|
||||
def update_config(url, item_id, shop_id):
    """Write the resolved URL and IDs into ``config.yaml``.

    The file is read from the current working directory, ``target_url`` is
    always overwritten, and ``item_id`` / ``shop_id`` are only overwritten
    when a non-empty value is given. Other keys are written back unchanged.

    Args:
        url: Value stored under ``target_url``.
        item_id: Value stored under ``item_id``; skipped when falsy.
        shop_id: Value stored under ``shop_id``; skipped when falsy.

    Raises:
        FileNotFoundError: If ``config.yaml`` does not exist.
    """
    config_path = "config.yaml"
    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; start from an empty
        # mapping in that case instead of failing on item assignment below.
        config = yaml.safe_load(f) or {}

    config['target_url'] = url
    if item_id:
        config['item_id'] = item_id
    if shop_id:
        config['shop_id'] = shop_id

    # Serialize before opening the file for writing, so a serialization error
    # cannot leave a truncated config.yaml behind.
    serialized = yaml.dump(config, allow_unicode=True, sort_keys=False)
    with open(config_path, "w", encoding="utf-8") as f:
        f.write(serialized)
    print("配置 config.yaml 已更新。")
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Short link resolved when no URL is passed on the command line.
    default_target = "https://k.youshop10.com/cTO2VL6s?a=b&p=iphone&wfr=BuyercopyURL&share_relation=c03c72974993c056_1767112998_1"

    # Usage: python resolve_url.py [URL]
    target = sys.argv[1] if len(sys.argv) > 1 else default_target

    final_url, item_id, shop_id = asyncio.run(resolve_url_and_extract(target))

    # final_url is None when resolution failed; leave the config untouched then.
    if final_url:
        update_config(final_url, item_id, shop_id)
|
||||
Reference in New Issue
Block a user