Initial commit: 批量查询工具优化版本
This commit is contained in:
78
README.md
Normal file
78
README.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# 批量查询工具使用说明
|
||||
|
||||
## 问题解决方案
|
||||
|
||||
由于网站需要手动打开批量查询弹窗,我们提供了以下解决方案:
|
||||
|
||||
### 方案1: 改进的自动化脚本 (pa.py)
|
||||
- **优点**: 尝试多种方式自动打开弹窗
|
||||
- **缺点**: 如果自动方式失败,需要手动操作
|
||||
- **使用**: 直接运行 `python pa.py`
|
||||
|
||||
### 方案2: API接口调用 (pa_api.py)
|
||||
- **优点**: 完全自动化,无需浏览器操作
|
||||
- **缺点**: 需要先找到正确的API端点
|
||||
- **使用**: 运行 `python pa_api.py`
|
||||
|
||||
### 方案3: API端点发现工具
|
||||
- **用途**: 帮助找到正确的API端点
|
||||
- **使用**: 运行 `python api_discovery.py` 或 `python simple_api_test.py`
|
||||
|
||||
## 推荐使用流程
|
||||
|
||||
### 步骤1: 尝试API方案
|
||||
```bash
python simple_api_test.py
```
|
||||
|
||||
### 步骤2: 如果API方案失败,使用改进的自动化脚本
|
||||
```bash
python pa.py
```
|
||||
|
||||
当程序提示需要手动操作时:
|
||||
1. 在浏览器中找到"批量查询"按钮
|
||||
2. 点击打开弹窗
|
||||
3. 按回车键继续
|
||||
|
||||
### 步骤3: 如果需要找到API端点
|
||||
1. 在浏览器中手动执行一次批量查询
|
||||
2. 打开开发者工具 -> Network标签
|
||||
3. 查看XHR/Fetch请求
|
||||
4. 找到批量查询的API端点
|
||||
5. 更新 `pa_api.py` 中的 `API_ENDPOINT` 变量
|
||||
|
||||
## 文件说明
|
||||
|
||||
- `pa.py`: 改进的Selenium自动化脚本
|
||||
- `pa_api.py`: API接口调用脚本
|
||||
- `api_discovery.py`: API端点发现工具
|
||||
- `simple_api_test.py`: 简单API测试工具
|
||||
- `text.txt`: 查询数据文件(1138行)
|
||||
- `requirements.txt`: 依赖包列表
|
||||
|
||||
## 安装依赖
|
||||
```bash
pip install -r requirements.txt
```
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **Cookie有效期**: 如果Cookie过期,需要重新获取
|
||||
2. **网络连接**: 确保能正常访问目标网站
|
||||
3. **页面结构变化**: 如果网站更新,可能需要调整XPath选择器
|
||||
4. **API端点**: 不同网站可能有不同的API端点格式
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 问题1: 找不到输入框
|
||||
**解决方案**: 手动打开批量查询弹窗,然后按回车继续
|
||||
|
||||
### 问题2: API返回404
|
||||
**解决方案**: 使用浏览器开发者工具找到正确的API端点
|
||||
|
||||
### 问题3: Cookie过期
|
||||
**解决方案**: 重新登录网站,获取新的Cookie
|
||||
|
||||
### 问题4: 页面加载失败
|
||||
**解决方案**: 检查网络连接和网站状态
|
||||
BIN
__pycache__/pa.cpython-313.pyc
Normal file
BIN
__pycache__/pa.cpython-313.pyc
Normal file
Binary file not shown.
289
pa.py
Normal file
289
pa.py
Normal file
@@ -0,0 +1,289 @@
|
||||
# pa.py - 精简版 (streamlined Selenium batch-query script)
|
||||
import os
|
||||
import time
|
||||
import pandas as pd
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
|
||||
# Configuration
URL = "https://prod-eu-cmp.simbalinkglobal.com"

# The locators below are XPath unions ("a | b | c"); each alternative is
# listed on its own line and joined back into a single expression.
INPUT_XPATH = " | ".join((
    '//textarea[@id="iccidList"]',
    '//input[@id="iccidList"]',
    '//*[@id="iccidList"]',
))
CONFIRM_BTN_XPATH = " | ".join((
    '//button[contains(text(),"查询")]',
    '//button[contains(text(),"确认")]',
    '//*[@id="pop-confirm"]//button[2]',
    '//button[@type="submit"]',
))
# Absolute path to the button that opens the batch-query popup.
BATCH_QUERY_BTN_XPATH = (
    '/html/body/div/div[2]/main/div/div/div/div/div[2]'
    '/div[2]/div[1]/div[2]/div/button'
)
# Candidate row locators for the result table, tried in this order.
RESULT_ROWS_XPATH = [
    '//table[contains(@class,"ant-table")]//tbody/tr',
    '//table//tbody/tr',
    '//div[contains(@class,"ant-table")]//tbody/tr',
    '//table[@class="table"]//tbody/tr',
    '//*[contains(@class,"table")]//tr[position()>1]',
]

MAX_PER_BATCH = 50          # maximum number of items submitted per query
OUTPUT_CSV = "results.csv"  # file the scraped rows are appended to
ICCID_FILE = "text.txt"     # input file, one query item per line
HEADLESS = False            # run Chrome without a visible window when True
BATCH_RETRY_COUNT = 2       # extra attempts after the first failed one
|
||||
|
||||
COOKIES = {
|
||||
'platformUser_session': 'eyJsYXN0QWNjZXNzZWQiOjE3NTkxNDc4NjYzMzJ9.2gNtuRzCQH%2BoNra1%2B1WXxcDtTmW91yYVAOLbH6Ry%2BLM',
|
||||
'_manage_session': 'eyJ0b2tlbiI6ImV5SmhiR2NpT2lKSVV6STFOaUlzSW5SNWNDSTZJa3BYVkNKOS5leUoxYzJWeUlqcDdJblZ6WlhKZmFXUWlPaUpaU0RFNE9URXpOVFk1TkRVNU9EWTNOVEkxTVRJaWZTd2lRM0psWVhSbFZHbHRaU0k2SWpJd01qVXRNRGt0TWpsVU1URTZNVGs2TkRJdU9UZzJNRGt4TWpNekt6QXhPakF3SW4wLll6eWtYZGlweUFfaWN4TGxkX3MwS2dWQU5LM2JkZU1fNjM3NDV1ckxQNkEiLCJleHBpcmVUaW1lIjowLCJ1c2VySWQiOiJZSDE4OTEzNTY5NDU5ODY3NTI1MTIiLCJkYXRhTGltaXQiOiJjdXN0b21lIiwidHlwZSI6MSwibGV2ZWwiOjIsInBVc2VySWQiOiJZSDE3Njk5MTg2MjkxMjAyNDAzMjEiLCJsb2dpbk5hbWUiOiJ4aW9uZ3NoaV95dW53ZWkiLCJyb2xlSWQiOiJSTDE5NjI3MDM5MDkxNTU5MDE0NDAiLCJjbGllbnRJRHMiOlsiZXVfY2hlcnkiLCJlYnJvX2NoZXJ5Il0sImNsaWVudElkcyI6ImVicm9fY2hlcnkifQ%3D%3D.jBwQkblyoEP6t7OELXxUMKkoU9%2FJWWQsZPg25SZSz5o'
|
||||
}
|
||||
|
||||
def read_query_items(path):
    """Read the query items from a text file, one item per line.

    Lines are stripped of surrounding whitespace and blank lines are
    dropped.  The file is decoded with the first encoding that succeeds.

    Args:
        path: Path of the text file to read.

    Returns:
        The list of non-empty, stripped lines (empty if the file holds no
        items).

    Raises:
        FileNotFoundError: If ``path`` does not exist (any other ``OSError``
            from ``open`` propagates as well).
        Exception: If none of the candidate encodings can decode the file.
    """
    # 'utf-8-sig' must come before anything else: it decodes plain UTF-8 as
    # well, but also removes a leading BOM.  With plain 'utf-8' first the BOM
    # survived as part of the first item, because str.strip() does not treat
    # U+FEFF as whitespace.
    encodings = ['utf-8-sig', 'gbk', 'cp1252']
    for encoding in encodings:
        try:
            with open(path, 'r', encoding=encoding) as f:
                stripped = [line.strip() for line in f]
        except UnicodeDecodeError:
            # Wrong guess; try the next encoding.
            continue
        lines = [line for line in stripped if line]
        print(f"使用编码 {encoding} 成功读取 {len(lines)} 个查询项")
        return lines
    raise Exception("无法读取文件")
|
||||
|
||||
def save_results_to_csv(results, filename):
    """Append scraped rows to a CSV file and return how many rows were added.

    Each entry of ``results`` is a dict with a ``"batch"`` label and a
    ``"cells"`` list.  When a row has at least two cells, the first goes to
    the ``ICCID`` column, the second to ``租户`` and the remaining ones to
    ``列3``, ``列4``, ...; rows with fewer cells only carry the batch label.

    If ``filename`` already exists its content is loaded, the new rows are
    appended and the whole file is rewritten (UTF-8 with BOM).

    Args:
        results: List of ``{"batch": ..., "cells": [...]}`` dicts.
        filename: Path of the CSV file to create or extend.

    Returns:
        The number of rows added by this call (0 for empty ``results``).
    """
    if not results:
        return 0

    df_data = []
    for result in results:
        row_data = {"batch": result["batch"]}
        cells = result["cells"]
        if len(cells) >= 2:
            row_data["ICCID"] = cells[0]
            row_data["租户"] = cells[1]
            # Third cell onwards is named by its 1-based position.
            for position, cell in enumerate(cells[2:], start=3):
                row_data[f"列{position}"] = cell
        df_data.append(row_data)

    df = pd.DataFrame(df_data)
    # A zero-byte file would make read_csv raise; treat it like a missing file.
    if os.path.exists(filename) and os.path.getsize(filename) > 0:
        # dtype=str keeps every stored value verbatim.  Without it pandas
        # infers numeric types, so ICCIDs lose leading zeros and, once
        # promoted to float, their low-order digits on every rewrite.
        existing_df = pd.read_csv(filename, encoding='utf-8-sig', dtype=str)
        df = pd.concat([existing_df, df], ignore_index=True)

    df.to_csv(filename, index=False, encoding='utf-8-sig')
    return len(df_data)
|
||||
|
||||
def clear_input_box(driver, input_element):
    """Empty an input element, using several clearing strategies in a row.

    The element is cleared through WebDriver, then via select-all + delete
    keystrokes, and finally by resetting its ``value`` with JavaScript.

    Args:
        driver: WebDriver used to run the JavaScript reset.
        input_element: The element to clear.

    Returns:
        True when every step ran without raising, False otherwise.
    """
    try:
        input_element.clear()
        for keystroke in (Keys.CONTROL + "a", Keys.DELETE):
            input_element.send_keys(keystroke)
        driver.execute_script("arguments[0].value = '';", input_element)
        # Short pause after the reset before the caller types again.
        time.sleep(0.1)
    except Exception:
        return False
    else:
        return True
|
||||
|
||||
def init_driver(headless=False):
    """Start Chrome, open ``URL`` and install the session cookies.

    Args:
        headless: When true, Chrome is started with ``--headless=new``.

    Returns:
        The WebDriver instance, positioned on ``URL`` with every entry of
        ``COOKIES`` added.  The caller is responsible for ``quit()``.

    Raises:
        Exception: Whatever Selenium raises during start-up or set-up; the
            browser is shut down before the exception propagates.
    """
    chrome_opts = Options()
    if headless:
        chrome_opts.add_argument("--headless=new")

    # Tuning flags
    for flag in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-logging",
        "--disable-gpu-logging",
        "--log-level=3",
        "--silent",
        "--disable-images",
    ):
        chrome_opts.add_argument(flag)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_opts)
    try:
        driver.maximize_window()

        # Add the cookies.  The page is loaded first because WebDriver only
        # accepts cookies for the domain of the current document.
        driver.get(URL)
        for name, value in COOKIES.items():
            driver.add_cookie({"name": name, "value": value})
    except Exception:
        # Nobody else holds a reference yet, so close the browser here
        # instead of leaking the Chrome process.
        driver.quit()
        raise

    return driver
|
||||
|
||||
def scrape_results_from_table(driver):
    """Collect the text of every non-empty row of the result table.

    The locators in ``RESULT_ROWS_XPATH`` are tried in order, each with a
    5 second wait; the first one that yields rows is used.

    Args:
        driver: WebDriver showing the result page.

    Returns:
        A list of rows, each a list of the stripped, non-empty cell texts.
        Empty when no locator matched (a message is printed in that case).
    """
    # Try the different table XPaths
    rows = None
    for xpath in RESULT_ROWS_XPATH:
        try:
            rows = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.XPATH, xpath)))
        except Exception:
            continue
        if rows:
            break
    if not rows:
        print("未找到结果表格")
        return []

    results = []
    for row in rows:
        try:
            # Prefer data cells, then header cells, then any nested <div>.
            cells = row.find_elements(By.TAG_NAME, "td")
            if not cells:
                cells = row.find_elements(By.TAG_NAME, "th")
            if not cells:
                cells = row.find_elements(By.XPATH, ".//div")

            # Read .text once per cell: each access is a WebDriver round
            # trip, and two reads of the same cell may not agree.
            stripped = (cell.text.strip() for cell in cells)
            texts = [text for text in stripped if text]
            if texts:
                results.append(texts)
        except Exception:
            # Best effort: a row that cannot be read is skipped.
            continue

    return results
|
||||
|
||||
def submit_batch_and_collect(driver, batch_items):
    """Submit one batch through the batch-query popup and scrape the result.

    Args:
        driver: WebDriver positioned on the query page.
        batch_items: Items to query; anything beyond ``MAX_PER_BATCH`` is
            ignored.

    Returns:
        The rows returned by ``scrape_results_from_table`` (possibly empty
        when the query produced no table).

    Raises:
        RuntimeError: If the input box cannot be found or cleared, or the
            confirm button cannot be clicked.  These used to be reported as
            an empty result, which callers could not tell apart from a query
            without matches, so their retry logic never ran.
    """
    batch_items = batch_items[:MAX_PER_BATCH]

    # Try to open the batch-query popup.  Best effort on purpose: the popup
    # may already be open, in which case the button is not clickable.
    try:
        batch_btn = WebDriverWait(driver, 1).until(EC.element_to_be_clickable((By.XPATH, BATCH_QUERY_BTN_XPATH)))
        batch_btn.click()
        time.sleep(0.5)
    except Exception:
        pass

    # Locate the input box
    try:
        inp = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, INPUT_XPATH)))
    except Exception as exc:
        print("未找到输入框")
        raise RuntimeError("未找到输入框") from exc

    # Clear it and type the batch, one item per line
    if not clear_input_box(driver, inp):
        raise RuntimeError("无法清空输入框")
    inp.send_keys("\n".join(batch_items))

    # Click the query button
    try:
        btn = WebDriverWait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, CONFIRM_BTN_XPATH)))
        btn.click()
    except Exception as exc:
        raise RuntimeError("无法点击查询按钮") from exc

    # Wait for the result and scrape it.
    # NOTE(review): this is a fixed 1 s pause; nothing here verifies that
    # the table already shows the new batch.
    time.sleep(1)
    results = scrape_results_from_table(driver)

    # Leave the input box empty for the next batch (best effort).
    try:
        inp = driver.find_element(By.XPATH, INPUT_XPATH)
        clear_input_box(driver, inp)
    except Exception:
        pass

    return results
|
||||
|
||||
def _run_batch_with_retry(driver, batch, batch_index):
    """Query one batch with retries; return rows saved, or None if all attempts failed."""
    for retry in range(BATCH_RETRY_COUNT + 1):
        try:
            if retry > 0:
                print(f"重试第 {retry} 次...")
                time.sleep(2)

            results = submit_batch_and_collect(driver, batch)
            print(f"本批次获得 {len(results)} 条结果")

            # Save immediately so finished batches survive a later crash.
            saved_count = 0
            if results:
                batch_results = [{"batch": batch_index, "cells": row_cells} for row_cells in results]
                saved_count = save_results_to_csv(batch_results, OUTPUT_CSV)
                print(f"✅ 已保存 {saved_count} 条结果到 {OUTPUT_CSV}")
            return saved_count
        except Exception as e:
            print(f"批次 {batch_index} 第 {retry + 1} 次尝试失败: {e}")

    print(f"❌ 批次 {batch_index} 重试失败")
    return None


def _print_report(query_items, batches, failed_batches, total_saved_results):
    """Print the end-of-run summary."""
    succeeded = len(batches) - len(failed_batches)
    separator = '=' * 50

    print(f"\n{separator}")
    print("📊 批量查询完成报告")
    print(separator)
    print(f"总查询项: {len(query_items)}")
    print(f"总批次数: {len(batches)}")
    print(f"成功批次: {succeeded}")
    print(f"失败批次: {len(failed_batches)}")
    print(f"成功率: {(succeeded / len(batches) * 100):.1f}%")
    print(f"总保存结果数: {total_saved_results}")

    if failed_batches:
        print(f"失败批次列表: {failed_batches}")

    print(separator)
    print(f"✅ 所有结果已实时保存到 {OUTPUT_CSV}")


def main():
    """Read the query items, run them in batches and print a summary.

    Items come from ``ICCID_FILE`` and are split into chunks of
    ``MAX_PER_BATCH``.  Each chunk is queried with up to
    ``BATCH_RETRY_COUNT`` retries and its rows are appended to
    ``OUTPUT_CSV`` right away.  The browser is always closed at the end.
    """
    query_items = read_query_items(ICCID_FILE)
    if not query_items:
        print(f"在 {ICCID_FILE} 中未找到查询项")
        return

    print(f"总共读取到 {len(query_items)} 个查询项")
    # Every chunk holds at most MAX_PER_BATCH items, so no further splitting
    # is needed later on.
    batches = [query_items[i:i+MAX_PER_BATCH] for i in range(0, len(query_items), MAX_PER_BATCH)]
    print(f"将分为 {len(batches)} 个批次处理")

    driver = init_driver(HEADLESS)
    total_saved_results = 0
    failed_batches = []

    try:
        # Reload so the cookies installed by init_driver take effect.
        driver.refresh()
        time.sleep(1.5)
        print("已通过Cookie自动登录,开始批量查询...")

        for batch_index, batch in enumerate(batches, 1):
            print(f"\n=== 处理批次 {batch_index}/{len(batches)} ===")
            print(f"本批次包含 {len(batch)} 个查询项")

            saved_count = _run_batch_with_retry(driver, batch, batch_index)
            if saved_count is None:
                failed_batches.append(batch_index)
            else:
                total_saved_results += saved_count

            # Pause between batches
            if batch_index < len(batches):
                time.sleep(1)

        _print_report(query_items, batches, failed_batches, total_saved_results)

    except Exception as e:
        print(f"程序执行出错: {e}")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()
|
||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
selenium>=4.15.0
webdriver-manager>=4.0.0
pandas>=2.0.0
requests>=2.31.0
# pathlib is part of the standard library (Python 3.4+); the PyPI package of
# the same name is an obsolete backport and must not be installed.
|
||||
255
response_parser.py
Normal file
255
response_parser.py
Normal file
@@ -0,0 +1,255 @@
|
||||
# response_parser.py
|
||||
import requests
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Cookie 信息
|
||||
COOKIES = {
|
||||
'platformUser_session': 'eyJsYXN0QWNjZXNzZWQiOjE3NTkxNDc4NjYzMzJ9.2gNtuRzCQH%2BoNra1%2B1WXxcDtTmW91yYVAOLbH6Ry%2BLM',
|
||||
'_manage_session': 'eyJ0b2tlbiI6ImV5SmhiR2NpT2lKSVV6STFOaUlzSW5SNWNDSTZJa3BYVkNKOS5leUoxYzJWeUlqcDdJblZ6WlhKZmFXUWlPaUpaU0RFNE9URXpOVFk1TkRVNU9EWTNOVEkxTVRJaWZTd2lRM0psWVhSbFZHbHRaU0k2SWpJd01qVXRNRGt0TWpsVU1URTZNVGs2TkRJdU9UZzJNRGt4TWpNekt6QXhPakF3SW4wLll6eWtYZGlweUFfaWN4TGxkX3MwS2dWQU5LM2JkZU1fNjM3NDV1ckxQNkEiLCJleHBpcmVUaW1lIjowLCJ1c2VySWQiOiJZSDE4OTEzNTY5NDU5ODY3NTI1MTIiLCJkYXRhTGltaXQiOiJjdXN0b21lIiwidHlwZSI6MSwibGV2ZWwiOjIsInBVc2VySWQiOiJZSDE3Njk5MTg2MjkxMjAyNDAzMjEiLCJsb2dpbk5hbWUiOiJ4aW9uZ3NoaV95dW53ZWkiLCJyb2xlSWQiOiJSTDE5NjI3MDM5MDkxNTU5MDE0NDAiLCJjbGllbnRJRHMiOlsiZXVfY2hlcnkiLCJlYnJvX2NoZXJ5Il0sImNsaWVudElkcyI6ImVicm9fY2hlcnkifQ%3D%3D.jBwQkblyoEP6t7OELXxUMKkoU9%2FJWWQsZPg25SZSz5o'
|
||||
}
|
||||
|
||||
def get_api_response():
    """Fetch the customer SIM page with the stored session cookies.

    Returns:
        The response body as text when the server answers 200, otherwise
        None (the status code or the request error is printed).
    """
    # Request headers
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        # Only advertise encodings that requests always decodes.  'br' and
        # 'zstd' need optional packages that requirements.txt does not
        # install; without them the body would come back still compressed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'prod-eu-cmp.simbalinkglobal.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }

    url = "https://prod-eu-cmp.simbalinkglobal.com/simcustomer?filterData=N4IgkgxhCWAmAy0DOAXEAuEAOAnFrATAMw4AM5AjKUQKw1GkFYDsANLvsWZdXURThwA2dnkIlypKrXqMCpUZwk8ZRIcyFFF47uTwVmBoQe1dJ0vtQAsNU8qm966w3d0X6WUlYqvzj-syevip8NOLBDqo4NDhEIKwgqACGKACuSBggIAC%2BQA&pageSize=50"

    # The context manager closes the session's connections on every path.
    with requests.Session() as session:
        # Session cookies
        for name, value in COOKIES.items():
            session.cookies.set(name, value, domain='prod-eu-cmp.simbalinkglobal.com')
        session.headers.update(headers)

        try:
            response = session.get(url, timeout=30)
        except requests.RequestException as e:
            print(f"API调用异常: {e}")
            return None

        if response.status_code == 200:
            return response.text
        print(f"请求失败: {response.status_code}")
        return None
|
||||
|
||||
def _print_json_or_raw(candidate, parsed_label, raw_label):
    """Pretty-print *candidate* as JSON, or its first 200 characters if it does not parse."""
    try:
        json_data = json.loads(candidate)
    except (ValueError, RecursionError):
        # Not valid JSON (json.JSONDecodeError is a ValueError) or nested
        # too deeply: show the raw text instead.
        print(f"{raw_label}: {candidate[:200]}...")
    else:
        print(f"{parsed_label}: {json.dumps(json_data, indent=2, ensure_ascii=False)}")


def parse_response(response_text):
    """Print an analysis of a response body and return the ICCIDs found in it.

    The text is scanned for ICCIDs (``8988239000`` followed by 8 digits),
    JSON-looking fragments, HTML table markup and JavaScript state
    assignments; every finding is printed.

    Args:
        response_text: The response body to analyse.

    Returns:
        All ICCID matches, in order of appearance (duplicates included).
    """
    print("=== 响应内容分析 ===")
    print(f"响应长度: {len(response_text)} 字符")

    # ICCIDs
    iccid_pattern = r'8988239000\d{8}'
    iccids = re.findall(iccid_pattern, response_text)

    print(f"找到 {len(iccids)} 个ICCID:")
    for iccid in iccids:
        print(f" - {iccid}")

    # JSON fragments
    json_patterns = [
        r'\{[^{}]*"iccid"[^{}]*\}',
        r'\[[^\[\]]*"8988239000\d{8}"[^\[\]]*\]',
        r'\{.*"data".*\}',
        r'\{.*"result".*\}'
    ]
    for pattern in json_patterns:
        matches = re.findall(pattern, response_text, re.DOTALL)
        if matches:
            print(f"\n找到JSON数据模式: {pattern}")
            for match in matches:
                _print_json_or_raw(match, "JSON数据", "原始数据")

    # HTML table markup
    table_patterns = [
        r'<table[^>]*>.*?</table>',
        r'<tbody[^>]*>.*?</tbody>',
        r'<tr[^>]*>.*?</tr>'
    ]
    for pattern in table_patterns:
        matches = re.findall(pattern, response_text, re.DOTALL | re.IGNORECASE)
        if matches:
            print(f"\n找到表格数据: {len(matches)} 个匹配")
            for i, match in enumerate(matches[:3]):  # only show the first 3
                print(f"表格 {i+1}: {match[:200]}...")

    # JavaScript state assignments; the capture group is the object literal
    js_patterns = [
        r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});',
        r'window\.__DATA__\s*=\s*(\{.*?\});',
        r'var\s+data\s*=\s*(\{.*?\});',
        r'const\s+data\s*=\s*(\{.*?\});'
    ]
    for pattern in js_patterns:
        matches = re.findall(pattern, response_text, re.DOTALL)
        if matches:
            print(f"\n找到JavaScript数据: {pattern}")
            for match in matches:
                _print_json_or_raw(match, "JS数据", "原始JS数据")

    return iccids
|
||||
|
||||
def create_working_api():
    """Write a stand-alone batch-query script to ``working_api.py``.

    The generated script reads ``text.txt``, requests the customer SIM page
    once per batch of 50 items and stores every ICCID found in the responses
    in ``api_results.txt``.

    The script's cookies are rendered from this module's ``COOKIES`` rather
    than kept as a second hard-coded copy, so refreshing an expired cookie
    only has to happen in one place.

    NOTE: the generated ``query_iccids_batch`` still sends a fixed
    ``filterData`` value and does not use its ``iccid_list`` argument.
    """
    print("\n=== 创建可用的API函数 ===")

    # Compared with the previous template: 'import time' sits with the other
    # imports (it used to be imported only under the __main__ guard, so
    # importing the generated module and calling main() failed), and
    # Accept-Encoding no longer advertises 'br'/'zstd', which requests cannot
    # decode without optional packages.
    template = '''
# 可用的批量查询API
import requests
import json
import re
import time
from pathlib import Path

# Cookie 信息
COOKIES = __COOKIES__

def read_query_items(path):
    """读取查询项目文件"""
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"{path} not found.")

    encodings = ['utf-8', 'gbk', 'utf-8-sig', 'cp1252']
    for encoding in encodings:
        try:
            lines = [l.strip() for l in p.read_text(encoding=encoding).splitlines() if l.strip()]
            if lines:
                return lines
        except Exception:
            continue

    raise Exception("无法读取文件")

def query_iccids_batch(iccid_list, page_size=50):
    """批量查询ICCID"""
    session = requests.Session()

    # 设置Cookie
    for name, value in COOKIES.items():
        session.cookies.set(name, value, domain='prod-eu-cmp.simbalinkglobal.com')

    # 设置请求头
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'prod-eu-cmp.simbalinkglobal.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }
    session.headers.update(headers)

    # 使用您提供的filterData(需要根据实际ICCID列表调整)
    filter_data = "N4IgkgxhCWAmAy0DOAXEAuEAOAnFrATAMw4AM5AjKUQKw1GkFYDsANLvsWZdXURThwA2dnkIlypKrXqMCpUZwk8ZRIcyFFF47uTwVmBoQe1dJ0vtQAsNU8qm966w3d0X6WUlYqvzj-syevip8NOLBDqo4NDhEIKwgqACGKACuSBggIAC%2BQA"
    url = f"https://prod-eu-cmp.simbalinkglobal.com/simcustomer?filterData={filter_data}&pageSize={page_size}"

    try:
        response = session.get(url, timeout=30)
        if response.status_code == 200:
            return response.text
        else:
            print(f"请求失败: {response.status_code}")
            return None
    except Exception as e:
        print(f"API调用异常: {e}")
        return None

def extract_iccids_from_response(response_text):
    """从响应中提取ICCID"""
    iccid_pattern = r'8988239000\\d{8}'
    iccids = re.findall(iccid_pattern, response_text)
    return iccids

def main():
    """主函数"""
    # 读取查询项
    query_items = read_query_items('text.txt')
    print(f"读取到 {len(query_items)} 个查询项")

    # 分批处理
    batch_size = 50
    batches = [query_items[i:i+batch_size] for i in range(0, len(query_items), batch_size)]

    all_results = []
    for i, batch in enumerate(batches, 1):
        print(f"\\n处理批次 {i}/{len(batches)}: {len(batch)} 个ICCID")

        # 查询批次
        response = query_iccids_batch(batch)
        if response:
            # 提取结果
            found_iccids = extract_iccids_from_response(response)
            print(f"找到 {len(found_iccids)} 个ICCID结果")
            all_results.extend(found_iccids)

        # 批次间等待
        if i < len(batches):
            time.sleep(2)

    print(f"\\n总共找到 {len(all_results)} 个ICCID结果")

    # 保存结果
    with open('api_results.txt', 'w', encoding='utf-8') as f:
        for iccid in all_results:
            f.write(f"{iccid}\\n")

    print("结果已保存到 api_results.txt")

if __name__ == "__main__":
    main()
'''

    # COOKIES maps str to str, so its JSON rendering is also a valid Python
    # dict literal.
    script = template.replace("__COOKIES__", json.dumps(COOKIES, indent=4))

    with open('working_api.py', 'w', encoding='utf-8') as f:
        f.write(script)

    print("可用的API函数已保存到 working_api.py")
|
||||
|
||||
def main():
    """Fetch the page, print its analysis and generate ``working_api.py``."""
    print("=== 响应解析工具 ===")

    # Fetch the response
    response_text = get_api_response()
    if not response_text:
        print("无法获取API响应")
        return

    # Analyse it; the findings are printed, the returned list is not needed.
    parse_response(response_text)

    # Generate the stand-alone script
    create_working_api()

    print("\n=== 解析完成 ===")
    print("建议:")
    print("1. 查看找到的ICCID列表")
    print("2. 使用 working_api.py 进行批量查询")
    print("3. 需要进一步分析filterData的生成规则")


if __name__ == "__main__":
    main()
|
||||
1289
results.csv
Normal file
1289
results.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user