Files
pa/response_parser.py

256 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# response_parser.py
import requests
import json
import re
from pathlib import Path
# Cookie values attached to every session created in this module.
# NOTE(review): these look like live session credentials committed to source;
# presumably they should come from the environment or a secrets store and be
# rotated - confirm with the owner.
COOKIES = {
'platformUser_session': 'eyJsYXN0QWNjZXNzZWQiOjE3NTkxNDc4NjYzMzJ9.2gNtuRzCQH%2BoNra1%2B1WXxcDtTmW91yYVAOLbH6Ry%2BLM',
'_manage_session': 'eyJ0b2tlbiI6ImV5SmhiR2NpT2lKSVV6STFOaUlzSW5SNWNDSTZJa3BYVkNKOS5leUoxYzJWeUlqcDdJblZ6WlhKZmFXUWlPaUpaU0RFNE9URXpOVFk1TkRVNU9EWTNOVEkxTVRJaWZTd2lRM0psWVhSbFZHbHRaU0k2SWpJd01qVXRNRGt0TWpsVU1URTZNVGs2TkRJdU9UZzJNRGt4TWpNekt6QXhPakF3SW4wLll6eWtYZGlweUFfaWN4TGxkX3MwS2dWQU5LM2JkZU1fNjM3NDV1ckxQNkEiLCJleHBpcmVUaW1lIjowLCJ1c2VySWQiOiJZSDE4OTEzNTY5NDU5ODY3NTI1MTIiLCJkYXRhTGltaXQiOiJjdXN0b21lIiwidHlwZSI6MSwibGV2ZWwiOjIsInBVc2VySWQiOiJZSDE3Njk5MTg2MjkxMjAyNDAzMjEiLCJsb2dpbk5hbWUiOiJ4aW9uZ3NoaV95dW53ZWkiLCJyb2xlSWQiOiJSTDE5NjI3MDM5MDkxNTU5MDE0NDAiLCJjbGllbnRJRHMiOlsiZXVfY2hlcnkiLCJlYnJvX2NoZXJ5Il0sImNsaWVudElkcyI6ImVicm9fY2hlcnkifQ%3D%3D.jBwQkblyoEP6t7OELXxUMKkoU9%2FJWWQsZPg25SZSz5o'
}
def get_api_response():
    """Fetch the SIM customer listing page and return its body as text.

    A ``requests`` session is loaded with the module-level ``COOKIES`` and a
    set of browser-like headers, then a single GET is issued for a fixed
    ``filterData`` query with ``pageSize=50`` (30 second timeout).

    Returns:
        ``response.text`` when the server answers with HTTP 200, otherwise
        ``None``. A non-200 status or a request failure is reported on
        stdout instead of being raised.
    """
    host = 'prod-eu-cmp.simbalinkglobal.com'
    # No explicit Accept-Encoding: the session default only advertises the
    # encodings that can actually be decoded in this environment, whereas
    # hard-coding "br, zstd" yields undecoded bodies when the optional
    # brotli/zstandard packages are missing.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': host,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    }
    url = "https://prod-eu-cmp.simbalinkglobal.com/simcustomer?filterData=N4IgkgxhCWAmAy0DOAXEAuEAOAnFrATAMw4AM5AjKUQKw1GkFYDsANLvsWZdXURThwA2dnkIlypKrXqMCpUZwk8ZRIcyFFF47uTwVmBoQe1dJ0vtQAsNU8qm966w3d0X6WUlYqvzj-syevip8NOLBDqo4NDhEIKwgqACGKACuSBggIAC%2BQA&pageSize=50"

    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        for name, value in COOKIES.items():
            session.cookies.set(name, value, domain=host)
        session.headers.update(headers)

        try:
            response = session.get(url, timeout=30)
        except requests.RequestException as e:
            # Network-level failures are best-effort: report and give up.
            print(f"API调用异常: {e}")
            return None

        if response.status_code != 200:
            print(f"请求失败: {response.status_code}")
            return None
        return response.text
def parse_response(response_text):
    """Print a diagnostic breakdown of a response body and return its ICCIDs.

    The text is scanned, in order, for:

    1. ICCIDs, i.e. the literal prefix ``8988239000`` followed by 8 digits;
    2. JSON-looking fragments (objects mentioning ``"iccid"``, ``"data"`` or
       ``"result"``, and arrays containing a quoted ICCID);
    3. HTML ``<table>``, ``<tbody>`` and ``<tr>`` elements (only the first
       three matches of each are shown, truncated to 200 characters);
    4. JavaScript assignments to ``window.__INITIAL_STATE__``,
       ``window.__DATA__`` or a ``data`` variable.

    Everything found is printed to stdout.

    Args:
        response_text: The response body to analyse, as a string.

    Returns:
        The list of ICCID strings found, in order of appearance; duplicates
        are kept.
    """
    print("=== 响应内容分析 ===")
    print(f"响应长度: {len(response_text)} 字符")

    # ICCIDs
    iccids = re.findall(r'8988239000\d{8}', response_text)
    print(f"找到 {len(iccids)} 个ICCID:")
    for iccid in iccids:
        print(f" - {iccid}")

    # JSON fragments embedded in the body
    json_patterns = [
        r'\{[^{}]*"iccid"[^{}]*\}',
        r'\[[^\[\]]*"8988239000\d{8}"[^\[\]]*\]',
        r'\{.*"data".*\}',
        r'\{.*"result".*\}',
    ]
    _report_json_candidates(
        response_text, json_patterns, "找到JSON数据模式", "JSON数据", "原始数据"
    )

    # HTML table markup
    table_patterns = [
        r'<table[^>]*>.*?</table>',
        r'<tbody[^>]*>.*?</tbody>',
        r'<tr[^>]*>.*?</tr>',
    ]
    for pattern in table_patterns:
        matches = re.findall(pattern, response_text, re.DOTALL | re.IGNORECASE)
        if matches:
            print(f"\n找到表格数据: {len(matches)} 个匹配")
            # Only the first three matches are shown to keep output short.
            for i, match in enumerate(matches[:3]):
                print(f"表格 {i+1}: {match[:200]}...")

    # State objects assigned from inline JavaScript
    js_patterns = [
        r'window\.__INITIAL_STATE__\s*=\s*(\{.*?\});',
        r'window\.__DATA__\s*=\s*(\{.*?\});',
        r'var\s+data\s*=\s*(\{.*?\});',
        r'const\s+data\s*=\s*(\{.*?\});',
    ]
    _report_json_candidates(
        response_text, js_patterns, "找到JavaScript数据", "JS数据", "原始JS数据"
    )

    return iccids


def _report_json_candidates(text, patterns, found_label, parsed_label, raw_label):
    """Print each regex match in *text*, pretty-printed when it parses as JSON."""
    for pattern in patterns:
        matches = re.findall(pattern, text, re.DOTALL)
        if not matches:
            continue
        print(f"\n{found_label}: {pattern}")
        for match in matches:
            try:
                json_data = json.loads(match)
            except (ValueError, RecursionError):
                # Not valid JSON (json.JSONDecodeError is a ValueError) or
                # nested too deeply to parse: show a truncated raw preview.
                print(f"{raw_label}: {match[:200]}...")
            else:
                print(f"{parsed_label}: {json.dumps(json_data, indent=2, ensure_ascii=False)}")
def create_working_api(output_path='working_api.py', cookies=None):
    """Write a standalone batch-query script to disk.

    The generated script defines ``read_query_items``, ``query_iccids_batch``,
    ``extract_iccids_from_response`` and ``main``. Its ``COOKIES`` dict is
    rendered from *cookies* so the credentials are not duplicated by hand in
    the template.

    Args:
        output_path: Where to write the generated script. Defaults to
            ``working_api.py`` in the current working directory.
        cookies: Mapping of cookie name to value to embed in the generated
            script. Defaults to this module's ``COOKIES``.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print("\n=== 创建可用的API函数 ===")
    if cookies is None:
        cookies = COOKIES

    # repr() yields valid Python string literals whatever quotes or escapes
    # the cookie names and values contain.
    cookie_literal = (
        "{\n"
        + "".join(f"    {name!r}: {value!r},\n" for name, value in cookies.items())
        + "}"
    )

    # NOTE: the template is not a raw string, so "\\d" and "\\n" below come
    # out as "\d" and "\n" in the generated file. `time` is imported at the
    # top of the generated module so that its main() also works when the
    # module is imported rather than run as a script.
    template = '''
# 可用的批量查询API
import requests
import json
import re
import time
from pathlib import Path
# Cookie 信息
COOKIES = __COOKIES_LITERAL__


def read_query_items(path):
    """读取查询项目文件"""
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"{path} not found.")
    encodings = ['utf-8', 'gbk', 'utf-8-sig', 'cp1252']
    for encoding in encodings:
        try:
            lines = [l.strip() for l in p.read_text(encoding=encoding).splitlines() if l.strip()]
            if lines:
                return lines
        except Exception:
            continue
    raise Exception("无法读取文件")


def query_iccids_batch(iccid_list, page_size=50):
    """批量查询ICCID"""
    session = requests.Session()
    # 设置Cookie
    for name, value in COOKIES.items():
        session.cookies.set(name, value, domain='prod-eu-cmp.simbalinkglobal.com')
    # 设置请求头
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'prod-eu-cmp.simbalinkglobal.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    }
    session.headers.update(headers)
    # 使用您提供的filterData需要根据实际ICCID列表调整
    filter_data = "N4IgkgxhCWAmAy0DOAXEAuEAOAnFrATAMw4AM5AjKUQKw1GkFYDsANLvsWZdXURThwA2dnkIlypKrXqMCpUZwk8ZRIcyFFF47uTwVmBoQe1dJ0vtQAsNU8qm966w3d0X6WUlYqvzj-syevip8NOLBDqo4NDhEIKwgqACGKACuSBggIAC%2BQA"
    url = f"https://prod-eu-cmp.simbalinkglobal.com/simcustomer?filterData={filter_data}&pageSize={page_size}"
    try:
        response = session.get(url, timeout=30)
        if response.status_code == 200:
            return response.text
        else:
            print(f"请求失败: {response.status_code}")
            return None
    except Exception as e:
        print(f"API调用异常: {e}")
        return None


def extract_iccids_from_response(response_text):
    """从响应中提取ICCID"""
    iccid_pattern = r'8988239000\\d{8}'
    iccids = re.findall(iccid_pattern, response_text)
    return iccids


def main():
    """主函数"""
    # 读取查询项
    query_items = read_query_items('text.txt')
    print(f"读取到 {len(query_items)} 个查询项")
    # 分批处理
    batch_size = 50
    batches = [query_items[i:i+batch_size] for i in range(0, len(query_items), batch_size)]
    all_results = []
    for i, batch in enumerate(batches, 1):
        print(f"\\n处理批次 {i}/{len(batches)}: {len(batch)} 个ICCID")
        # 查询批次
        response = query_iccids_batch(batch)
        if response:
            # 提取结果
            found_iccids = extract_iccids_from_response(response)
            print(f"找到 {len(found_iccids)} 个ICCID结果")
            all_results.extend(found_iccids)
        # 批次间等待
        if i < len(batches):
            time.sleep(2)
    print(f"\\n总共找到 {len(all_results)} 个ICCID结果")
    # 保存结果
    with open('api_results.txt', 'w', encoding='utf-8') as f:
        for iccid in all_results:
            f.write(f"{iccid}\\n")
    print("结果已保存到 api_results.txt")


if __name__ == "__main__":
    main()
'''
    # str.replace instead of str.format: the template is full of literal
    # braces belonging to the generated code's own f-strings and dicts.
    script = template.replace('__COOKIES_LITERAL__', cookie_literal)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script)
    print(f"可用的API函数已保存到 {output_path}")
def main():
    """Run the tool end to end.

    Fetches the listing page, prints the analysis of its body, writes the
    helper script via ``create_working_api`` and finally prints follow-up
    suggestions. Stops after an error message when no response is available.
    """
    print("=== 响应解析工具 ===")

    body = get_api_response()
    if body:
        # Analysis output is printed by parse_response itself.
        parse_response(body)
        create_working_api()

        closing_lines = (
            "\n=== 解析完成 ===",
            "建议:",
            "1. 查看找到的ICCID列表",
            "2. 使用 working_api.py 进行批量查询",
            "3. 需要进一步分析filterData的生成规则",
        )
        for line in closing_lines:
            print(line)
    else:
        print("无法获取API响应")
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()