扫码登录,获取cookies
This commit is contained in:
162
backend/signin_executor/app/services/antibot.py
Normal file
162
backend/signin_executor/app/services/antibot.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Anti-bot protection module
|
||||
Implements various techniques to avoid detection by anti-crawling systems
|
||||
"""
|
||||
|
||||
import random
|
||||
import logging
|
||||
from typing import Optional, Dict, Any, List
|
||||
import httpx
|
||||
|
||||
from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Predefined User-Agent strings for rotation.
# Each entry should be a string a real browser build actually sends; a
# malformed value is itself an easy signal for anti-crawling systems.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    # Chromium-based Edge identifies itself with a Chrome token followed by
    # "Edg/<version>" (not "Edge/<version>").
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
]
|
||||
|
||||
|
||||
class AntiBotProtection:
    """Anti-bot protection service.

    Groups the helpers used to make outgoing requests look less automated:
    randomized delays, User-Agent rotation, proxy lookup, browser-like
    request headers and simulated browser fingerprint data.
    """

    def __init__(self):
        """Read the proxy-pool URL and the delay bounds from ``settings``."""
        self.proxy_pool_url = settings.PROXY_POOL_URL
        self.random_delay_min = settings.RANDOM_DELAY_MIN
        self.random_delay_max = settings.RANDOM_DELAY_MAX

    def get_random_delay(self) -> float:
        """
        Generate random delay within configured range.

        Returns:
            Delay in seconds, drawn uniformly between ``random_delay_min``
            and ``random_delay_max``.

        Validates: Requirements 7.1
        """
        delay = random.uniform(self.random_delay_min, self.random_delay_max)
        logger.debug("Generated random delay: %.2fs", delay)
        return delay

    def get_random_user_agent(self) -> str:
        """
        Select random User-Agent from the predefined ``USER_AGENTS`` list.

        Returns:
            User-Agent string.

        Validates: Requirements 7.2
        """
        user_agent = random.choice(USER_AGENTS)
        # Only the first 50 characters are logged to keep debug output short.
        logger.debug("Selected User-Agent: %s...", user_agent[:50])
        return user_agent

    @staticmethod
    def _build_proxy_mapping(proxy_url: str) -> Dict[str, str]:
        """Build the scheme -> proxy endpoint mapping for one pool entry.

        The scheme of a proxy URL describes how the client talks to the proxy
        itself, not which traffic it carries. A plain ``host:port`` entry is
        therefore reached over ``http://`` for both HTTP and HTTPS targets
        (HTTPS is tunnelled); using ``https://`` here would attempt a TLS
        handshake with the proxy. An entry that already carries a scheme is
        used as given.
        """
        endpoint = proxy_url if "://" in proxy_url else f"http://{proxy_url}"
        return {"http://": endpoint, "https://": endpoint}

    async def get_proxy(self) -> Optional[Dict[str, str]]:
        """
        Get proxy from proxy pool service.

        Requests ``<proxy_pool_url>/get`` and reads the ``"proxy"`` field of
        the JSON response.

        Returns:
            Dict mapping ``"http://"`` and ``"https://"`` to the proxy URL, or
            None if no proxy could be obtained. None means the caller should
            fall back to a direct connection; this method never raises.

        Validates: Requirements 7.3, 7.4
        """
        if not self.proxy_pool_url:
            # Nothing to query; skip the request instead of failing on it.
            logger.debug("No proxy pool configured, using direct connection")
            return None

        # Tolerate a trailing slash in the configured base URL.
        endpoint = f"{str(self.proxy_pool_url).rstrip('/')}/get"

        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(endpoint)

            if response.status_code != 200:
                logger.warning("Proxy pool returned status %s", response.status_code)
                return None

            proxy_info = response.json()
            # A non-object payload is treated the same as a missing proxy.
            proxy_url = proxy_info.get("proxy") if isinstance(proxy_info, dict) else None
            if not proxy_url:
                logger.warning("Proxy pool returned empty proxy")
                return None

            logger.info("Obtained proxy: %s", proxy_url)
            return self._build_proxy_mapping(proxy_url)

        except httpx.RequestError as e:
            logger.warning(
                "Proxy pool service unavailable: %s, falling back to direct connection", e
            )
            return None
        except Exception as e:
            # Deliberate best-effort boundary: any other failure (e.g. an
            # undecodable body) also degrades to a direct connection.
            logger.error("Error getting proxy: %s", e)
            return None

    def build_headers(self, user_agent: Optional[str] = None) -> Dict[str, str]:
        """
        Build HTTP headers with random User-Agent and common headers.

        Args:
            user_agent: Optional custom User-Agent, otherwise random one is selected

        Returns:
            Dict of HTTP headers
        """
        if user_agent is None:
            user_agent = self.get_random_user_agent()

        return {
            "User-Agent": user_agent,
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            # NOTE(review): advertising "br" assumes the HTTP client can decode
            # Brotli responses -- confirm the runtime has Brotli support.
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://weibo.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }

    def get_fingerprint_data(self) -> Dict[str, Any]:
        """
        Generate browser fingerprint data for simulation.

        Every field is drawn independently at random from a fixed set of
        candidate values; the fields are not correlated with each other or
        with the User-Agent.

        Returns:
            Dict containing fingerprint information
        """
        screen_resolutions = [
            "1920x1080", "1366x768", "1440x900", "1536x864",
            "1280x720", "2560x1440", "3840x2160",
        ]

        # Only valid IANA zone names: "Asia/Beijing" does not exist in the tz
        # database, so reporting it would mark the fingerprint as fabricated.
        timezones = [
            "Asia/Shanghai", "Asia/Macau", "Asia/Hong_Kong",
            "Asia/Taipei", "Asia/Singapore",
        ]

        languages = [
            "zh-CN", "zh-CN,zh;q=0.9", "zh-CN,zh;q=0.9,en;q=0.8",
            "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        ]

        fingerprint = {
            "screen_resolution": random.choice(screen_resolutions),
            "timezone": random.choice(timezones),
            "language": random.choice(languages),
            "color_depth": random.choice([24, 32]),
            "platform": random.choice(["Win32", "MacIntel", "Linux x86_64"]),
            "hardware_concurrency": random.choice([4, 8, 12, 16]),
            "device_memory": random.choice([4, 8, 16, 32]),
        }

        logger.debug("Generated fingerprint: %s", fingerprint)
        return fingerprint
|
||||
|
||||
|
||||
# Global instance: module-level AntiBotProtection shared by importers of this
# module. It is constructed at import time, so its __init__ reads the
# proxy-pool URL and delay bounds from settings when the module is first loaded.
|
||||
antibot = AntiBotProtection()
|
||||
Reference in New Issue
Block a user