Files
weibo_signin/backend/signin_executor/app/services/antibot.py
2026-03-09 16:10:29 +08:00

163 lines
6.1 KiB
Python

"""
Anti-bot protection module
Implements various techniques to avoid detection by anti-crawling systems
"""
import random
import logging
from typing import Optional, Dict, Any, List
import httpx
from app.config import settings
logger = logging.getLogger(__name__)

# Predefined User-Agent list for rotation.
# Every entry should be a string a real browser actually sends: a malformed
# or non-existent User-Agent is itself an easy signal for anti-crawling
# systems to key on.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    # Chromium-based Edge identifies itself with a Chrome token followed by a
    # trailing "Edg/<version>" token. The "Edge/<version>" token was only used
    # by legacy (EdgeHTML) Edge, which never reached version 120.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
]
class AntiBotProtection:
    """Anti-bot protection service.

    Bundles the helpers used to make outgoing requests look less automated:
    randomized delays, User-Agent rotation, proxy lookup from a proxy-pool
    service, browser-like request headers, and simulated fingerprint data.
    """

    # Value pools sampled by get_fingerprint_data(). They are constants, so
    # they live at class level instead of being rebuilt on every call.
    _SCREEN_RESOLUTIONS = (
        "1920x1080", "1366x768", "1440x900", "1536x864",
        "1280x720", "2560x1440", "3840x2160",
    )
    # Only valid IANA zone names belong here: "Asia/Beijing" is not one
    # (mainland China is "Asia/Shanghai"), so it was replaced by "Asia/Macau".
    _TIMEZONES = (
        "Asia/Shanghai", "Asia/Macau", "Asia/Hong_Kong",
        "Asia/Taipei", "Asia/Singapore",
    )
    _LANGUAGES = (
        "zh-CN", "zh-CN,zh;q=0.9", "zh-CN,zh;q=0.9,en;q=0.8",
        "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
    )

    def __init__(self):
        """Read the proxy-pool URL and the delay bounds from ``settings``."""
        self.proxy_pool_url = settings.PROXY_POOL_URL
        self.random_delay_min = settings.RANDOM_DELAY_MIN
        self.random_delay_max = settings.RANDOM_DELAY_MAX

    def get_random_delay(self) -> float:
        """
        Generate random delay within configured range.

        Returns:
            Delay in seconds, drawn uniformly between ``random_delay_min``
            and ``random_delay_max``.

        Validates: Requirements 7.1
        """
        delay = random.uniform(self.random_delay_min, self.random_delay_max)
        logger.debug("Generated random delay: %.2fs", delay)
        return delay

    def get_random_user_agent(self) -> str:
        """
        Select random User-Agent from the predefined ``USER_AGENTS`` list.

        Returns:
            User-Agent string.

        Validates: Requirements 7.2
        """
        user_agent = random.choice(USER_AGENTS)
        # Only the first 50 characters are logged to keep the line short.
        logger.debug("Selected User-Agent: %s...", user_agent[:50])
        return user_agent

    @staticmethod
    def _build_proxy_mounts(proxy_url: str) -> Dict[str, str]:
        """Build the per-scheme proxy mapping for one proxy endpoint.

        The scheme of a proxy URL describes how to connect to the *proxy*,
        not to the target site. A plain HTTP proxy tunnels HTTPS traffic via
        CONNECT, so both the "http://" and "https://" keys must point at the
        same endpoint; deriving an "https://host:port" proxy URL would force
        TLS to a proxy that normally does not speak it.

        Args:
            proxy_url: Endpoint as returned by the pool, either bare
                ("host:port") or already carrying a scheme.

        Returns:
            Dict with "http://" and "https://" keys, both mapped to the
            proxy endpoint.
        """
        endpoint = proxy_url.strip()
        # Keep an explicit scheme if the pool supplied one; default to http://
        # otherwise instead of blindly prefixing (which would double it).
        if "://" not in endpoint:
            endpoint = f"http://{endpoint}"
        return {"http://": endpoint, "https://": endpoint}

    async def get_proxy(self) -> Optional[Dict[str, str]]:
        """
        Get proxy from proxy pool service.

        Issues ``GET <proxy_pool_url>/get`` with a 5 second timeout and reads
        the ``"proxy"`` field of the JSON response.

        Returns:
            Proxy dict keyed by "http://" and "https://", or None if the pool
            is not configured, unreachable, answers with a non-200 status, or
            returns no proxy. None means: fall back to a direct connection.

        Validates: Requirements 7.3, 7.4
        """
        if not self.proxy_pool_url:
            # Nothing to query; skip the request that could only fail.
            logger.debug("Proxy pool URL not configured, using direct connection")
            return None

        # Tolerate a trailing slash in the configured base URL.
        base_url = str(self.proxy_pool_url).rstrip("/")

        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get(f"{base_url}/get")

                if response.status_code != 200:
                    logger.warning("Proxy pool returned status %s", response.status_code)
                    return None

                proxy_url = response.json().get("proxy")
                if not proxy_url:
                    logger.warning("Proxy pool returned empty proxy")
                    return None

                proxy_dict = self._build_proxy_mounts(proxy_url)
                logger.info("Obtained proxy: %s", proxy_url)
                return proxy_dict
        except httpx.RequestError as e:
            logger.warning(
                "Proxy pool service unavailable: %s, falling back to direct connection", e
            )
            return None
        except Exception as e:
            # Deliberate best-effort: a malformed pool response (bad JSON,
            # unexpected shape) must not break the caller, so it is logged
            # and treated like "no proxy".
            logger.error("Error getting proxy: %s", e)
            return None

    def build_headers(self, user_agent: Optional[str] = None) -> Dict[str, str]:
        """
        Build HTTP headers with random User-Agent and common headers.

        Args:
            user_agent: Optional custom User-Agent, otherwise random one is selected

        Returns:
            Dict of HTTP headers
        """
        if user_agent is None:
            user_agent = self.get_random_user_agent()

        headers = {
            "User-Agent": user_agent,
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            # NOTE(review): this advertises Brotli ("br"); confirm the HTTP
            # client in use can decode it, otherwise responses may arrive in
            # an encoding it cannot read.
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Referer": "https://weibo.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
        }
        return headers

    def get_fingerprint_data(self) -> Dict[str, Any]:
        """
        Generate browser fingerprint data for simulation.

        Each field is sampled independently, so the result is not correlated
        with any User-Agent chosen elsewhere (e.g. "platform" may not match
        the operating system named in the User-Agent).

        Returns:
            Dict containing fingerprint information
        """
        fingerprint = {
            "screen_resolution": random.choice(self._SCREEN_RESOLUTIONS),
            "timezone": random.choice(self._TIMEZONES),
            "language": random.choice(self._LANGUAGES),
            "color_depth": random.choice([24, 32]),
            "platform": random.choice(["Win32", "MacIntel", "Linux x86_64"]),
            "hardware_concurrency": random.choice([4, 8, 12, 16]),
            "device_memory": random.choice([4, 8, 16, 32]),
        }
        logger.debug("Generated fingerprint: %s", fingerprint)
        return fingerprint
# Global instance shared by importers of this module. It is constructed at
# import time, so AntiBotProtection.__init__ reads the proxy-pool URL and the
# delay bounds from `settings` as soon as the module is loaded.
antibot = AntiBotProtection()