Files
damai/scripts/captcha_solver.py

161 lines
4.5 KiB
Python
Raw Permalink Normal View History

# coding: utf-8
"""
验证码处理模块
支持图像验证码滑块验证码
依赖Pillow (PIL)
"""
import io
import logging
from dataclasses import dataclass
from enum import Enum, auto
from typing import Optional, Tuple
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
logger = logging.getLogger("damai.captcha")
class CaptchaType(Enum):
    """Kinds of captcha this module distinguishes.

    Values are assigned by ``auto()`` in declaration order.
    """

    # No captcha on the page.
    NONE = auto()
    # Image (graphic) captcha.
    IMAGE = auto()
    # Slider captcha.
    SLIDER = auto()
    # Click-to-select captcha.
    CLICK = auto()
    # Something captcha-like that could not be classified.
    UNKNOWN = auto()
@dataclass
class CaptchaResult:
    """Outcome of a captcha detection / handling attempt."""

    # Whether the attempt is considered successful.
    success: bool
    # The captcha kind this result refers to.
    captcha_type: CaptchaType
    # Optional human-readable detail (e.g. a failure reason); empty by default.
    message: str = ""
def detect_captcha(driver) -> CaptchaType:
    """Report which kind of captcha, if any, is present on the current page.

    Slider selectors are probed first, then image selectors; the first
    selector that ``driver.find_element`` resolves decides the result.
    If none match, the page source is scanned for captcha keywords and
    ``CaptchaType.UNKNOWN`` is returned on a hit. Otherwise the result is
    ``CaptchaType.NONE``. Any exception raised while probing is swallowed.
    """
    # (result, log prefix, CSS selectors), in probing priority order.
    probes = (
        (
            CaptchaType.SLIDER,
            "检测到滑块验证码",
            (
                ".captcha-slider",
                ".slide-verify",
                "[class*='slider']",
                ".nc-lang-cn",  # Alibaba Cloud Shield slider
            ),
        ),
        (
            CaptchaType.IMAGE,
            "检测到图形验证码",
            (
                ".captcha-img",
                "img[src*='captcha']",
                "img[src*='verify']",
                "#captcha_img",
            ),
        ),
    )

    for kind, log_prefix, css_candidates in probes:
        for css in css_candidates:
            try:
                # find_element raises when nothing matches; that simply
                # means "try the next selector".
                driver.find_element(By.CSS_SELECTOR, css)
                logger.info(f"{log_prefix}: {css}")
                return kind
            except Exception:
                continue

    # Fallback: look for captcha keywords anywhere in the page source.
    try:
        html = driver.page_source.lower()
        if any(keyword in html for keyword in ("captcha", "验证码")):
            return CaptchaType.UNKNOWN
    except Exception:
        pass

    return CaptchaType.NONE
def _slider_step_offsets(distance: int, steps: int = 20) -> list:
    """Split a drag of ``distance`` pixels into ``steps`` per-step x offsets.

    Cumulative progress follows the smoothstep curve ``3t^2 - 2t^3``
    (slow start, faster middle, slow finish). Each offset is the difference
    between two consecutive rounded cumulative positions, so every offset
    is non-negative and the offsets sum to exactly ``distance``.
    """
    offsets = []
    travelled = 0
    for i in range(1, steps + 1):
        t = i / steps
        target = round(distance * t * t * (3 - 2 * t))
        offsets.append(target - travelled)
        travelled = target
    return offsets


def solve_slider(driver, slider_element: Optional[WebElement] = None) -> CaptchaResult:
    """Handle a slider captcha by dragging the knob across its track.

    Simple implementation: the knob is dragged from the left edge to the
    right edge of its parent element. A more precise solution would need
    image processing to locate the gap position.

    Args:
        driver: Selenium WebDriver for the page showing the captcha.
        slider_element: The slider knob, if the caller already located it.
            When omitted, a list of known CSS selectors is tried in order.

    Returns:
        A ``CaptchaResult`` whose type is ``CaptchaType.SLIDER``.
        ``success=True`` only means the drag gesture was performed without
        error; it does not confirm that the site accepted the captcha.
        On failure ``message`` carries the reason.
    """
    from selenium.webdriver.common.action_chains import ActionChains

    selectors = [
        ".captcha-slider .slide-btn",
        ".slide-verify-slider-mask-item",
        ".nc-lang-cn .btn_slide",
        "[class*='slider'] [class*='btn']",
    ]
    slider = slider_element
    if slider is None:
        for sel in selectors:
            try:
                slider = driver.find_element(By.CSS_SELECTOR, sel)
                break
            except Exception:
                continue
    if slider is None:
        return CaptchaResult(False, CaptchaType.SLIDER, "找不到滑块元素")

    try:
        # The knob's parent element is treated as the track.
        track = slider.find_element(By.XPATH, "..")
        distance = int(track.size["width"] - slider.size["width"])
        if distance <= 0:
            distance = 300  # fallback when the measured sizes are unusable

        actions = ActionChains(driver)
        actions.click_and_hold(slider)
        actions.pause(0.1)
        # Human-like drag: accelerate, then decelerate. The offsets are
        # consecutive differences of one cumulative curve, so the knob never
        # moves backwards and the total travel equals `distance`.
        for move in _slider_step_offsets(distance, steps=20):
            actions.move_by_offset(move, 0)
            actions.pause(0.02)
        actions.release()
        actions.perform()
        logger.info("滑块拖动完成")
        return CaptchaResult(True, CaptchaType.SLIDER)
    except Exception as e:
        logger.error(f"滑块处理失败: {e}")
        return CaptchaResult(False, CaptchaType.SLIDER, str(e))
def handle_captcha(driver) -> CaptchaResult:
    """Detect the captcha on the current page and try to handle it.

    A slider captcha is delegated to ``solve_slider``. When no captcha is
    detected, a successful result is returned. Any other kind is logged as
    requiring manual handling and reported as a failure.
    """
    kind = detect_captcha(driver)

    if kind is CaptchaType.SLIDER:
        return solve_slider(driver)

    if kind is CaptchaType.NONE:
        return CaptchaResult(True, CaptchaType.NONE)

    # Everything below cannot be solved automatically.
    if kind is CaptchaType.IMAGE:
        logger.warning("图形验证码需要人工处理或接入 OCR 服务")
        failure_note = "需要人工处理"
    else:
        logger.warning("未知验证码类型,需要人工处理")
        failure_note = "未知类型"
    return CaptchaResult(False, kind, failure_note)