# -*- coding: utf-8 -*-
"""AI call success-rate monitoring module.

Monitors the success rate and performance metrics of AI API calls.
Call records are stored in Redis sorted sets scored by call timestamp,
so time-window statistics reduce to ZRANGEBYSCORE queries; threshold
violations are persisted as ``Alert`` rows through the project database.
"""
import json
import logging
from typing import Dict, List, Optional, Any, Tuple
from datetime import datetime, timedelta
from dataclasses import dataclass
from collections import defaultdict
import time

from ..core.database import db_manager
from ..core.models import Alert
from ..core.redis_manager import redis_manager
from ..config.config import Config

logger = logging.getLogger(__name__)


@dataclass
class APICall:
    """A single AI API call record."""

    timestamp: datetime
    user_id: str
    work_order_id: Optional[int]
    model_name: str
    endpoint: str
    success: bool
    response_time: float  # seconds
    status_code: Optional[int]
    error_message: Optional[str]
    input_length: int
    output_length: int


class AISuccessMonitor:
    """AI call success-rate monitor.

    All persistence is best-effort: every public method swallows its own
    exceptions (with logging) so monitoring never breaks the caller.
    """

    # Redis retention for raw call data (30 days), in seconds.
    RETENTION_SECONDS = 30 * 24 * 3600

    def __init__(self):
        # Alerting thresholds.
        self.thresholds = {
            "success_rate_min": 0.95,       # minimum success rate (95%)
            "avg_response_time_max": 10.0,  # maximum average response time, seconds
            "error_rate_max": 0.05,         # maximum error rate (5%)
            "consecutive_failures_max": 5,  # maximum consecutive failures
            "hourly_failures_max": 10       # maximum failures per hour
        }
        # Performance-level definitions, checked from best to worst.
        self.performance_levels = {
            "excellent": {"success_rate": 0.98, "response_time": 2.0},
            "good": {"success_rate": 0.95, "response_time": 5.0},
            "fair": {"success_rate": 0.90, "response_time": 8.0},
            "poor": {"success_rate": 0.85, "response_time": 12.0}
        }

    def _get_redis_client(self):
        """Return a Redis connection (may be falsy when unavailable)."""
        return redis_manager.get_connection()

    def record_api_call(
        self,
        user_id: str,
        work_order_id: Optional[int],
        model_name: str,
        endpoint: str,
        success: bool,
        response_time: float,
        status_code: Optional[int] = None,
        error_message: Optional[str] = None,
        input_length: int = 0,
        output_length: int = 0
    ) -> Optional[APICall]:
        """Record one API call, persist it to Redis and check alert thresholds.

        Returns the created ``APICall``, or ``None`` when recording failed.
        (Annotation fixed: the error path always returned ``None``.)
        """
        try:
            api_call = APICall(
                timestamp=datetime.now(),
                user_id=user_id,
                work_order_id=work_order_id,
                model_name=model_name,
                endpoint=endpoint,
                success=success,
                response_time=response_time,
                status_code=status_code,
                error_message=error_message,
                input_length=input_length,
                output_length=output_length
            )
            # Persist, then evaluate alerting rules against recent history.
            self._save_to_redis(api_call)
            self._check_thresholds(api_call)
            logger.info(f"API调用记录: {model_name} - {'成功' if success else '失败'}")
            return api_call
        except Exception as e:
            logger.error(f"记录API调用失败: {e}")
            return None

    def _save_to_redis(self, api_call: APICall):
        """Save one call to the daily, per-model and per-user sorted sets."""
        redis_client = self._get_redis_client()
        if not redis_client:
            return
        try:
            timestamp = api_call.timestamp.timestamp()
            call_data = {
                "user_id": api_call.user_id,
                "work_order_id": api_call.work_order_id,
                "model_name": api_call.model_name,
                "endpoint": api_call.endpoint,
                "success": api_call.success,
                "response_time": api_call.response_time,
                "status_code": api_call.status_code,
                "error_message": api_call.error_message,
                "input_length": api_call.input_length,
                "output_length": api_call.output_length
            }
            # Serialize once; the same payload is shared by all three keys.
            payload = json.dumps(call_data, ensure_ascii=False)
            keys = (
                "api_calls:daily",
                f"api_calls:model:{api_call.model_name}",
                f"api_calls:user:{api_call.user_id}",
            )
            for key in keys:
                redis_client.zadd(key, {payload: timestamp})
                # Bug fix: the TTL was previously set only on the daily key,
                # letting per-model/per-user sets grow without bound between
                # explicit cleanup_old_data() runs.
                redis_client.expire(key, self.RETENTION_SECONDS)
        except Exception as e:
            logger.error(f"保存API调用到Redis失败: {e}")

    def _fetch_calls(
        self, key: str, start_time: float, end_time: float
    ) -> Optional[List[Dict[str, Any]]]:
        """Return decoded call payloads under ``key`` within [start, end].

        Each dict additionally carries its Redis score (the call timestamp)
        under the ``"_score"`` key. Entries that fail JSON decoding are
        skipped. Returns ``None`` when Redis is unavailable, so callers can
        distinguish "no data" from "no Redis".
        """
        redis_client = self._get_redis_client()
        if not redis_client:
            return None
        calls: List[Dict[str, Any]] = []
        for call_data, score in redis_client.zrangebyscore(
            key, start_time, end_time, withscores=True
        ):
            try:
                call = json.loads(call_data)
            except json.JSONDecodeError:
                continue
            call["_score"] = score
            calls.append(call)
        return calls

    def _check_thresholds(self, api_call: APICall):
        """Evaluate alerting rules for the model behind ``api_call``."""
        try:
            # Rule 1: consecutive failures for this model.
            consecutive_failures = self._get_consecutive_failures(api_call.model_name)
            if consecutive_failures >= self.thresholds["consecutive_failures_max"]:
                self._trigger_alert(
                    "consecutive_failures",
                    f"模型 {api_call.model_name} 连续失败 {consecutive_failures} 次",
                    "critical"
                )
            # Rule 2: total failures in the current clock hour (all models).
            hourly_failures = self._get_hourly_failures(api_call.timestamp)
            if hourly_failures >= self.thresholds["hourly_failures_max"]:
                self._trigger_alert(
                    "high_hourly_failures",
                    f"每小时失败次数过多: {hourly_failures}",
                    "warning"
                )
            # Rule 3: success rate over the last hour.
            success_rate = self._get_recent_success_rate(api_call.model_name, hours=1)
            if success_rate < self.thresholds["success_rate_min"]:
                self._trigger_alert(
                    "low_success_rate",
                    f"模型 {api_call.model_name} 成功率过低: {success_rate:.2%}",
                    "warning"
                )
            # Rule 4: average response time over the last hour.
            avg_response_time = self._get_avg_response_time(api_call.model_name, hours=1)
            if avg_response_time > self.thresholds["avg_response_time_max"]:
                self._trigger_alert(
                    "slow_response",
                    f"模型 {api_call.model_name} 响应时间过长: {avg_response_time:.2f}秒",
                    "warning"
                )
        except Exception as e:
            logger.error(f"检查阈值失败: {e}")

    def _get_consecutive_failures(self, model_name: str) -> int:
        """Count failures at the head of the model's 10 most recent calls."""
        try:
            redis_client = self._get_redis_client()
            if not redis_client:
                return 0
            # Newest first; only the 10 latest calls are examined.
            recent_calls = redis_client.zrevrange(
                f"api_calls:model:{model_name}", 0, 9, withscores=True
            )
            consecutive_failures = 0
            for call_data, _ in recent_calls:
                try:
                    call = json.loads(call_data)
                except json.JSONDecodeError:
                    continue
                if call.get("success", True):
                    break  # streak ends at the first successful call
                consecutive_failures += 1
            return consecutive_failures
        except Exception as e:
            logger.error(f"获取连续失败次数失败: {e}")
            return 0

    def _get_hourly_failures(self, timestamp: datetime) -> int:
        """Count failed calls in the clock hour containing ``timestamp``."""
        try:
            hour_start = timestamp.replace(minute=0, second=0, microsecond=0)
            hour_end = hour_start + timedelta(hours=1)
            # NOTE(review): ZRANGEBYSCORE bounds are inclusive, so a call
            # landing exactly on the hour boundary counts in both windows.
            calls = self._fetch_calls(
                "api_calls:daily", hour_start.timestamp(), hour_end.timestamp()
            )
            if not calls:
                return 0
            return sum(1 for call in calls if not call.get("success", True))
        except Exception as e:
            logger.error(f"获取每小时失败次数失败: {e}")
            return 0

    def _get_recent_success_rate(self, model_name: str, hours: int = 1) -> float:
        """Success rate of the model over the last ``hours`` hours."""
        try:
            now = datetime.now()
            calls = self._fetch_calls(
                f"api_calls:model:{model_name}",
                (now - timedelta(hours=hours)).timestamp(),
                now.timestamp(),
            )
            if calls is None:
                return 0.0  # Redis unavailable (original behavior)
            if not calls:
                return 1.0  # no calls in the window: treat as 100% success
            successful = sum(1 for call in calls if call.get("success", True))
            return successful / len(calls)
        except Exception as e:
            logger.error(f"获取成功率失败: {e}")
            return 0.0

    def _get_avg_response_time(self, model_name: str, hours: int = 1) -> float:
        """Mean positive response time of the model over the last ``hours``."""
        try:
            now = datetime.now()
            calls = self._fetch_calls(
                f"api_calls:model:{model_name}",
                (now - timedelta(hours=hours)).timestamp(),
                now.timestamp(),
            )
            if not calls:
                return 0.0
            # Only positive durations are averaged (0 means "not measured").
            times = [
                call.get("response_time", 0)
                for call in calls
                if call.get("response_time", 0) > 0
            ]
            return sum(times) / len(times) if times else 0.0
        except Exception as e:
            logger.error(f"获取平均响应时间失败: {e}")
            return 0.0

    def _trigger_alert(self, alert_type: str, message: str, severity: str):
        """Persist an alert row; best-effort, never raises to the caller."""
        try:
            alert = Alert(
                rule_name=f"AI成功率监控_{alert_type}",
                alert_type=alert_type,
                level=severity,
                severity=severity,
                message=message,
                is_active=True,
                created_at=datetime.now()
            )
            with db_manager.get_session() as session:
                session.add(alert)
                session.commit()
            logger.warning(f"AI成功率监控预警: {message}")
        except Exception as e:
            logger.error(f"触发AI成功率监控预警失败: {e}")

    def get_model_performance(self, model_name: str, hours: int = 24) -> Dict[str, Any]:
        """Aggregate performance metrics for one model over the last ``hours``."""
        try:
            now = datetime.now()
            calls = self._fetch_calls(
                f"api_calls:model:{model_name}",
                (now - timedelta(hours=hours)).timestamp(),
                now.timestamp(),
            )
            if calls is None:
                return {}
            if not calls:
                return {
                    "model_name": model_name,
                    "total_calls": 0,
                    "success_rate": 0.0,
                    "avg_response_time": 0.0,
                    "error_rate": 0.0,
                    "performance_level": "unknown"
                }
            total_calls = len(calls)
            successful_calls = 0
            failed_calls = 0
            response_times: List[float] = []
            errors: Dict[str, int] = defaultdict(int)
            for call in calls:
                if call.get("success", True):
                    successful_calls += 1
                else:
                    failed_calls += 1
                    errors[call.get("error_message", "unknown")] += 1
                response_time = call.get("response_time", 0)
                if response_time > 0:
                    response_times.append(response_time)
            success_rate = successful_calls / total_calls
            avg_response_time = (
                sum(response_times) / len(response_times) if response_times else 0
            )
            error_rate = failed_calls / total_calls
            performance_level = self._determine_performance_level(
                success_rate, avg_response_time
            )
            # Bug fix: "top errors" must be the five MOST FREQUENT error
            # messages; the old code took the first five in insertion order.
            top_errors = dict(
                sorted(errors.items(), key=lambda item: item[1], reverse=True)[:5]
            )
            return {
                "model_name": model_name,
                "total_calls": total_calls,
                "successful_calls": successful_calls,
                "failed_calls": failed_calls,
                "success_rate": round(success_rate, 4),
                "avg_response_time": round(avg_response_time, 2),
                "error_rate": round(error_rate, 4),
                "performance_level": performance_level,
                "top_errors": top_errors
            }
        except Exception as e:
            logger.error(f"获取模型性能失败: {e}")
            return {}

    def _determine_performance_level(
        self, success_rate: float, avg_response_time: float
    ) -> str:
        """Map metrics to the first (best) level whose thresholds are met."""
        for level, thresholds in self.performance_levels.items():
            if (success_rate >= thresholds["success_rate"]
                    and avg_response_time <= thresholds["response_time"]):
                return level
        return "poor"

    def get_system_performance(self, hours: int = 24) -> Dict[str, Any]:
        """System-wide metrics across all models over the last ``hours``."""
        try:
            now = datetime.now()
            calls = self._fetch_calls(
                "api_calls:daily",
                (now - timedelta(hours=hours)).timestamp(),
                now.timestamp(),
            )
            if calls is None:
                return {}
            if not calls:
                return {
                    "total_calls": 0,
                    "success_rate": 0.0,
                    "avg_response_time": 0.0,
                    "unique_users": 0,
                    "model_distribution": {}
                }
            total_calls = len(calls)
            successful_calls = 0
            failed_calls = 0
            total_response_time = 0.0
            unique_users = set()
            model_distribution: Dict[str, int] = defaultdict(int)
            hourly_distribution: Dict[str, int] = defaultdict(int)
            for call in calls:
                if call.get("success", True):
                    successful_calls += 1
                else:
                    failed_calls += 1
                response_time = call.get("response_time", 0)
                if response_time > 0:
                    total_response_time += response_time
                unique_users.add(call.get("user_id", ""))
                model_distribution[call.get("model_name", "unknown")] += 1
                # Bucket by hour of day using the stored Redis score.
                hour = datetime.fromtimestamp(call["_score"]).strftime("%H:00")
                hourly_distribution[hour] += 1
            success_rate = successful_calls / total_calls
            # NOTE: averaged over ALL calls (including unmeasured ones),
            # matching the original behavior of this method.
            avg_response_time = total_response_time / total_calls
            return {
                "total_calls": total_calls,
                "successful_calls": successful_calls,
                "failed_calls": failed_calls,
                "success_rate": round(success_rate, 4),
                "avg_response_time": round(avg_response_time, 2),
                "unique_users": len(unique_users),
                "model_distribution": dict(model_distribution),
                "hourly_distribution": dict(hourly_distribution)
            }
        except Exception as e:
            logger.error(f"获取系统性能失败: {e}")
            return {}

    def get_performance_trend(self, days: int = 7) -> List[Dict[str, Any]]:
        """Per-day totals/success-rate/latency for the last ``days`` days.

        Returned oldest-first. Days with no data (or Redis unavailable)
        appear as zero entries.
        """
        try:
            trend_data = []
            for offset in range(days):
                date = datetime.now().date() - timedelta(days=offset)
                day_start = datetime.combine(date, datetime.min.time())
                day_end = datetime.combine(date, datetime.max.time())
                calls = self._fetch_calls(
                    "api_calls:daily", day_start.timestamp(), day_end.timestamp()
                )
                if not calls:
                    trend_data.append({
                        "date": date.isoformat(),
                        "total_calls": 0,
                        "success_rate": 0.0,
                        "avg_response_time": 0.0
                    })
                    continue
                successful_calls = sum(
                    1 for call in calls if call.get("success", True)
                )
                total_response_time = sum(
                    call.get("response_time", 0)
                    for call in calls
                    if call.get("response_time", 0) > 0
                )
                trend_data.append({
                    "date": date.isoformat(),
                    "total_calls": len(calls),
                    "success_rate": round(successful_calls / len(calls), 4),
                    # Averaged over all of the day's calls, as before.
                    "avg_response_time": round(total_response_time / len(calls), 2)
                })
            # Collected newest-first; report oldest-first.
            return list(reversed(trend_data))
        except Exception as e:
            logger.error(f"获取性能趋势失败: {e}")
            return []

    def cleanup_old_data(self, days: int = 30) -> int:
        """Remove records older than ``days`` days from every call key.

        Returns the number of entries removed from the daily set (the
        per-model/per-user removals are not included in the count).
        """
        try:
            redis_client = self._get_redis_client()
            if not redis_client:
                return 0
            cutoff_time = (datetime.now() - timedelta(days=days)).timestamp()
            removed_count = redis_client.zremrangebyscore(
                "api_calls:daily", 0, cutoff_time
            )
            # Improvement: SCAN instead of KEYS — KEYS blocks the Redis
            # server while it walks the whole keyspace.
            for pattern in ("api_calls:model:*", "api_calls:user:*"):
                for key in redis_client.scan_iter(pattern):
                    redis_client.zremrangebyscore(key, 0, cutoff_time)
            logger.info(f"清理AI成功率监控数据成功: 数量={removed_count}")
            return removed_count
        except Exception as e:
            logger.error(f"清理AI成功率监控数据失败: {e}")
            return 0