# -*- coding: utf-8 -*-
"""
Database query optimizer.

Provides query optimization, batch operations, and connection-pool
management utilities.
"""
import logging
import time
from contextlib import contextmanager
from typing import Any, Dict, List, Optional, Tuple

from sqlalchemy import func, text
from sqlalchemy.orm import Session

from .cache_manager import cache_manager, cache_result
from .database import db_manager
from .models import Alert, Conversation, KnowledgeEntry, WorkOrder
logger = logging.getLogger(__name__)
class QueryOptimizer:
    """Query optimizer.

    Provides cached/paginated queries, batch insert and update helpers,
    analytics aggregation, index creation, and per-query timing statistics.
    """

    def __init__(self):
        # query name -> list of recent execution times (seconds)
        self.query_stats = {}
        # queries slower than this many seconds count as "slow"
        self.slow_query_threshold = 1.0  # slow-query threshold (seconds)
@cache_result(ttl=60) # 缓存1分钟,提高响应速度
|
||
def get_conversations_paginated(self, page: int = 1, per_page: int = 10,
|
||
search: str = '', user_id: str = '',
|
||
date_filter: str = '') -> Dict[str, Any]:
|
||
"""分页获取对话记录(优化版)"""
|
||
start_time = time.time()
|
||
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
# 构建基础查询
|
||
query = session.query(Conversation)
|
||
|
||
# 应用过滤条件
|
||
if search:
|
||
query = query.filter(
|
||
Conversation.user_message.contains(search) |
|
||
Conversation.assistant_response.contains(search)
|
||
)
|
||
|
||
# Conversation模型没有user_id字段,跳过用户过滤
|
||
# if user_id:
|
||
# query = query.filter(Conversation.user_id == user_id)
|
||
|
||
if date_filter:
|
||
from datetime import datetime, timedelta
|
||
now = datetime.now()
|
||
if date_filter == 'today':
|
||
start_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||
elif date_filter == 'week':
|
||
start_date = now - timedelta(days=7)
|
||
elif date_filter == 'month':
|
||
start_date = now - timedelta(days=30)
|
||
else:
|
||
start_date = None
|
||
|
||
if start_date:
|
||
query = query.filter(Conversation.timestamp >= start_date)
|
||
|
||
# 获取总数(使用索引优化)
|
||
total = query.count()
|
||
|
||
# 分页查询(使用索引)
|
||
conversations = query.order_by(
|
||
Conversation.timestamp.desc()
|
||
).offset((page - 1) * per_page).limit(per_page).all()
|
||
|
||
# 统计数据(批量查询)
|
||
stats = self._get_conversation_stats(session)
|
||
|
||
# 分页信息
|
||
pagination = {
|
||
'current_page': page,
|
||
'per_page': per_page,
|
||
'total_pages': (total + per_page - 1) // per_page,
|
||
'total': total
|
||
}
|
||
|
||
# 转换数据格式
|
||
conversation_list = []
|
||
for conv in conversations:
|
||
conversation_list.append({
|
||
'id': conv.id,
|
||
'session_id': conv.session_id,
|
||
'user_message': conv.user_message,
|
||
'assistant_response': conv.assistant_response,
|
||
'timestamp': conv.timestamp.isoformat() if conv.timestamp else None,
|
||
'confidence_score': conv.confidence_score,
|
||
'work_order_id': conv.work_order_id,
|
||
'ip_address': conv.ip_address,
|
||
'invocation_method': conv.invocation_method,
|
||
# 构造用户显示名称:如果有IP则显示IP,否则显示匿名
|
||
'user_id': f"{conv.ip_address} ({conv.invocation_method})" if conv.ip_address else "匿名"
|
||
})
|
||
|
||
# 记录查询时间
|
||
query_time = time.time() - start_time
|
||
self._record_query_time('get_conversations_paginated', query_time)
|
||
|
||
return {
|
||
'success': True,
|
||
'conversations': conversation_list,
|
||
'pagination': pagination,
|
||
'stats': stats,
|
||
'query_time': query_time
|
||
}
|
||
|
||
except Exception as e:
|
||
logger.error(f"分页查询对话失败: {e}")
|
||
return {'success': False, 'error': str(e)}
|
||
|
||
def _get_conversation_stats(self, session: Session) -> Dict[str, Any]:
|
||
"""获取对话统计信息(批量查询优化)"""
|
||
try:
|
||
from datetime import datetime
|
||
|
||
# 使用单个查询获取多个统计信息
|
||
today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
|
||
|
||
# 批量查询统计信息
|
||
stats_query = session.query(
|
||
func.count(Conversation.id).label('total'),
|
||
func.avg(Conversation.confidence_score).label('avg_response_time')
|
||
).first()
|
||
|
||
today_count = session.query(Conversation).filter(
|
||
Conversation.timestamp >= today_start
|
||
).count()
|
||
|
||
return {
|
||
'total': stats_query.total or 0,
|
||
'today': today_count,
|
||
'avg_response_time': round(stats_query.avg_response_time or 0, 2),
|
||
'active_users': 1 # Conversation模型没有user_id,暂时设为1
|
||
}
|
||
except Exception as e:
|
||
logger.error(f"获取对话统计失败: {e}")
|
||
return {'total': 0, 'today': 0, 'avg_response_time': 0, 'active_users': 0}
|
||
|
||
@cache_result(ttl=30) # 缓存30秒,提高响应速度
|
||
def get_workorders_optimized(self, status_filter: str = '',
|
||
priority_filter: str = '') -> List[Dict[str, Any]]:
|
||
"""优化版工单查询"""
|
||
start_time = time.time()
|
||
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
query = session.query(WorkOrder)
|
||
|
||
if status_filter and status_filter != 'all':
|
||
query = query.filter(WorkOrder.status == status_filter)
|
||
|
||
if priority_filter and priority_filter != 'all':
|
||
query = query.filter(WorkOrder.priority == priority_filter)
|
||
|
||
# 使用索引排序
|
||
workorders = query.order_by(
|
||
WorkOrder.created_at.desc()
|
||
).limit(100).all() # 限制返回数量
|
||
|
||
result = []
|
||
for wo in workorders:
|
||
result.append({
|
||
"id": wo.id,
|
||
"order_id": wo.order_id,
|
||
"title": wo.title,
|
||
"description": wo.description,
|
||
"category": wo.category,
|
||
"priority": wo.priority,
|
||
"status": wo.status,
|
||
"created_at": wo.created_at.isoformat() if wo.created_at else None,
|
||
"updated_at": wo.updated_at.isoformat() if wo.updated_at else None,
|
||
"resolution": wo.resolution,
|
||
"satisfaction_score": wo.satisfaction_score
|
||
})
|
||
|
||
query_time = time.time() - start_time
|
||
self._record_query_time('get_workorders_optimized', query_time)
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
logger.error(f"优化工单查询失败: {e}")
|
||
return []
|
||
|
||
def batch_insert_conversations(self, conversations: List[Dict[str, Any]]) -> bool:
|
||
"""批量插入对话记录"""
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
# 批量插入
|
||
conversation_objects = []
|
||
for conv_data in conversations:
|
||
conv = Conversation(**conv_data)
|
||
conversation_objects.append(conv)
|
||
|
||
session.add_all(conversation_objects)
|
||
session.commit()
|
||
|
||
# 清除相关缓存
|
||
cache_manager.delete('get_conversations_paginated')
|
||
|
||
logger.info(f"批量插入 {len(conversations)} 条对话记录")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"批量插入对话记录失败: {e}")
|
||
return False
|
||
|
||
def batch_update_workorders(self, updates: List[Tuple[int, Dict[str, Any]]]) -> bool:
|
||
"""批量更新工单"""
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
for workorder_id, update_data in updates:
|
||
workorder = session.query(WorkOrder).filter(
|
||
WorkOrder.id == workorder_id
|
||
).first()
|
||
|
||
if workorder:
|
||
for key, value in update_data.items():
|
||
setattr(workorder, key, value)
|
||
|
||
session.commit()
|
||
|
||
# 清除相关缓存
|
||
cache_manager.delete('get_workorders_optimized')
|
||
|
||
logger.info(f"批量更新 {len(updates)} 个工单")
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"批量更新工单失败: {e}")
|
||
return False
|
||
|
||
def get_analytics_optimized(self, days: int = 30) -> Dict[str, Any]:
|
||
"""优化版分析数据查询"""
|
||
start_time = time.time()
|
||
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
from datetime import datetime, timedelta
|
||
|
||
end_time = datetime.now()
|
||
start_time_query = end_time - timedelta(days=days-1)
|
||
|
||
# 批量查询所有需要的数据
|
||
# 修改:查询所有工单,不限制时间范围
|
||
workorders = session.query(WorkOrder).all()
|
||
|
||
# 修改:查询所有预警和对话,不限制时间范围
|
||
alerts = session.query(Alert).all()
|
||
|
||
conversations = session.query(Conversation).all()
|
||
|
||
# 处理数据
|
||
analytics = self._process_analytics_data(workorders, alerts, conversations, days)
|
||
|
||
query_time = time.time() - start_time
|
||
self._record_query_time('get_analytics_optimized', query_time)
|
||
|
||
return analytics
|
||
|
||
except Exception as e:
|
||
logger.error(f"优化分析查询失败: {e}")
|
||
return {}
|
||
|
||
def _process_analytics_data(self, workorders, alerts, conversations, days):
|
||
"""处理分析数据"""
|
||
from collections import defaultdict, Counter
|
||
from datetime import datetime, timedelta
|
||
|
||
end_time = datetime.now()
|
||
start_time = end_time - timedelta(days=days-1)
|
||
|
||
# 趋势数据
|
||
day_keys = [(start_time + timedelta(days=i)).strftime('%Y-%m-%d') for i in range(days)]
|
||
wo_by_day = Counter([(wo.created_at.strftime('%Y-%m-%d') if wo.created_at else end_time.strftime('%Y-%m-%d')) for wo in workorders])
|
||
alert_by_day = Counter([(al.created_at.strftime('%Y-%m-%d') if al.created_at else end_time.strftime('%Y-%m-%d')) for al in alerts])
|
||
|
||
trend = [{
|
||
'date': d,
|
||
'workorders': int(wo_by_day.get(d, 0)),
|
||
'alerts': int(alert_by_day.get(d, 0))
|
||
} for d in day_keys]
|
||
|
||
# 工单统计
|
||
total = len(workorders)
|
||
status_counts = Counter([wo.status for wo in workorders])
|
||
category_counts = Counter([wo.category for wo in workorders])
|
||
priority_counts = Counter([wo.priority for wo in workorders])
|
||
|
||
|
||
# 处理状态映射(支持中英文状态)
|
||
status_mapping = {
|
||
'open': ['open', '待处理', '新建', 'new'],
|
||
'in_progress': ['in_progress', '处理中', '进行中', 'progress', 'processing', 'analysising', 'analyzing'],
|
||
'resolved': ['resolved', '已解决', '已完成'],
|
||
'closed': ['closed', '已关闭', '关闭']
|
||
}
|
||
|
||
# 统计各状态的数量
|
||
mapped_counts = {'open': 0, 'in_progress': 0, 'resolved': 0, 'closed': 0}
|
||
|
||
for status, count in status_counts.items():
|
||
if status is None:
|
||
continue
|
||
status_lower = str(status).lower()
|
||
mapped = False
|
||
for mapped_status, possible_values in status_mapping.items():
|
||
if status_lower in [v.lower() for v in possible_values]:
|
||
mapped_counts[mapped_status] += count
|
||
mapped = True
|
||
break
|
||
|
||
if not mapped:
|
||
logger.warning(f"未映射的状态: '{status}' (数量: {count})")
|
||
|
||
|
||
resolved_count = mapped_counts['resolved']
|
||
|
||
workorders_stats = {
|
||
'total': total,
|
||
'open': mapped_counts['open'],
|
||
'in_progress': mapped_counts['in_progress'],
|
||
'resolved': mapped_counts['resolved'],
|
||
'closed': mapped_counts['closed'],
|
||
'by_category': dict(category_counts),
|
||
'by_priority': dict(priority_counts)
|
||
}
|
||
|
||
# 满意度统计
|
||
scores = [float(wo.satisfaction_score) for wo in workorders if wo.satisfaction_score is not None]
|
||
avg_satisfaction = round(sum(scores)/len(scores), 1) if scores else 0
|
||
dist = Counter([str(int(round(s))) for s in scores]) if scores else {}
|
||
|
||
satisfaction_stats = {
|
||
'average': avg_satisfaction,
|
||
'distribution': {k: int(v) for k, v in dist.items()}
|
||
}
|
||
|
||
# 预警统计
|
||
level_counts = Counter([al.level for al in alerts])
|
||
active_alerts = len([al for al in alerts if al.is_active])
|
||
resolved_alerts = len([al for al in alerts if not al.is_active and al.resolved_at])
|
||
|
||
alerts_stats = {
|
||
'total': len(alerts),
|
||
'active': active_alerts,
|
||
'resolved': resolved_alerts,
|
||
'by_level': {k: int(v) for k, v in level_counts.items()}
|
||
}
|
||
|
||
# 性能指标
|
||
resp_times = [float(c.response_time) for c in conversations if c.response_time is not None]
|
||
avg_resp = round(sum(resp_times)/len(resp_times), 2) if resp_times else 0
|
||
throughput = len(conversations)
|
||
|
||
critical = level_counts.get('critical', 0)
|
||
error_rate = round((critical / alerts_stats['total']) * 100, 2) if alerts_stats['total'] > 0 else 0
|
||
|
||
performance_stats = {
|
||
'response_time': avg_resp,
|
||
'uptime': 99.0,
|
||
'error_rate': error_rate,
|
||
'throughput': throughput
|
||
}
|
||
|
||
return {
|
||
'trend': trend,
|
||
'workorders': workorders_stats,
|
||
'satisfaction': satisfaction_stats,
|
||
'alerts': alerts_stats,
|
||
'performance': performance_stats,
|
||
'summary': {
|
||
'total_workorders': total,
|
||
'resolution_rate': round((resolved_count/total)*100, 1) if total > 0 else 0,
|
||
'avg_satisfaction': avg_satisfaction,
|
||
'active_alerts': active_alerts
|
||
}
|
||
}
|
||
|
||
def _record_query_time(self, query_name: str, query_time: float):
|
||
"""记录查询时间"""
|
||
if query_name not in self.query_stats:
|
||
self.query_stats[query_name] = []
|
||
|
||
self.query_stats[query_name].append(query_time)
|
||
|
||
# 保持最近100次记录
|
||
if len(self.query_stats[query_name]) > 100:
|
||
self.query_stats[query_name] = self.query_stats[query_name][-100:]
|
||
|
||
|
||
def get_query_performance_report(self) -> Dict[str, Any]:
|
||
"""获取查询性能报告"""
|
||
report = {}
|
||
|
||
for query_name, times in self.query_stats.items():
|
||
if times:
|
||
report[query_name] = {
|
||
'count': len(times),
|
||
'avg_time': round(sum(times) / len(times), 3),
|
||
'max_time': round(max(times), 3),
|
||
'min_time': round(min(times), 3),
|
||
'slow_queries': len([t for t in times if t > self.slow_query_threshold])
|
||
}
|
||
|
||
return report
|
||
|
||
def optimize_database_indexes(self) -> bool:
|
||
"""优化数据库索引"""
|
||
try:
|
||
with db_manager.get_session() as session:
|
||
# 创建常用查询的索引
|
||
indexes = [
|
||
"CREATE INDEX IF NOT EXISTS idx_conversations_timestamp ON conversations(timestamp DESC)",
|
||
"CREATE INDEX IF NOT EXISTS idx_conversations_user_id ON conversations(user_id)",
|
||
"CREATE INDEX IF NOT EXISTS idx_conversations_work_order_id ON conversations(work_order_id)",
|
||
"CREATE INDEX IF NOT EXISTS idx_workorders_status ON work_orders(status)",
|
||
"CREATE INDEX IF NOT EXISTS idx_workorders_priority ON work_orders(priority)",
|
||
"CREATE INDEX IF NOT EXISTS idx_workorders_created_at ON work_orders(created_at DESC)",
|
||
"CREATE INDEX IF NOT EXISTS idx_alerts_level ON alerts(level)",
|
||
"CREATE INDEX IF NOT EXISTS idx_alerts_is_active ON alerts(is_active)",
|
||
"CREATE INDEX IF NOT EXISTS idx_alerts_created_at ON alerts(created_at DESC)"
|
||
]
|
||
|
||
for index_sql in indexes:
|
||
try:
|
||
session.execute(text(index_sql))
|
||
except Exception as e:
|
||
logger.warning(f"创建索引失败: {e}")
|
||
|
||
session.commit()
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.error(f"数据库索引优化失败: {e}")
|
||
return False
|
||
|
||
def clear_all_caches(self) -> bool:
|
||
"""清除所有缓存"""
|
||
try:
|
||
cache_manager.clear()
|
||
logger.info("所有缓存已清除")
|
||
return True
|
||
except Exception as e:
|
||
logger.error(f"清除缓存失败: {e}")
|
||
return False
|
||
|
||
|
||
# Global query optimizer instance shared across the application.
query_optimizer = QueryOptimizer()