2025-09-06 21:06:18 +08:00
|
|
|
|
import json
|
|
|
|
|
|
import logging
|
|
|
|
|
|
from typing import List, Dict, Optional, Any
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
|
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
2026-04-01 16:11:02 +08:00
|
|
|
|
from sqlalchemy import func, Integer
|
2025-09-06 21:06:18 +08:00
|
|
|
|
|
|
|
|
|
|
from ..core.database import db_manager
|
|
|
|
|
|
from ..core.models import KnowledgeEntry, WorkOrder, Conversation
|
|
|
|
|
|
from ..core.llm_client import QwenClient
|
2026-03-20 16:50:26 +08:00
|
|
|
|
from ..core.embedding_client import EmbeddingClient
|
|
|
|
|
|
from ..core.vector_store import vector_store
|
|
|
|
|
|
from ..config.unified_config import get_config
|
2025-09-06 21:06:18 +08:00
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class KnowledgeManager:
|
|
|
|
|
|
"""知识库管理器"""
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def __init__(self):
|
2025-09-18 20:12:54 +01:00
|
|
|
|
self.llm_client = QwenClient()
|
2026-03-20 16:50:26 +08:00
|
|
|
|
self.embedding_client = EmbeddingClient()
|
|
|
|
|
|
self.embedding_enabled = get_config().embedding.enabled
|
|
|
|
|
|
self.similarity_threshold = get_config().embedding.similarity_threshold
|
2025-09-06 21:06:18 +08:00
|
|
|
|
self.vectorizer = TfidfVectorizer(
|
|
|
|
|
|
max_features=1000,
|
|
|
|
|
|
stop_words=None, # 不使用英文停用词,因为数据是中文
|
|
|
|
|
|
ngram_range=(1, 2)
|
|
|
|
|
|
)
|
|
|
|
|
|
self._load_vectorizer()
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 加载向量索引(embedding 模式)
|
|
|
|
|
|
if self.embedding_enabled:
|
|
|
|
|
|
vector_store.load_from_db()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def _load_vectorizer(self):
|
|
|
|
|
|
"""加载向量化器"""
|
|
|
|
|
|
try:
|
2026-04-02 22:19:56 +08:00
|
|
|
|
logger.debug("正在初始化知识库向量化器...")
|
2025-09-06 21:06:18 +08:00
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entries = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).all()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if entries:
|
|
|
|
|
|
texts = [entry.question + " " + entry.answer for entry in entries]
|
|
|
|
|
|
self.vectorizer.fit(texts)
|
2026-04-02 22:19:56 +08:00
|
|
|
|
logger.debug(f"向量化器加载成功: 共处理 {len(entries)} 个知识条目")
|
2026-02-11 00:08:09 +08:00
|
|
|
|
else:
|
|
|
|
|
|
logger.warning("知识库尚无活跃条目,向量化器将保持空状态")
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"加载向量化器失败: {e}")
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def learn_from_work_order(self, work_order_id: int) -> bool:
|
|
|
|
|
|
"""从工单中学习知识"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
work_order = session.query(WorkOrder).filter(
|
|
|
|
|
|
WorkOrder.id == work_order_id
|
|
|
|
|
|
).first()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not work_order or not work_order.resolution:
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 提取问题和答案
|
|
|
|
|
|
question = work_order.title + " " + work_order.description
|
|
|
|
|
|
answer = work_order.resolution
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
|
|
|
|
|
logger.info(f"开始从工单 {work_order_id} 学习知识: 标题长度={len(work_order.title)}, 描述长度={len(work_order.description)}")
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 检查是否已存在相似条目
|
|
|
|
|
|
existing_entry = self._find_similar_entry(question, session)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if existing_entry:
|
|
|
|
|
|
# 更新现有条目
|
2026-02-11 00:08:09 +08:00
|
|
|
|
logger.info(f"检测到相似知识条目 (ID: {existing_entry.id}),执行更新操作")
|
2025-09-06 21:06:18 +08:00
|
|
|
|
existing_entry.answer = answer
|
|
|
|
|
|
existing_entry.usage_count += 1
|
|
|
|
|
|
existing_entry.updated_at = datetime.now()
|
|
|
|
|
|
if work_order.satisfaction_score:
|
|
|
|
|
|
existing_entry.confidence_score = work_order.satisfaction_score
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 更新 embedding
|
|
|
|
|
|
if self.embedding_enabled:
|
|
|
|
|
|
vec = self.embedding_client.embed_text(question + " " + answer)
|
|
|
|
|
|
if vec:
|
|
|
|
|
|
existing_entry.vector_embedding = json.dumps(vec)
|
|
|
|
|
|
vector_store.update(existing_entry.id, vec)
|
2025-09-06 21:06:18 +08:00
|
|
|
|
else:
|
|
|
|
|
|
# 创建新条目
|
2026-02-11 00:08:09 +08:00
|
|
|
|
logger.info(f"未发现相似条目,正在为工单 {work_order_id} 创建新知识点")
|
2026-03-20 16:50:26 +08:00
|
|
|
|
embedding_json = None
|
|
|
|
|
|
vec = None
|
|
|
|
|
|
if self.embedding_enabled:
|
|
|
|
|
|
vec = self.embedding_client.embed_text(question + " " + answer)
|
|
|
|
|
|
if vec:
|
|
|
|
|
|
embedding_json = json.dumps(vec)
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
new_entry = KnowledgeEntry(
|
|
|
|
|
|
question=question,
|
|
|
|
|
|
answer=answer,
|
|
|
|
|
|
category=work_order.category,
|
2026-04-02 21:53:05 +08:00
|
|
|
|
tenant_id=work_order.tenant_id,
|
2025-09-06 21:06:18 +08:00
|
|
|
|
confidence_score=work_order.satisfaction_score or 0.5,
|
2026-03-20 16:50:26 +08:00
|
|
|
|
usage_count=1,
|
|
|
|
|
|
vector_embedding=embedding_json
|
2025-09-06 21:06:18 +08:00
|
|
|
|
)
|
|
|
|
|
|
session.add(new_entry)
|
2026-03-20 16:50:26 +08:00
|
|
|
|
session.flush() # 获取 ID
|
|
|
|
|
|
if vec and new_entry.id:
|
|
|
|
|
|
vector_store.add(new_entry.id, vec)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
session.commit()
|
|
|
|
|
|
logger.info(f"从工单 {work_order_id} 学习知识成功")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"从工单学习知识失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def _find_similar_entry(self, question: str, session) -> Optional[KnowledgeEntry]:
|
|
|
|
|
|
"""查找相似的知识库条目"""
|
|
|
|
|
|
try:
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 优先使用 embedding 查找
|
|
|
|
|
|
if self.embedding_enabled:
|
|
|
|
|
|
query_vec = self.embedding_client.embed_text(question)
|
|
|
|
|
|
if query_vec:
|
|
|
|
|
|
candidates = vector_store.search(query_vec, top_k=1, threshold=0.8)
|
|
|
|
|
|
if candidates:
|
|
|
|
|
|
entry_id, score = candidates[0]
|
|
|
|
|
|
entry = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id == entry_id,
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).first()
|
|
|
|
|
|
if entry:
|
|
|
|
|
|
logger.info(f"Embedding 匹配成功: 相似度 {score:.4f}, ID={entry_id}")
|
|
|
|
|
|
return entry
|
|
|
|
|
|
|
|
|
|
|
|
# 降级:TF-IDF 匹配
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entries = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).all()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entries:
|
|
|
|
|
|
return None
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
texts = [entry.question for entry in entries]
|
|
|
|
|
|
question_vector = self.vectorizer.transform([question])
|
|
|
|
|
|
entry_vectors = self.vectorizer.transform(texts)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
similarities = cosine_similarity(question_vector, entry_vectors)[0]
|
|
|
|
|
|
max_similarity_idx = np.argmax(similarities)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
max_score = similarities[max_similarity_idx]
|
|
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
logger.debug(f"TF-IDF 相似度检索: 最高分值={max_score:.4f}")
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
if max_score > 0.8:
|
2025-09-06 21:06:18 +08:00
|
|
|
|
return entries[max_similarity_idx]
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
return None
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"查找相似条目失败: {e}")
|
|
|
|
|
|
return None
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def search_knowledge(self, query: str, top_k: int = 3, verified_only: bool = True, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]:
|
2026-03-20 16:50:26 +08:00
|
|
|
|
"""搜索知识库 — 优先使用 embedding 语义检索,降级为关键词匹配"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
# 尝试 embedding 语义检索
|
|
|
|
|
|
if self.embedding_enabled:
|
2026-04-01 16:11:02 +08:00
|
|
|
|
results = self._search_by_embedding(query, top_k, verified_only, tenant_id=tenant_id)
|
2026-03-20 16:50:26 +08:00
|
|
|
|
if results:
|
|
|
|
|
|
return results
|
|
|
|
|
|
logger.debug("Embedding 检索无结果,降级为关键词匹配")
|
|
|
|
|
|
|
|
|
|
|
|
# 降级:关键词匹配
|
2026-04-01 16:11:02 +08:00
|
|
|
|
return self._search_by_keyword(query, top_k, verified_only, tenant_id=tenant_id)
|
2026-03-20 16:50:26 +08:00
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"搜索知识库失败: {e}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def _search_by_embedding(self, query: str, top_k: int = 3, verified_only: bool = True, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]:
|
2026-03-20 16:50:26 +08:00
|
|
|
|
"""基于 embedding 向量的语义检索"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
query_vec = self.embedding_client.embed_text(query)
|
|
|
|
|
|
if query_vec is None:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
# 向量检索
|
|
|
|
|
|
candidates = vector_store.search(
|
|
|
|
|
|
query_vector=query_vec,
|
|
|
|
|
|
top_k=top_k * 3, # 多取一些,后面过滤
|
|
|
|
|
|
threshold=self.similarity_threshold
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if not candidates:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
# 从 DB 获取完整条目并过滤
|
|
|
|
|
|
candidate_ids = [cid for cid, _ in candidates]
|
|
|
|
|
|
score_map = {cid: score for cid, score in candidates}
|
|
|
|
|
|
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
query_filter = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id.in_(candidate_ids),
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
)
|
2026-04-01 16:11:02 +08:00
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
query_filter = query_filter.filter(KnowledgeEntry.tenant_id == tenant_id)
|
2026-03-20 16:50:26 +08:00
|
|
|
|
if verified_only:
|
|
|
|
|
|
query_filter = query_filter.filter(KnowledgeEntry.is_verified == True)
|
|
|
|
|
|
|
|
|
|
|
|
entries = query_filter.all()
|
|
|
|
|
|
|
|
|
|
|
|
# 如果 verified_only 没结果,回退到全部
|
|
|
|
|
|
if not entries and verified_only:
|
2026-04-01 16:11:02 +08:00
|
|
|
|
fallback_filter = session.query(KnowledgeEntry).filter(
|
2026-03-20 16:50:26 +08:00
|
|
|
|
KnowledgeEntry.id.in_(candidate_ids),
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
2026-04-01 16:11:02 +08:00
|
|
|
|
)
|
|
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
fallback_filter = fallback_filter.filter(KnowledgeEntry.tenant_id == tenant_id)
|
|
|
|
|
|
entries = fallback_filter.all()
|
2026-03-20 16:50:26 +08:00
|
|
|
|
|
|
|
|
|
|
results = []
|
|
|
|
|
|
for entry in entries:
|
|
|
|
|
|
results.append({
|
|
|
|
|
|
"id": entry.id,
|
|
|
|
|
|
"question": entry.question,
|
|
|
|
|
|
"answer": entry.answer,
|
|
|
|
|
|
"category": entry.category,
|
|
|
|
|
|
"confidence_score": entry.confidence_score,
|
|
|
|
|
|
"similarity_score": score_map.get(entry.id, 0.0),
|
|
|
|
|
|
"usage_count": entry.usage_count,
|
|
|
|
|
|
"is_verified": entry.is_verified
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
results.sort(key=lambda x: x['similarity_score'], reverse=True)
|
|
|
|
|
|
results = results[:top_k]
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"Embedding 搜索 '{query[:30]}' 返回 {len(results)} 个结果")
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"Embedding 搜索失败: {e}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def _search_by_keyword(self, query: str, top_k: int = 3, verified_only: bool = True, tenant_id: Optional[str] = None) -> List[Dict[str, Any]]:
|
2026-03-20 16:50:26 +08:00
|
|
|
|
"""基于关键词的搜索(降级方案)"""
|
2025-09-06 21:06:18 +08:00
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
# 构建查询条件
|
|
|
|
|
|
query_filter = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
query_filter = query_filter.filter(KnowledgeEntry.tenant_id == tenant_id)
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 如果只搜索已验证的知识库
|
|
|
|
|
|
if verified_only:
|
|
|
|
|
|
query_filter = query_filter.filter(KnowledgeEntry.is_verified == True)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entries = query_filter.all()
|
2025-09-16 17:05:50 +01:00
|
|
|
|
# 若已验证为空,则回退到全部活跃条目
|
|
|
|
|
|
if not entries and verified_only:
|
2026-04-01 16:11:02 +08:00
|
|
|
|
fallback_filter = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True)
|
|
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
fallback_filter = fallback_filter.filter(KnowledgeEntry.tenant_id == tenant_id)
|
|
|
|
|
|
entries = fallback_filter.all()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entries:
|
2025-09-22 16:28:00 +01:00
|
|
|
|
logger.warning("知识库中没有活跃条目")
|
2025-09-06 21:06:18 +08:00
|
|
|
|
return []
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 如果查询为空,返回所有条目
|
|
|
|
|
|
if not query.strip():
|
|
|
|
|
|
logger.info("查询为空,返回所有条目")
|
|
|
|
|
|
return [{
|
|
|
|
|
|
"id": entry.id,
|
|
|
|
|
|
"question": entry.question,
|
|
|
|
|
|
"answer": entry.answer,
|
|
|
|
|
|
"category": entry.category,
|
|
|
|
|
|
"confidence_score": entry.confidence_score,
|
|
|
|
|
|
"similarity_score": 1.0,
|
|
|
|
|
|
"usage_count": entry.usage_count,
|
|
|
|
|
|
"is_verified": entry.is_verified
|
|
|
|
|
|
} for entry in entries[:top_k]]
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 使用简化的关键词匹配搜索
|
|
|
|
|
|
q = query.strip().lower()
|
2025-09-06 21:06:18 +08:00
|
|
|
|
results = []
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
for entry in entries:
|
|
|
|
|
|
# 组合问题和答案进行搜索
|
|
|
|
|
|
search_text = (entry.question + " " + entry.answer).lower()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 计算匹配分数
|
|
|
|
|
|
score = 0.0
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 完全匹配
|
|
|
|
|
|
if q in search_text:
|
|
|
|
|
|
score = 1.0
|
|
|
|
|
|
else:
|
|
|
|
|
|
# 分词匹配
|
|
|
|
|
|
query_words = q.split()
|
|
|
|
|
|
text_words = search_text.split()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 计算单词匹配度
|
|
|
|
|
|
matched_words = 0
|
|
|
|
|
|
for word in query_words:
|
|
|
|
|
|
if word in text_words:
|
|
|
|
|
|
matched_words += 1
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
if matched_words > 0:
|
|
|
|
|
|
score = matched_words / len(query_words) * 0.8
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 如果分数大于0,添加到结果中
|
|
|
|
|
|
if score > 0:
|
2025-09-06 21:06:18 +08:00
|
|
|
|
results.append({
|
|
|
|
|
|
"id": entry.id,
|
|
|
|
|
|
"question": entry.question,
|
|
|
|
|
|
"answer": entry.answer,
|
|
|
|
|
|
"category": entry.category,
|
|
|
|
|
|
"confidence_score": entry.confidence_score,
|
2025-09-22 16:28:00 +01:00
|
|
|
|
"similarity_score": score,
|
2025-09-06 21:06:18 +08:00
|
|
|
|
"usage_count": entry.usage_count,
|
|
|
|
|
|
"is_verified": entry.is_verified
|
|
|
|
|
|
})
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
# 按相似度排序并返回top_k个结果
|
|
|
|
|
|
results.sort(key=lambda x: x['similarity_score'], reverse=True)
|
|
|
|
|
|
results = results[:top_k]
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-22 16:28:00 +01:00
|
|
|
|
logger.info(f"搜索查询 '{query}' 返回 {len(results)} 个结果")
|
2025-09-06 21:06:18 +08:00
|
|
|
|
return results
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"搜索知识库失败: {e}")
|
|
|
|
|
|
return []
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def add_knowledge_entry(
|
|
|
|
|
|
self,
|
|
|
|
|
|
question: str,
|
|
|
|
|
|
answer: str,
|
|
|
|
|
|
category: str,
|
|
|
|
|
|
confidence_score: float = 0.5,
|
2026-04-01 16:11:02 +08:00
|
|
|
|
is_verified: bool = False,
|
|
|
|
|
|
tenant_id: Optional[str] = None
|
2025-09-06 21:06:18 +08:00
|
|
|
|
) -> bool:
|
|
|
|
|
|
"""添加知识库条目"""
|
|
|
|
|
|
try:
|
2026-04-01 16:11:02 +08:00
|
|
|
|
# 确定 tenant_id:优先使用传入值,否则取配置默认值
|
|
|
|
|
|
effective_tenant_id = tenant_id if tenant_id is not None else get_config().server.tenant_id
|
|
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 生成 embedding
|
|
|
|
|
|
embedding_json = None
|
|
|
|
|
|
text_for_embedding = question + " " + answer
|
|
|
|
|
|
if self.embedding_enabled:
|
|
|
|
|
|
vec = self.embedding_client.embed_text(text_for_embedding)
|
|
|
|
|
|
if vec:
|
|
|
|
|
|
embedding_json = json.dumps(vec)
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entry = KnowledgeEntry(
|
|
|
|
|
|
question=question,
|
|
|
|
|
|
answer=answer,
|
|
|
|
|
|
category=category,
|
|
|
|
|
|
confidence_score=confidence_score,
|
|
|
|
|
|
usage_count=0,
|
2026-03-20 16:50:26 +08:00
|
|
|
|
is_verified=is_verified,
|
2026-04-01 16:11:02 +08:00
|
|
|
|
tenant_id=effective_tenant_id,
|
2026-03-20 16:50:26 +08:00
|
|
|
|
vector_embedding=embedding_json
|
2025-09-06 21:06:18 +08:00
|
|
|
|
)
|
|
|
|
|
|
session.add(entry)
|
|
|
|
|
|
session.commit()
|
2026-03-20 16:50:26 +08:00
|
|
|
|
entry_id = entry.id
|
|
|
|
|
|
|
|
|
|
|
|
# 更新向量索引
|
|
|
|
|
|
if vec and entry_id:
|
|
|
|
|
|
vector_store.add(entry_id, vec)
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 重新训练 TF-IDF 向量化器
|
2025-09-06 21:06:18 +08:00
|
|
|
|
self._load_vectorizer()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
logger.info(f"添加知识库条目成功: {question[:50]}...")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"添加知识库条目失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def update_knowledge_entry(
|
|
|
|
|
|
self,
|
|
|
|
|
|
entry_id: int,
|
|
|
|
|
|
question: str = None,
|
|
|
|
|
|
answer: str = None,
|
|
|
|
|
|
category: str = None,
|
|
|
|
|
|
confidence_score: float = None
|
|
|
|
|
|
) -> bool:
|
|
|
|
|
|
"""更新知识库条目"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entry = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id == entry_id
|
|
|
|
|
|
).first()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entry:
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
content_changed = False
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if question:
|
|
|
|
|
|
entry.question = question
|
2026-03-20 16:50:26 +08:00
|
|
|
|
content_changed = True
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if answer:
|
|
|
|
|
|
entry.answer = answer
|
2026-03-20 16:50:26 +08:00
|
|
|
|
content_changed = True
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if category:
|
|
|
|
|
|
entry.category = category
|
|
|
|
|
|
if confidence_score is not None:
|
|
|
|
|
|
entry.confidence_score = confidence_score
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 内容变更时重新生成 embedding
|
|
|
|
|
|
if content_changed and self.embedding_enabled:
|
|
|
|
|
|
text_for_embedding = (question or entry.question) + " " + (answer or entry.answer)
|
|
|
|
|
|
vec = self.embedding_client.embed_text(text_for_embedding)
|
|
|
|
|
|
if vec:
|
|
|
|
|
|
entry.vector_embedding = json.dumps(vec)
|
|
|
|
|
|
vector_store.update(entry_id, vec)
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entry.updated_at = datetime.now()
|
|
|
|
|
|
session.commit()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
logger.info(f"更新知识库条目成功: {entry_id}")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新知识库条目失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def get_knowledge_entries(self, page: int = 1, per_page: int = 10) -> Dict[str, Any]:
|
|
|
|
|
|
"""获取知识库条目(分页)"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
# 计算偏移量
|
|
|
|
|
|
offset = (page - 1) * per_page
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 获取总数
|
|
|
|
|
|
total = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).count()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 获取分页数据
|
|
|
|
|
|
entries = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).order_by(KnowledgeEntry.created_at.desc()).offset(offset).limit(per_page).all()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 转换为字典格式
|
|
|
|
|
|
knowledge_list = []
|
|
|
|
|
|
for entry in entries:
|
|
|
|
|
|
knowledge_list.append({
|
|
|
|
|
|
"id": entry.id,
|
|
|
|
|
|
"question": entry.question,
|
|
|
|
|
|
"answer": entry.answer,
|
|
|
|
|
|
"category": entry.category,
|
|
|
|
|
|
"confidence_score": entry.confidence_score,
|
|
|
|
|
|
"usage_count": entry.usage_count,
|
|
|
|
|
|
"created_at": entry.created_at.isoformat() if entry.created_at else None,
|
|
|
|
|
|
"is_verified": getattr(entry, 'is_verified', False) # 添加验证状态
|
|
|
|
|
|
})
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
return {
|
|
|
|
|
|
"knowledge": knowledge_list,
|
|
|
|
|
|
"total": total,
|
|
|
|
|
|
"page": page,
|
|
|
|
|
|
"per_page": per_page,
|
|
|
|
|
|
"total_pages": (total + per_page - 1) // per_page
|
|
|
|
|
|
}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取知识库条目失败: {e}")
|
|
|
|
|
|
return {"knowledge": [], "total": 0, "page": 1, "per_page": per_page, "total_pages": 0}
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def verify_knowledge_entry(self, entry_id: int, verified_by: str = "admin") -> bool:
|
|
|
|
|
|
"""验证知识库条目"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entry = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id == entry_id
|
|
|
|
|
|
).first()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entry:
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entry.is_verified = True
|
|
|
|
|
|
entry.verified_by = verified_by
|
|
|
|
|
|
entry.verified_at = datetime.now()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
session.commit()
|
|
|
|
|
|
logger.info(f"知识库条目验证成功: {entry_id}")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"验证知识库条目失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def unverify_knowledge_entry(self, entry_id: int) -> bool:
|
|
|
|
|
|
"""取消验证知识库条目"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entry = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id == entry_id
|
|
|
|
|
|
).first()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entry:
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entry.is_verified = False
|
|
|
|
|
|
entry.verified_by = None
|
|
|
|
|
|
entry.verified_at = None
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
session.commit()
|
|
|
|
|
|
logger.info(f"知识库条目取消验证成功: {entry_id}")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"取消验证知识库条目失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
def delete_knowledge_entry(self, entry_id: int) -> bool:
|
|
|
|
|
|
"""删除知识库条目(软删除)"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
entry = session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id == entry_id
|
|
|
|
|
|
).first()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
if not entry:
|
|
|
|
|
|
logger.warning(f"知识库条目不存在: {entry_id}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
entry.is_active = False
|
|
|
|
|
|
session.commit()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-03-20 16:50:26 +08:00
|
|
|
|
# 从向量索引中移除
|
|
|
|
|
|
vector_store.remove(entry_id)
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
# 重新训练向量化器(如果还有活跃条目)
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._load_vectorizer()
|
|
|
|
|
|
except Exception as vectorizer_error:
|
|
|
|
|
|
logger.warning(f"重新加载向量化器失败: {vectorizer_error}")
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
logger.info(f"删除知识库条目成功: {entry_id}")
|
|
|
|
|
|
return True
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"删除知识库条目失败: {e}")
|
|
|
|
|
|
return False
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def get_knowledge_stats(self, tenant_id: Optional[str] = None) -> Dict[str, Any]:
|
2025-09-06 21:06:18 +08:00
|
|
|
|
"""获取知识库统计信息"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
2026-04-01 16:11:02 +08:00
|
|
|
|
# 基础过滤条件
|
|
|
|
|
|
base_filter = [KnowledgeEntry.is_active == True]
|
|
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
base_filter.append(KnowledgeEntry.tenant_id == tenant_id)
|
|
|
|
|
|
|
2026-02-11 22:53:08 +08:00
|
|
|
|
# 只统计活跃(未删除)的条目
|
|
|
|
|
|
total_entries = session.query(KnowledgeEntry).filter(
|
2026-04-01 16:11:02 +08:00
|
|
|
|
*base_filter
|
2025-09-06 21:06:18 +08:00
|
|
|
|
).count()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-02-11 22:53:08 +08:00
|
|
|
|
# 统计已验证的条目
|
|
|
|
|
|
verified_entries = session.query(KnowledgeEntry).filter(
|
2026-04-01 16:11:02 +08:00
|
|
|
|
*base_filter,
|
2026-02-11 22:53:08 +08:00
|
|
|
|
KnowledgeEntry.is_verified == True
|
|
|
|
|
|
).count()
|
|
|
|
|
|
|
|
|
|
|
|
# 按类别统计(仅限活跃条目)
|
2025-09-06 21:06:18 +08:00
|
|
|
|
category_stats = session.query(
|
|
|
|
|
|
KnowledgeEntry.category,
|
2026-02-11 22:53:08 +08:00
|
|
|
|
func.count(KnowledgeEntry.id)
|
|
|
|
|
|
).filter(
|
2026-04-01 16:11:02 +08:00
|
|
|
|
*base_filter
|
2025-09-06 21:06:18 +08:00
|
|
|
|
).group_by(KnowledgeEntry.category).all()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-02-11 22:53:08 +08:00
|
|
|
|
# 平均置信度(仅限活跃条目)
|
2025-09-06 21:06:18 +08:00
|
|
|
|
avg_confidence = session.query(
|
|
|
|
|
|
func.avg(KnowledgeEntry.confidence_score)
|
2026-02-11 22:53:08 +08:00
|
|
|
|
).filter(
|
2026-04-01 16:11:02 +08:00
|
|
|
|
*base_filter
|
2025-09-06 21:06:18 +08:00
|
|
|
|
).scalar() or 0.0
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
result = {
|
2025-09-06 21:06:18 +08:00
|
|
|
|
"total_entries": total_entries,
|
2026-02-11 22:53:08 +08:00
|
|
|
|
"active_entries": verified_entries, # 将 active_entries 复用为已验证数量,或前端相应修改
|
2025-09-06 21:06:18 +08:00
|
|
|
|
"category_distribution": dict(category_stats),
|
|
|
|
|
|
"average_confidence": float(avg_confidence)
|
|
|
|
|
|
}
|
2026-02-11 00:08:09 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
result["tenant_id"] = tenant_id
|
|
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
2025-09-06 21:06:18 +08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取知识库统计失败: {e}")
|
|
|
|
|
|
return {}
|
2025-12-08 00:53:23 +08:00
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def get_tenant_summary(self) -> List[Dict[str, Any]]:
|
|
|
|
|
|
"""按 tenant_id 聚合活跃知识条目,返回租户汇总列表。
|
|
|
|
|
|
|
|
|
|
|
|
返回格式: [
|
|
|
|
|
|
{
|
|
|
|
|
|
"tenant_id": "market_a",
|
|
|
|
|
|
"entry_count": 42,
|
|
|
|
|
|
"verified_count": 30,
|
|
|
|
|
|
"category_distribution": {"FAQ": 20, "故障排查": 22}
|
|
|
|
|
|
}, ...
|
|
|
|
|
|
]
|
|
|
|
|
|
按 entry_count 降序排列。
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
# 主聚合查询:按 tenant_id 统计 entry_count 和 verified_count
|
|
|
|
|
|
summary_rows = session.query(
|
|
|
|
|
|
KnowledgeEntry.tenant_id,
|
|
|
|
|
|
func.count(KnowledgeEntry.id).label('entry_count'),
|
|
|
|
|
|
func.sum(
|
|
|
|
|
|
func.cast(KnowledgeEntry.is_verified, Integer)
|
|
|
|
|
|
).label('verified_count')
|
|
|
|
|
|
).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).group_by(
|
|
|
|
|
|
KnowledgeEntry.tenant_id
|
|
|
|
|
|
).order_by(
|
|
|
|
|
|
func.count(KnowledgeEntry.id).desc()
|
|
|
|
|
|
).all()
|
|
|
|
|
|
|
|
|
|
|
|
if not summary_rows:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
# 类别分布查询:按 tenant_id + category 统计
|
|
|
|
|
|
category_rows = session.query(
|
|
|
|
|
|
KnowledgeEntry.tenant_id,
|
|
|
|
|
|
KnowledgeEntry.category,
|
|
|
|
|
|
func.count(KnowledgeEntry.id).label('cat_count')
|
|
|
|
|
|
).filter(
|
|
|
|
|
|
KnowledgeEntry.is_active == True
|
|
|
|
|
|
).group_by(
|
|
|
|
|
|
KnowledgeEntry.tenant_id,
|
|
|
|
|
|
KnowledgeEntry.category
|
|
|
|
|
|
).all()
|
|
|
|
|
|
|
|
|
|
|
|
# 构建 tenant_id -> {category: count} 映射
|
|
|
|
|
|
category_map: Dict[str, Dict[str, int]] = {}
|
|
|
|
|
|
for row in category_rows:
|
|
|
|
|
|
if row.tenant_id not in category_map:
|
|
|
|
|
|
category_map[row.tenant_id] = {}
|
|
|
|
|
|
category_map[row.tenant_id][row.category] = row.cat_count
|
|
|
|
|
|
|
|
|
|
|
|
# 组装结果
|
|
|
|
|
|
result = []
|
|
|
|
|
|
for row in summary_rows:
|
|
|
|
|
|
result.append({
|
|
|
|
|
|
"tenant_id": row.tenant_id,
|
|
|
|
|
|
"entry_count": row.entry_count,
|
|
|
|
|
|
"verified_count": int(row.verified_count or 0),
|
|
|
|
|
|
"category_distribution": category_map.get(row.tenant_id, {})
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取租户汇总失败: {e}")
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
2025-12-08 00:53:23 +08:00
|
|
|
|
def update_usage_count(self, entry_ids: List[int]) -> bool:
|
|
|
|
|
|
"""更新知识库条目的使用次数"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
# 批量更新使用次数
|
|
|
|
|
|
session.query(KnowledgeEntry).filter(
|
|
|
|
|
|
KnowledgeEntry.id.in_(entry_ids)
|
|
|
|
|
|
).update({
|
|
|
|
|
|
"usage_count": KnowledgeEntry.usage_count + 1,
|
|
|
|
|
|
"updated_at": datetime.now()
|
2026-02-11 00:08:09 +08:00
|
|
|
|
}, synchronize_session=False)
|
2025-12-08 00:53:23 +08:00
|
|
|
|
session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
logger.info(f"成功更新 {len(entry_ids)} 个知识库条目的使用次数")
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"更新知识库使用次数失败: {e}")
|
2026-02-11 00:08:09 +08:00
|
|
|
|
return False
|
|
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
def get_knowledge_paginated(self, page: int = 1, per_page: int = 10, category_filter: str = '', verified_filter: str = '', tenant_id: Optional[str] = None) -> Dict[str, Any]:
|
2026-02-11 00:08:09 +08:00
|
|
|
|
"""获取知识库条目(分页和过滤)"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
with db_manager.get_session() as session:
|
|
|
|
|
|
query = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True)
|
|
|
|
|
|
|
2026-04-01 16:11:02 +08:00
|
|
|
|
if tenant_id is not None:
|
|
|
|
|
|
query = query.filter(KnowledgeEntry.tenant_id == tenant_id)
|
|
|
|
|
|
|
2026-02-11 00:08:09 +08:00
|
|
|
|
if category_filter:
|
|
|
|
|
|
query = query.filter(KnowledgeEntry.category == category_filter)
|
|
|
|
|
|
if verified_filter:
|
|
|
|
|
|
if verified_filter == 'true':
|
|
|
|
|
|
query = query.filter(KnowledgeEntry.is_verified == True)
|
|
|
|
|
|
elif verified_filter == 'false':
|
|
|
|
|
|
query = query.filter(KnowledgeEntry.is_verified == False)
|
|
|
|
|
|
|
|
|
|
|
|
query = query.order_by(KnowledgeEntry.created_at.desc())
|
|
|
|
|
|
|
|
|
|
|
|
total = query.count()
|
|
|
|
|
|
|
|
|
|
|
|
knowledge_entries = query.offset((page - 1) * per_page).limit(per_page).all()
|
|
|
|
|
|
|
|
|
|
|
|
knowledge_data = []
|
|
|
|
|
|
for entry in knowledge_entries:
|
|
|
|
|
|
knowledge_data.append({
|
|
|
|
|
|
'id': entry.id,
|
|
|
|
|
|
'question': entry.question,
|
|
|
|
|
|
'answer': entry.answer,
|
|
|
|
|
|
'category': entry.category,
|
|
|
|
|
|
'confidence_score': entry.confidence_score,
|
|
|
|
|
|
'usage_count': entry.usage_count,
|
|
|
|
|
|
'is_verified': entry.is_verified,
|
|
|
|
|
|
'is_active': entry.is_active,
|
|
|
|
|
|
'created_at': entry.created_at.isoformat() if entry.created_at else None,
|
|
|
|
|
|
'updated_at': entry.updated_at.isoformat() if entry.updated_at else None
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
total_pages = (total + per_page - 1) // per_page
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
'knowledge': knowledge_data,
|
|
|
|
|
|
'page': page,
|
|
|
|
|
|
'per_page': per_page,
|
|
|
|
|
|
'total': total,
|
|
|
|
|
|
'total_pages': total_pages
|
|
|
|
|
|
}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.error(f"获取分页知识库失败: {e}")
|
|
|
|
|
|
# 返回一个空的结构以避免在调用方出现错误
|
|
|
|
|
|
return {
|
|
|
|
|
|
'knowledge': [],
|
|
|
|
|
|
'page': page,
|
|
|
|
|
|
'per_page': per_page,
|
|
|
|
|
|
'total': 0,
|
|
|
|
|
|
'total_pages': 0
|
|
|
|
|
|
}
|