feat: 自动提交 - 周一 2025/09/22 16:28:00.19
This commit is contained in:
@@ -130,50 +130,69 @@ class KnowledgeManager:
|
||||
entries = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True).all()
|
||||
|
||||
if not entries:
|
||||
logger.warning("知识库中没有活跃条目")
|
||||
return []
|
||||
|
||||
# 计算相似度
|
||||
texts = [entry.question + " " + entry.answer for entry in entries]
|
||||
|
||||
# 确保向量器已训练
|
||||
try:
|
||||
vocab_ok = hasattr(self.vectorizer, 'vocabulary_') and bool(self.vectorizer.vocabulary_)
|
||||
if not vocab_ok:
|
||||
self.vectorizer.fit(texts)
|
||||
query_vector = self.vectorizer.transform([query])
|
||||
entry_vectors = self.vectorizer.transform(texts)
|
||||
similarities = cosine_similarity(query_vector, entry_vectors)[0]
|
||||
except Exception as vec_err:
|
||||
logger.warning(f"TF-IDF搜索失败,回退到子串匹配: {vec_err}")
|
||||
# 回退:子串匹配评分
|
||||
similarities = []
|
||||
q = query.strip()
|
||||
for t in texts:
|
||||
if not q:
|
||||
similarities.append(0.0)
|
||||
else:
|
||||
score = 1.0 if q in t else 0.0
|
||||
similarities.append(score)
|
||||
similarities = np.array(similarities, dtype=float)
|
||||
|
||||
# 获取top_k个最相似的条目
|
||||
top_indices = np.argsort(similarities)[-top_k:][::-1]
|
||||
# 如果查询为空,返回所有条目
|
||||
if not query.strip():
|
||||
logger.info("查询为空,返回所有条目")
|
||||
return [{
|
||||
"id": entry.id,
|
||||
"question": entry.question,
|
||||
"answer": entry.answer,
|
||||
"category": entry.category,
|
||||
"confidence_score": entry.confidence_score,
|
||||
"similarity_score": 1.0,
|
||||
"usage_count": entry.usage_count,
|
||||
"is_verified": entry.is_verified
|
||||
} for entry in entries[:top_k]]
|
||||
|
||||
# 使用简化的关键词匹配搜索
|
||||
q = query.strip().lower()
|
||||
results = []
|
||||
for idx in top_indices:
|
||||
if similarities[idx] > 0.1: # 最小相似度阈值
|
||||
entry = entries[idx]
|
||||
|
||||
for entry in entries:
|
||||
# 组合问题和答案进行搜索
|
||||
search_text = (entry.question + " " + entry.answer).lower()
|
||||
|
||||
# 计算匹配分数
|
||||
score = 0.0
|
||||
|
||||
# 完全匹配
|
||||
if q in search_text:
|
||||
score = 1.0
|
||||
else:
|
||||
# 分词匹配
|
||||
query_words = q.split()
|
||||
text_words = search_text.split()
|
||||
|
||||
# 计算单词匹配度
|
||||
matched_words = 0
|
||||
for word in query_words:
|
||||
if word in text_words:
|
||||
matched_words += 1
|
||||
|
||||
if matched_words > 0:
|
||||
score = matched_words / len(query_words) * 0.8
|
||||
|
||||
# 如果分数大于0,添加到结果中
|
||||
if score > 0:
|
||||
results.append({
|
||||
"id": entry.id,
|
||||
"question": entry.question,
|
||||
"answer": entry.answer,
|
||||
"category": entry.category,
|
||||
"confidence_score": entry.confidence_score,
|
||||
"similarity_score": float(similarities[idx]),
|
||||
"similarity_score": score,
|
||||
"usage_count": entry.usage_count,
|
||||
"is_verified": entry.is_verified
|
||||
})
|
||||
|
||||
# 按相似度排序并返回top_k个结果
|
||||
results.sort(key=lambda x: x['similarity_score'], reverse=True)
|
||||
results = results[:top_k]
|
||||
|
||||
logger.info(f"搜索查询 '{query}' 返回 {len(results)} 个结果")
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user