refactor: 清理不需要的代码文件,添加.gitignore,优化项目结构

This commit is contained in:
赵杰 Jie Zhao (雄狮汽车科技)
2025-09-16 17:05:50 +01:00
parent 9451945e08
commit 9ca36042e3
65 changed files with 3370 additions and 10809 deletions

View File

@@ -125,16 +125,36 @@ class KnowledgeManager:
query_filter = query_filter.filter(KnowledgeEntry.is_verified == True)
entries = query_filter.all()
# 若已验证为空,则回退到全部活跃条目
if not entries and verified_only:
entries = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True).all()
if not entries:
return []
# 计算相似度
texts = [entry.question + " " + entry.answer for entry in entries]
query_vector = self.vectorizer.transform([query])
entry_vectors = self.vectorizer.transform(texts)
similarities = cosine_similarity(query_vector, entry_vectors)[0]
# 确保向量器已训练
try:
vocab_ok = hasattr(self.vectorizer, 'vocabulary_') and bool(self.vectorizer.vocabulary_)
if not vocab_ok:
self.vectorizer.fit(texts)
query_vector = self.vectorizer.transform([query])
entry_vectors = self.vectorizer.transform(texts)
similarities = cosine_similarity(query_vector, entry_vectors)[0]
except Exception as vec_err:
logger.warning(f"TF-IDF搜索失败回退到子串匹配: {vec_err}")
# 回退:子串匹配评分
similarities = []
q = query.strip()
for t in texts:
if not q:
similarities.append(0.0)
else:
score = 1.0 if q in t else 0.0
similarities.append(score)
similarities = np.array(similarities, dtype=float)
# 获取top_k个最相似的条目
top_indices = np.argsort(similarities)[-top_k:][::-1]