feat: 自动提交 - 周一 2025/09/22 16:28:00.19

2025-09-22 16:28:00 +01:00
parent f75176ec69
commit d6c88d87dd
58 changed files with 1197 additions and 11922 deletions
--- a/src/knowledge_base/knowledge_manager.py
+++ b/src/knowledge_base/knowledge_manager.py
@@ -130,50 +130,69 @@ class KnowledgeManager:
                    entries = session.query(KnowledgeEntry).filter(KnowledgeEntry.is_active == True).all()
                
                if not entries:
+                    logger.warning("知识库中没有活跃条目")
                    return []
                
-                # 计算相似度
-                texts = [entry.question + " " + entry.answer for entry in entries]
-
-                # 确保向量器已训练
-                try:
-                    vocab_ok = hasattr(self.vectorizer, 'vocabulary_') and bool(self.vectorizer.vocabulary_)
-                    if not vocab_ok:
-                        self.vectorizer.fit(texts)
-                    query_vector = self.vectorizer.transform([query])
-                    entry_vectors = self.vectorizer.transform(texts)
-                    similarities = cosine_similarity(query_vector, entry_vectors)[0]
-                except Exception as vec_err:
-                    logger.warning(f"TF-IDF搜索失败，回退到子串匹配: {vec_err}")
-                    # 回退：子串匹配评分
-                    similarities = []
-                    q = query.strip()
-                    for t in texts:
-                        if not q:
-                            similarities.append(0.0)
-                        else:
-                            score = 1.0 if q in t else 0.0
-                            similarities.append(score)
-                    similarities = np.array(similarities, dtype=float)
-                
-                # 获取top_k个最相似的条目
-                top_indices = np.argsort(similarities)[-top_k:][::-1]
+                # 如果查询为空，返回所有条目
+                if not query.strip():
+                    logger.info("查询为空，返回所有条目")
+                    return [{
+                        "id": entry.id,
+                        "question": entry.question,
+                        "answer": entry.answer,
+                        "category": entry.category,
+                        "confidence_score": entry.confidence_score,
+                        "similarity_score": 1.0,
+                        "usage_count": entry.usage_count,
+                        "is_verified": entry.is_verified
+                    } for entry in entries[:top_k]]
                
+                # 使用简化的关键词匹配搜索
+                q = query.strip().lower()
                results = []
-                for idx in top_indices:
-                    if similarities[idx] > 0.1:  # 最小相似度阈值
-                        entry = entries[idx]
+                
+                for entry in entries:
+                    # 组合问题和答案进行搜索
+                    search_text = (entry.question + " " + entry.answer).lower()
+                    
+                    # 计算匹配分数
+                    score = 0.0
+                    
+                    # 完全匹配
+                    if q in search_text:
+                        score = 1.0
+                    else:
+                        # 分词匹配
+                        query_words = q.split()
+                        text_words = search_text.split()
+                        
+                        # 计算单词匹配度
+                        matched_words = 0
+                        for word in query_words:
+                            if word in text_words:
+                                matched_words += 1
+                        
+                        if matched_words > 0:
+                            score = matched_words / len(query_words) * 0.8
+                    
+                    # 如果分数大于0，添加到结果中
+                    if score > 0:
                        results.append({
                            "id": entry.id,
                            "question": entry.question,
                            "answer": entry.answer,
                            "category": entry.category,
                            "confidence_score": entry.confidence_score,
-                            "similarity_score": float(similarities[idx]),
+                            "similarity_score": score,
                            "usage_count": entry.usage_count,
                            "is_verified": entry.is_verified
                        })
                
+                # 按相似度排序并返回top_k个结果
+                results.sort(key=lambda x: x['similarity_score'], reverse=True)
+                results = results[:top_k]
+                
+                logger.info(f"搜索查询 '{query}' 返回 {len(results)} 个结果")
                return results
                
        except Exception as e: