#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Knowledge-base health check script.

Prints simple statistics about ``KnowledgeEntry`` rows. Intended to be invoked
by the kb-audit Skill.
"""
import sys
from datetime import datetime, timedelta
from pathlib import Path

# Audit thresholds. They are used both in the queries and in the printed
# labels, so the two can never drift apart.
LOW_CONFIDENCE_THRESHOLD = 0.7  # confidence_score strictly below this is "low"
LOW_USAGE_THRESHOLD = 3  # usage_count strictly below this (or NULL) is "low"
STALE_AFTER_DAYS = 90  # updated_at older than this many days is "stale"
SAMPLE_SIZE = 5  # number of entries listed in the sample section
QUESTION_PREVIEW_CHARS = 40  # maximum characters of a question to display


def add_project_root_to_path():
    """Put the project root on ``sys.path`` so ``src.*`` imports resolve.

    Assumes the script lives under ``.claude/skills/kb-audit/scripts/``;
    ``parents[4]`` is the directory four levels above the script's own
    directory. The path is inserted at the front of ``sys.path`` only if it
    is not already present.

    Raises:
        IndexError: if the resolved script path has fewer than five parent
            directories (i.e. the script is not at the assumed location).
    """
    script_path = Path(__file__).resolve()
    project_root = script_path.parents[4]
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))


def preview_question(text, limit=QUESTION_PREVIEW_CHARS):
    """Return at most ``limit`` characters of ``text`` for display.

    An ellipsis is appended only when the text was actually truncated, so a
    short question is not shown as if it had been cut off.

    Args:
        text: The question text; ``None`` or empty is treated as ``""``.
        limit: Maximum number of characters to keep before the ellipsis.

    Returns:
        The (possibly truncated) preview string.
    """
    text = text or ""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main():
    """Run the health check and print the report to stdout."""
    add_project_root_to_path()

    # Imported here because these modules are only importable once the
    # project root has been added to sys.path.
    from src.core.database import db_manager
    from src.core.models import KnowledgeEntry

    print("=== 知识库健康检查 ===\n")

    with db_manager.get_session() as session:
        total = session.query(KnowledgeEntry).count()
        print(f"知识条目总数: {total}")

        # Low confidence: rows with a NULL score are excluded explicitly.
        low_conf = (
            session.query(KnowledgeEntry)
            .filter(KnowledgeEntry.confidence_score.isnot(None))
            .filter(KnowledgeEntry.confidence_score < LOW_CONFIDENCE_THRESHOLD)
            .count()
        )
        print(
            f"低置信度条目数 (confidence_score < {LOW_CONFIDENCE_THRESHOLD}): "
            f"{low_conf}"
        )

        # Rarely used: a NULL usage_count is counted as low usage.
        low_usage = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.usage_count.is_(None))
                | (KnowledgeEntry.usage_count < LOW_USAGE_THRESHOLD)
            )
            .count()
        )
        print(
            f"使用次数极低条目数 (usage_count < {LOW_USAGE_THRESHOLD} 或空): "
            f"{low_usage}"
        )

        # Stale: not updated within the last STALE_AFTER_DAYS days.
        # NOTE(review): datetime.now() is naive local time; this presumes
        # updated_at is stored the same way - confirm against the model.
        cutoff = datetime.now() - timedelta(days=STALE_AFTER_DAYS)
        old_entries = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.updated_at.isnot(None))
                & (KnowledgeEntry.updated_at < cutoff)
            )
            .count()
        )
        print(
            f"长期未更新条目数 (updated_at > {STALE_AFTER_DAYS} 天未更新): "
            f"{old_entries}"
        )

        print("\n示例问题条目(不含完整答案,仅展示前若干个):")
        # NOTE(review): this lists the most recently created entries; it is
        # not filtered by any of the criteria counted above.
        sample_entries = (
            session.query(KnowledgeEntry)
            .order_by(KnowledgeEntry.created_at.desc())
            .limit(SAMPLE_SIZE)
            .all()
        )
        for e in sample_entries:
            q_preview = preview_question(e.question)
            print(
                f" ID={e.id}, category={e.category}, "
                f"confidence={e.confidence_score}, usage={e.usage_count}, "
                f"Q='{q_preview}'"
            )

    print("\n提示:")
    print(" - 建议优先审查低置信度且 usage_count 较高的条目;")
    print(" - 对长期未更新且 usage_count 较高的条目,可考虑人工复查内容是否过时;")
    print(" - 对 usage_count 极低且从未触发的条目,可考虑合并或归档。")


if __name__ == "__main__":
    main()